commit 6a644cebe360b81e85ca7efb761709257139caed from: Romain VINCENT date: Thu Jan 15 19:39:17 2026 UTC Implement Chapter parser. commit - a85d745221f8b5a40c756c3ab8a60e8e49f3d2ca commit + 6a644cebe360b81e85ca7efb761709257139caed blob - 67482e9de4b056003fe953336d319b19e7b00997 blob + f0222648eb27d3bf21d1157bb87a81e269c94ae4 --- eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs +++ eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs @@ -2,6 +2,7 @@ use scraper::{ElementRef, Selector}; use thiserror::Error; use crate::models::enacting_terms::{Chapter, EnactingTerms}; +use crate::parsers::article::{ArticleParser, ArticleParserError}; use crate::parsers::section::{SectionParser, SectionParserError}; pub struct EnactingTermParser {} @@ -37,7 +38,7 @@ pub enum ChapterParserError { #[error("error while parsing section")] SectionError(SectionParserError), #[error("error while parsing article")] - ArticleError(), + ArticleError(ArticleParserError), } impl From<SectionParserError> for ChapterParserError { @@ -46,20 +47,30 @@ impl From<SectionParserError> for ChapterParserError { } } +impl From<ArticleParserError> for ChapterParserError { + fn from(value: ArticleParserError) -> Self { + ChapterParserError::ArticleError(value) + } +} + impl ChapterParser { pub fn parse(element: ElementRef) -> Result<Chapter, ChapterParserError> { let mut chapter = Chapter::default(); - // Since we are in a chapter already, cpt_ should refer to section - let section_selector = Selector::parse("[id^=cpt_]").unwrap(); + let section_selector = + Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])}"#).unwrap(); let section_count = element.select(&section_selector).count(); + // If there are sections, parse them. Otherwise, it must be articles. 
if section_count > 0 { for section in element.select(&section_selector) { let section = SectionParser::parse(section)?; chapter.push(section); } } else { - // should parse article - todo!() + let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap(); + for article in element.select(&article_selector) { + let article = ArticleParser::parse(article)?; + chapter.push(article); + } } Ok(chapter) }