commit - beca2d924c6ed4ad96dc9cf4d26ffe711a82d841
commit + 97618e6ac9f8771f0da9e3dedc2a385983d1cb0f
blob - f0222648eb27d3bf21d1157bb87a81e269c94ae4
blob + 8cd11075c2c7fd3c11a88b0a0eb9cda1c364dae0
--- eur-lex-scraper/src/parsers/enacting_terms_parser.rs
+++ eur-lex-scraper/src/parsers/enacting_terms_parser.rs
pub enum EnactingTermParserError {
#[error("error while parsing enacting term")]
GenericError,
+ /// A chapter could not be parsed; wraps the underlying `ChapterParserError`.
+ #[error("error while parsing chapter")]
+ ChapterError(ChapterParserError),
+ /// An article could not be parsed; wraps the underlying `ArticleParserError`.
+ #[error("error while parsing articles")]
+ ArticleError(ArticleParserError),
}
+/// Wraps a chapter parsing failure in `EnactingTermParserError::ChapterError`,
+/// which lets `?` propagate a `ChapterParserError` from the enacting-terms parser.
+impl From<ChapterParserError> for EnactingTermParserError {
+    fn from(value: ChapterParserError) -> Self {
+        Self::ChapterError(value)
+    }
+}
+
+/// Wraps an article parsing failure in `EnactingTermParserError::ArticleError`,
+/// which lets `?` propagate an `ArticleParserError` from the enacting-terms parser.
+impl From<ArticleParserError> for EnactingTermParserError {
+    fn from(value: ArticleParserError) -> Self {
+        Self::ArticleError(value)
+    }
+}
+
impl EnactingTermParser {
+ /// Parses the enacting terms found under `element`.
+ ///
+ /// If `element` contains at least one match for the chapter selector
+ /// (`id` starting with `cpt_`), every match is handed to `ChapterParser::parse`.
+ /// Otherwise every match for the article selector is handed to
+ /// `ArticleParser::parse`. Each parsed item is pushed onto the returned
+ /// `EnactingTerms` in the order `select` yields it.
+ ///
+ /// # Errors
+ ///
+ /// Stops at the first chapter or article that fails to parse and returns that
+ /// error converted into `EnactingTermParserError` by `?`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if one of the hard-coded selector strings is rejected by `Selector::parse`.
pub fn parse(element: ElementRef) -> Result<EnactingTerms, EnactingTermParserError> {
let mut enacting_terms = EnactingTerms::default();
let chapter_selector = Selector::parse("[id^=cpt_]").unwrap();
let chapter_count = element.select(&chapter_selector).count();
if chapter_count > 0 {
- // should parse chapter
- todo!()
+ // NOTE(review): `[id^=cpt_]` matches every id that merely starts with `cpt_`;
+ // confirm that elements nested inside a chapter cannot be picked up here as chapters.
+ for chapter in element.select(&chapter_selector) {
+ let chapter = ChapterParser::parse(chapter)?;
+ enacting_terms.push(chapter);
+ }
} else {
- // should parse article
- todo!()
+ // No chapter was found: select the articles directly. Example of a document
+ // made of articles only:
+ // https://eur-lex.europa.eu/legal-content/EN/TXT/HTML/?uri=CELEX:32006D0443
+ // The `:not([id*=".tit"])` part skips ids containing `.tit`
+ // (presumably the article title elements; verify against the HTML).
+ let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap();
+ for article in element.select(&article_selector) {
+ let article = ArticleParser::parse(article)?;
+ enacting_terms.push(article);
+ }
}
Ok(enacting_terms)
}
chapter.push(section);
}
} else {
- let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap();
+ let article_selector =
+ Selector::parse(r#"[id^="cpt_"][id*="art_"]:not([id*=".tit"])"#).unwrap();
for article in element.select(&article_selector) {
let article = ArticleParser::parse(article)?;
chapter.push(article);