commit - a85d745221f8b5a40c756c3ab8a60e8e49f3d2ca
commit + 6a644cebe360b81e85ca7efb761709257139caed
blob - 67482e9de4b056003fe953336d319b19e7b00997
blob + f0222648eb27d3bf21d1157bb87a81e269c94ae4
--- eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs
+++ eur-lex-scraper-naive/src/parsers/enacting_terms_parser.rs
use thiserror::Error;
use crate::models::enacting_terms::{Chapter, EnactingTerms};
+use crate::parsers::article::{ArticleParser, ArticleParserError};
use crate::parsers::section::{SectionParser, SectionParserError};
pub struct EnactingTermParser {}
#[error("error while parsing section")]
SectionError(SectionParserError),
#[error("error while parsing article")]
- ArticleError(),
+ ArticleError(ArticleParserError),
}
impl From<SectionParserError> for ChapterParserError {
}
}
+/// Lets `?` lift an article-parsing failure into a `ChapterParserError`
+/// by wrapping it in the `ArticleError` variant.
+impl From<ArticleParserError> for ChapterParserError {
+ fn from(err: ArticleParserError) -> ChapterParserError {
+ Self::ArticleError(err)
+ }
+}
+
impl ChapterParser {
+ /// Builds a `Chapter` from the given chapter element.
+ ///
+ /// Descendants whose `id` starts with `cpt_` and contains `sct_` (but not
+ /// `tit_`) are parsed as sections. When no such descendant exists,
+ /// descendants whose `id` starts with `art_` and does not contain `.tit`
+ /// are parsed as articles instead. Every parsed item is pushed onto the
+ /// chapter in selection order.
+ ///
+ /// # Errors
+ ///
+ /// Returns a `ChapterParserError` as soon as `SectionParser::parse` or
+ /// `ArticleParser::parse` fails; the underlying error is converted by `?`.
+ ///
+ /// # Panics
+ ///
+ /// Panics if one of the hard-coded CSS selectors fails to parse.
pub fn parse(element: ElementRef) -> Result<Chapter, ChapterParserError> {
let mut chapter = Chapter::default();
- // Since we are in a chapter already, cpt_ should refer to section
- let section_selector = Selector::parse("[id^=cpt_]").unwrap();
+ // The selector must stop at the closing `)` of `:not(...)`: any trailing
+ // character makes `Selector::parse` fail and the `unwrap` panic.
+ let section_selector =
+ Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])"#).unwrap();
let section_count = element.select(&section_selector).count();
+ // If there are sections, parse them. Otherwise, it must be articles.
if section_count > 0 {
for section in element.select(&section_selector) {
let section = SectionParser::parse(section)?;
chapter.push(section);
}
} else {
- // should parse article
- todo!()
+ let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap();
+ for article in element.select(&article_selector) {
+ let article = ArticleParser::parse(article)?;
+ chapter.push(article);
+ }
}
Ok(chapter)
}