commit - 358471e349765354d6c4aa8172b94e27c2b2515c
commit + 78f7dcfd62590dd043b3efe7fedd36d806be1f21
blob - 59939a3bd4f42b8d2691492a71e9f1e56e6fa925
blob + c9fb979d512b95d86f965fe461e65d25d888b000
--- eur-lex-scraper/src/models/section.rs
+++ eur-lex-scraper/src/models/section.rs
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct Section {
+ title: String,
items: Vec<Article>,
}
}
impl Section {
+ pub fn set_title(&mut self, title: String) {
+ self.title = title;
+ }
+ pub fn get_title(&self) -> &str {
+ &self.title
+ }
pub fn push(&mut self, article: Article) {
self.items.push(article)
}
blob - 161624a33298dea9b4ce298f8fa8d7975108ed54
blob + 0eaf1d07e47fd85d37c69b9cf1af59c94fd8b783
--- eur-lex-scraper/src/parsers/section.rs
+++ eur-lex-scraper/src/parsers/section.rs
pub enum SectionParserError {
#[error("error while parsing section")]
GenericError,
+ #[error("error while parsing the title")]
+ TitleError,
#[error("error parsing article")]
ArticleError(ArticleParserError),
}
impl SectionParser {
pub fn parse(element: ElementRef) -> Result<Section, SectionParserError> {
let mut section = Section::default();
+ // This class should appear only once per section
+ let section_title_selector = Selector::parse(r#".oj-ti-section-2"#).unwrap();
+ let title = match element.select(&section_title_selector).next() {
+ Some(title) => title.inner_html(),
+ None => return Err(SectionParserError::TitleError),
+ };
+ section.set_title(nanohtml2text::html2text(&title).trim().to_string());
// select article but not titles
let article_selector = Selector::parse(r#"[id^="art_"]:not([id*=".tit"])"#).unwrap();
for article in element.select(&article_selector) {
}
#[test]
- fn parsing_article() {
+ fn parsing_section_1() {
let html = Html::parse_fragment(&get_test_section_1());
let selector = Selector::parse(r#"[id*="sct_"]:not([id*=".tit_"])"#).unwrap();
let element_ref = html.select(&selector).next().unwrap();
let section_left = SectionParser::parse(element_ref).unwrap();
+ assert_eq!(
+ section_left.get_title(),
+ "Classification of AI systems as high-risk"
+ );
assert_eq!(section_left.len(), 2);
assert_eq!(section_left.get(0).unwrap().get_number(), 6);
assert_eq!(