Commit Diff


commit - 5e29929b3959d317321b463d031ce893f92c8c61
commit + a85d745221f8b5a40c756c3ab8a60e8e49f3d2ca
blob - d56f0bb587161ce5d3c502e8725f2036ffc8c7aa
blob + 9033263b69faa760527389cc64fe58f760605606
--- eur-lex-scraper-naive/src/models/articles.rs
+++ eur-lex-scraper-naive/src/models/articles.rs
@@ -2,6 +2,7 @@ use crate::models::enacting_terms::Item;
 
 #[derive(Debug, Default, PartialEq, Eq)]
 pub struct Article {
+    pub title: String,
     pub number: u32,
     pub text: String,
 }
blob - aecc33e03d3cc640dd0e922a36574765f9ef04d7
blob + ee8b814984b4deee0de30c78b56ce4a99f6e0ac2
--- eur-lex-scraper-naive/src/parsers/article.rs
+++ eur-lex-scraper-naive/src/parsers/article.rs
@@ -1,6 +1,6 @@
 use crate::models::articles::Article;
 use nanohtml2text::html2text;
-use scraper::ElementRef;
+use scraper::{ElementRef, Selector};
 use thiserror::Error;
 
 pub struct ArticleParser {}
@@ -24,8 +24,17 @@ impl ArticleParser {
             Ok(number) => number,
             Err(_) => return Err(ArticleParserError::ErrorNumber),
         };
+        let article_title_selector = Selector::parse(r#".oj-sti-art"#).unwrap();
+        let title = match element.select(&article_title_selector).next() {
+            Some(title) => title.inner_html(),
+            None => "".to_string(),
+        };
         let text = html2text(&element.inner_html());
-        let article = Article { number, text };
+        let article = Article {
+            title,
+            number,
+            text,
+        };
         Ok(article)
     }
 }
@@ -164,6 +173,7 @@ mod tests {
         let element_ref = html.select(&selector).next().unwrap();
         let article_left = ArticleParser::parse(element_ref).unwrap();
         let article_right = Article {
+            title: "Subject matter".to_string(),
             number: 1,
             text: html2text(&element_ref.inner_html()),
         };
blob - c5371e6926b80bd6c694beed7b1a850dd88803ee
blob + 1e54aa19a2acfbddd28779e7d31ff3c84cb06dc9
--- eur-lex-scraper-naive/src/parsers/section.rs
+++ eur-lex-scraper-naive/src/parsers/section.rs
@@ -433,7 +433,15 @@ mod tests {
         let section_left = SectionParser::parse(element_ref).unwrap();
         assert_eq!(section_left.items.len(), 2);
         assert_eq!(section_left.items.get(0).unwrap().number, 6);
+        assert_eq!(
+            section_left.items.get(0).unwrap().title,
+            "Classification rules for high-risk AI systems".to_string()
+        );
         assert_eq!(section_left.items.get(1).unwrap().number, 7);
+        assert_eq!(
+            section_left.items.get(1).unwrap().title,
+            "Amendments to Annex III".to_string()
+        );
 
         /*
         let article_right = Article {