Commit Diff


commit - f3bb73ef08e4bd67cf31d05f22684185c2602d24
commit + 328366910293466b36dbfcb39511b8b6bc273550
blob - 9e74a47e650e208d76c209b89754a9e632484150
blob + 05395d79f03075225880582debaf53dc5ddf5eba
--- eur-lex-scraper/src/models/acts.rs
+++ eur-lex-scraper/src/models/acts.rs
@@ -1,7 +1,8 @@
-use crate::models::preambles::Preamble;
+use crate::models::{enacting_terms::EnactingTerms, preambles::Preamble};
 
 #[derive(Default, Debug, Clone, PartialEq)]
 pub struct EUAct {
     pub title: String,
     pub preamble: Preamble,
+    pub enacting_terms: EnactingTerms,
 }
blob - 9033263b69faa760527389cc64fe58f760605606
blob + 2b17d016d325384df22aeedde464d4682028a58c
--- eur-lex-scraper/src/models/articles.rs
+++ eur-lex-scraper/src/models/articles.rs
@@ -1,6 +1,6 @@
 use crate::models::enacting_terms::Item;
 
-#[derive(Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Article {
     pub title: String,
     pub number: u32,
blob - 0e4d0727da318cfba1bf97599d6c313d32ee3971
blob + be404e25c4d19e77c886af9625174c2655747147
--- eur-lex-scraper/src/models/enacting_terms.rs
+++ eur-lex-scraper/src/models/enacting_terms.rs
@@ -1,6 +1,6 @@
 use crate::models::articles::Article;
 
-#[derive(Debug, PartialEq, Eq)]
+#[derive(Clone, Debug, PartialEq, Eq)]
 pub enum Item {
     Article(Article),
     Chapter(Chapter),
@@ -34,7 +34,7 @@ impl Item {
     }
 }
 
-#[derive(Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct EnactingTerms {
     pub items: Vec<Item>,
 }
@@ -45,7 +45,7 @@ impl EnactingTerms {
     }
 }
 
-#[derive(Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Chapter {
     pub items: Vec<Item>,
 }
@@ -62,7 +62,7 @@ impl Into<Item> for Chapter {
     }
 }
 
-#[derive(Debug, Default, PartialEq, Eq)]
+#[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Section {
     pub items: Vec<Article>,
 }
blob - 61f872aec5803980ed69405184f3191d255ff1cc
blob + a3b88f56fc9ba356d030385d3c405172de36f99c
--- eur-lex-scraper/src/parsers/act.rs
+++ eur-lex-scraper/src/parsers/act.rs
@@ -1,4 +1,5 @@
 use crate::models::acts::EUAct;
+use crate::parsers::enacting_terms::EnactingTermParser;
 use crate::parsers::{
     act_title::{EUActTileParser, EUActTitleParserError},
     preamble::PreambleParser,
@@ -37,7 +38,17 @@ impl EUActParser {
         let preamble_section = act_html.select(&preamble_selector).next().unwrap();
         let preamble = PreambleParser::parse(preamble_section).unwrap();
 
-        Ok(EUAct { title, preamble })
+        // /////////////
+        // Get enacting terms
+        let enacting_terms_selector = Selector::parse("#enc_1").unwrap();
+        let enacting_terms_section = act_html.select(&enacting_terms_selector).next().unwrap();
+        let enacting_terms = EnactingTermParser::parse(enacting_terms_section).unwrap();
+
+        Ok(EUAct {
+            title,
+            preamble,
+            enacting_terms,
+        })
     }
 }
 
@@ -70,5 +81,6 @@ mod tests {
         let title = get_act_title();
         let act = EUActParser::parse(&get_act_html()).unwrap();
         assert_eq!(act.title, title);
+        assert_eq!(act.enacting_terms.items.len(), 13);
     }
 }
blob - 0bff7ef487c062276c514afa7897c63cf440e898
blob + 89bcd19843271bc087ffb5a7cdd9ea05f2becce1
--- eur-lex-scraper/src/parsers/act_title.rs
+++ eur-lex-scraper/src/parsers/act_title.rs
@@ -1,6 +1,5 @@
-use crate::models::{acts::EUAct, preambles::Preamble};
 use nanohtml2text::html2text;
-use scraper::{ElementRef, Html, Selector};
+use scraper::{ElementRef, Selector};
 use thiserror::Error;
 
 #[derive(Error, Debug)]
@@ -32,41 +31,12 @@ impl EUActTileParser {
     }
 }
 
-#[derive(Error, Debug)]
-pub enum EUActParserError {
-    #[error("error while parsing title: {0}")]
-    TitleError(EUActTitleParserError),
-}
-
-impl From<EUActTitleParserError> for EUActParserError {
-    fn from(value: EUActTitleParserError) -> Self {
-        EUActParserError::TitleError(value)
-    }
-}
-
-pub struct EUActParser {}
-
-impl EUActParser {
-    pub fn parse(html: &str) -> Result<EUAct, EUActParserError> {
-        let act_html = Html::parse_document(html);
-
-        // //////////////
-        // Get act title
-        let title_selector = Selector::parse(".eli-main-title").unwrap();
-        let title_element = act_html.select(&title_selector).next().unwrap();
-        let title = EUActTileParser::parse(title_element)?;
-        Ok(EUAct {
-            title,
-            preamble: Preamble::default(),
-        })
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use std::fs;
 
     use super::*;
+    use crate::parsers::act::EUActParser;
 
     fn get_act_html_simple() -> String {
         fs::read_to_string("data/test_act_simple.html").unwrap()
blob - 85aa9edf97b6993d072ce51b24010110ef6d1c2d
blob + 8a51f9f779ddd722b54f402d244d3884f8005d91
--- eur-lex-scraper/src/parsers/chapter.rs
+++ eur-lex-scraper/src/parsers/chapter.rs
@@ -33,7 +33,7 @@ impl ChapterParser {
     pub fn parse(element: ElementRef) -> Result<Chapter, ChapterParserError> {
         let mut chapter = Chapter::default();
         let section_selector =
-            Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])}"#).unwrap();
+            Selector::parse(r#"[id^="cpt_"][id*="sct_"]:not([id*="tit_"])"#).unwrap();
         let section_count = element.select(&section_selector).count();
         // If there are sections, parse them. Otherwise, it must be articles.
         if section_count > 0 {
blob - 1e54aa19a2acfbddd28779e7d31ff3c84cb06dc9
blob + 1c7c1d21b0adfecb2a355453f4306ac9ba4ebea0
--- eur-lex-scraper/src/parsers/section.rs
+++ eur-lex-scraper/src/parsers/section.rs
@@ -442,13 +442,5 @@ mod tests {
             section_left.items.get(1).unwrap().title,
             "Amendments to Annex&nbsp;III".to_string()
         );
-
-        /*
-        let article_right = Article {
-            number: 1,
-            text: html2text(&element_ref.inner_html()),
-        };
-        assert_eq!(article_left, article_right)
-        */
     }
 }