Commit Diff


commit - 6cfd378669b68e2439bb02e8421310d902dca804
commit + 6436654c67889ef2154e666bca85e79e480a68d8
blob - e505b7f9f73ef8f4ada2a76cc4741d178e5019f3
blob + 94274aaac69f160afe7573495bb90883e31f1c91
--- eur-lex-scraper/src/models/section.rs
+++ eur-lex-scraper/src/models/section.rs
@@ -3,6 +3,7 @@ use crate::models::{articles::Article, enacting_terms:
 #[derive(Clone, Debug, Default, PartialEq, Eq)]
 pub struct Section {
     id: String,
+    number: u32,
     title: String,
     items: Vec<Article>,
 }
@@ -29,6 +30,12 @@ impl Section {
     pub fn get_id(&self) -> &str {
         &self.id
     }
+    pub fn set_number(&mut self, number: u32) {
+        self.number = number;
+    }
+    pub fn get_number(&self) -> u32 {
+        self.number
+    }
     pub fn set_title(&mut self, title: String) {
         self.title = title;
     }
blob - 4a66f70c66e89d36e60bbfac4f79d562addd26de
blob + fc3c38861494e1e0faf715a29326a6b0cd9db345
--- eur-lex-scraper/src/parsers/section.rs
+++ eur-lex-scraper/src/parsers/section.rs
@@ -10,6 +10,8 @@ pub struct SectionParser {}
 pub enum SectionParserError {
     #[error("error while parsing unique id")]
     UniqueIdError,
+    #[error("error while parsing section number")]
+    NoNumber,
     #[error("error while parsing section")]
     GenericError,
     #[error("error while parsing the title")]
@@ -37,6 +39,22 @@ impl SectionParser {
             None => return Err(SectionParserError::UniqueIdError),
         };
         section.set_id(id);
+        // Number selection: the element's `id` attribute is split on '.' and
+        // its second component is read as the section number, with a leading
+        // `sct_` marker removed (presumably ids look like `cpt_III.sct_1` —
+        // TODO confirm against the EUR-Lex markup). A missing attribute, a
+        // missing second component, or a non-numeric value all yield
+        // `SectionParserError::NoNumber`.
+        let number = element
+            .attr("id")
+            // `nth(1)` walks the split lazily instead of collecting a Vec.
+            .and_then(|id| id.split('.').nth(1))
+            // Strip only a leading marker; `replace` would delete every
+            // occurrence and allocate a new String.
+            .map(|part| part.strip_prefix("sct_").unwrap_or(part))
+            .and_then(|part| part.parse::<u32>().ok())
+            .ok_or(SectionParserError::NoNumber)?;
+        section.set_number(number);
         // This class should appear only once per section
         let section_title_selector = Selector::parse(r#".oj-ti-section-2"#).unwrap();
         let title = match element.select(&section_title_selector).next() {
@@ -79,6 +97,7 @@ mod tests {
             "Classification of AI systems as high-risk"
         );
         assert_eq!(section_left.get_id(), "d1e3012-1-1");
+        assert_eq!(section_left.get_number(), 1);
         assert_eq!(section_left.len(), 2);
         assert_eq!(section_left.get(0).unwrap().get_number(), 6);
         assert_eq!(