commit fa3ca37ff9b9e995cf8dd43e6c2aea9d3c7c0f34 from: Romain VINCENT date: Sat Jan 17 08:42:32 2026 UTC Improve article parser test coverage. commit - 5cfb505c92887a2789a67f433b7030da648b5f0f commit + fa3ca37ff9b9e995cf8dd43e6c2aea9d3c7c0f34 blob - /dev/null blob + cb0ed49c2e95a725feac9289d5fb201519b65b2a (mode 644) --- /dev/null +++ eur-lex-scraper/data/tests/parsers/article/article_test_2.html @@ -0,0 +1,7 @@ +
+

Article 4

+
+

AI literacy

+
+

Providers and deployers of AI systems shall take measures to ensure, to their best extent, a sufficient level of AI literacy of their staff and other persons dealing with the operation and use of AI systems on their behalf, taking into account their technical knowledge, experience, education and training and the context the AI systems are to be used in, and considering the persons or groups of persons on whom the AI systems are to be used.

+
blob - /dev/null blob + 490f229466de01de985693f36feb246aa07a2cb7 (mode 644) --- /dev/null +++ eur-lex-scraper/data/tests/parsers/article/article_test_3.html @@ -0,0 +1,4 @@ +
+

Article 4

+

Providers and deployers of AI systems shall take measures to ensure, to their best extent, a sufficient level of AI literacy of their staff and other persons dealing with the operation and use of AI systems on their behalf, taking into account their technical knowledge, experience, education and training and the context the AI systems are to be used in, and considering the persons or groups of persons on whom the AI systems are to be used.

+
blob - /dev/null blob + e4a8d06b6590a9d6f3882e876cba5666f8826026 (mode 644) --- /dev/null +++ eur-lex-scraper/data/tests/parsers/article/article_test_4.html @@ -0,0 +1,7 @@ +
+

Article 4

+
+

AI literacy

+
+

Providers and deployers of AI systems shall take measures to ensure, to their best extent, a sufficient level of AI literacy of their staff and other persons dealing with the operation and use of AI systems on their behalf, taking into account their technical knowledge, experience, education and training and the context the AI systems are to be used in, and considering the persons or groups of persons on whom the AI systems are to be used.

+
blob - 108137e5d89c59765d119e7824718feb26e8f348 blob + 3e3dcd2532da8ba483a7310b7e3ab65c387f68f5 --- eur-lex-scraper/src/parsers/act_title.rs +++ eur-lex-scraper/src/parsers/act_title.rs @@ -33,10 +33,8 @@ impl EUActTileParser { #[cfg(test)] mod tests { - use std::fs; - - use super::*; use crate::parsers::act::EUActParser; + use std::fs; fn get_act_html_simple() -> String { fs::read_to_string("data/tests/parsers/act/test_act_simple.html").unwrap() blob - fe66bc85915175e8f495cdebb223e928eb197f37 blob + d433b9cd088a8ce5c945d648a1da87d176b28850 --- eur-lex-scraper/src/parsers/article.rs +++ eur-lex-scraper/src/parsers/article.rs @@ -7,17 +7,19 @@ pub struct ArticleParser {} #[derive(Error, Debug, PartialEq, PartialOrd)] pub enum ArticleParserError { - #[error("error while parsing article")] - GenericError, + #[error("error while parsing article title")] + TitleError, #[error("error while parsing article number")] ErrorNumber, + #[error("error while parsing article id: no id attribute")] + NoIdAttribute, } impl ArticleParser { pub fn parse(element: ElementRef) -> Result { let id = match element.attr("id") { Some(id) => id.to_string(), - None => return Err(ArticleParserError::ErrorNumber), + None => return Err(ArticleParserError::NoIdAttribute), }; let number_str = id.replace("art_", ""); let number: u32 = match number_str.parse() { @@ -27,7 +29,7 @@ impl ArticleParser { let article_title_selector = Selector::parse(r#".oj-sti-art"#).unwrap(); let title = match element.select(&article_title_selector).next() { Some(title) => title.inner_html(), - None => "".to_string(), + None => return Err(ArticleParserError::TitleError), }; let text = html2text(&element.inner_html()); let article = Article { @@ -46,21 +48,63 @@ mod tests { use scraper::{Html, Selector}; use std::fs; + // Well formed article fn get_test_article_1() -> String { fs::read_to_string("data/tests/parsers/article/article_test_1.html").unwrap() } + // Article with missing number + fn get_test_article_2() -> 
String { + fs::read_to_string("data/tests/parsers/article/article_test_2.html").unwrap() + } + // Article with no title + fn get_test_article_3() -> String { + fs::read_to_string("data/tests/parsers/article/article_test_3.html").unwrap() + } + // Article with no id attribute + fn get_test_article_4() -> String { + fs::read_to_string("data/tests/parsers/article/article_test_4.html").unwrap() + } #[test] - fn parsing_article() { + fn parsing_article_1_well_formed() { let html = Html::parse_fragment(&get_test_article_1()); let selector = Selector::parse("[id^=art_]").unwrap(); let element_ref = html.select(&selector).next().unwrap(); let article_left = ArticleParser::parse(element_ref).unwrap(); let article_right = Article { - title: "Subject matter`".to_string(), + title: "AI literacy".to_string(), number: 4, text: html2text(&element_ref.inner_html()), }; assert_eq!(article_left, article_right) } + #[test] + fn parsing_article_no_number_return_number_error() { + let html = Html::parse_fragment(&get_test_article_2()); + let selector = Selector::parse("[id^=art_]").unwrap(); + let element_ref = html.select(&selector).next().unwrap(); + let article_left = ArticleParser::parse(element_ref).unwrap_err(); + let article_right = ArticleParserError::ErrorNumber; + assert_eq!(article_left, article_right) + } + #[test] + fn parsing_article_no_title_return_title_error() { + let html = Html::parse_fragment(&get_test_article_3()); + let selector = Selector::parse("[id^=art_]").unwrap(); + let element_ref = html.select(&selector).next().unwrap(); + let article_left = ArticleParser::parse(element_ref).unwrap_err(); + let article_right = ArticleParserError::TitleError; + assert_eq!(article_left, article_right) + } + #[test] + fn parsing_article_no_id_return_no_id_attribute_error() { + let html = Html::parse_fragment(&get_test_article_4()); + // Change the selector for test purposes since we cannot + // use id + let selector = Selector::parse(".eli-subdivision").unwrap(); + let 
element_ref = html.select(&selector).next().unwrap(); + let article_left = ArticleParser::parse(element_ref).unwrap_err(); + let article_right = ArticleParserError::NoIdAttribute; + assert_eq!(article_left, article_right) + } }