Handle umlauts and eszetts when looking for definitions master
author Jacob <dev@jacobcasper.com>
Sat, 19 Jul 2025 01:04:03 +0000 (20:04 -0500)
committer Jacob <dev@jacobcasper.com>
Sat, 19 Jul 2025 01:04:03 +0000 (20:04 -0500)
src/main.rs

index fadd4160a97ffa0784140f56c38fd40b16b1166d..c3819f1f4cf9eead729a9cd1ea3862149f80e706 100644 (file)
@@ -7,19 +7,25 @@ fn main() {
     let div_selector =
         scraper::Selector::parse("div#block-numero-wordoftheday a.scene__title-link")
             .expect("failed to parse selector");
-    let mut div = doc
+    let div = doc
         .select(&div_selector)
         .next()
         .expect("no div found")
         .inner_html();
     // For some reason browser output doesn't contain this but text version does, a soft-hyphen
     // char for pronunciation.
-    div.retain(|c| c != '\u{ad}');
-    println!("Wort: {div}");
-    let def_body = reqwest::blocking::get(format!("https://duden.de/rechtschreibung/{div}"))
-        .expect("request failed")
-        .text()
-        .expect("no body");
+    let word = div.replace('\u{ad}', "");
+    println!("Wort: {word}, ({div})");
+    let url_version = word
+        .replace("ä", "ae")
+        .replace("ö", "oe")
+        .replace("ü", "ue")
+        .replace("ß", "ss");
+    let def_body =
+        reqwest::blocking::get(format!("https://duden.de/rechtschreibung/{url_version}"))
+            .expect("request failed")
+            .text()
+            .expect("no body");
     let def_doc = scraper::Html::parse_document(&def_body);
     let single_def_div_selector =
         scraper::Selector::parse("div#bedeutung p").expect("failed to parse selector");