From a3798584a47959cec9b26eb6f2663495572586a3 Mon Sep 17 00:00:00 2001 From: Jacob Date: Fri, 18 Jul 2025 20:04:03 -0500 Subject: [PATCH] Handle umlauts and eszetts when looking for definitions --- src/main.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main.rs b/src/main.rs index fadd416..c3819f1 100644 --- a/src/main.rs +++ b/src/main.rs @@ -7,19 +7,25 @@ fn main() { let div_selector = scraper::Selector::parse("div#block-numero-wordoftheday a.scene__title-link") .expect("failed to parse selector"); - let mut div = doc + let div = doc .select(&div_selector) .next() .expect("no div found") .inner_html(); // For some reason browser output doesn't contain this but text version does, a soft-hyphen // char for pronunciation. - div.retain(|c| c != '\u{ad}'); - println!("Wort: {div}"); - let def_body = reqwest::blocking::get(format!("https://duden.de/rechtschreibung/{div}")) - .expect("request failed") - .text() - .expect("no body"); + let word = div.replace('\u{ad}', ""); + println!("Wort: {word}, ({div})"); + let url_version = word + .replace("ä", "ae") + .replace("ö", "oe") + .replace("ü", "ue") + .replace("ß", "ss"); + let def_body = + reqwest::blocking::get(format!("https://duden.de/rechtschreibung/{url_version}")) + .expect("request failed") + .text() + .expect("no body"); let def_doc = scraper::Html::parse_document(&def_body); let single_def_div_selector = scraper::Selector::parse("div#bedeutung p").expect("failed to parse selector"); -- 2.30.2