diff options
| author | Jokler <jokler.contact@gmail.com> | 2018-04-18 05:01:19 +0200 |
|---|---|---|
| committer | Jokler <jokler.contact@gmail.com> | 2018-04-18 05:01:19 +0200 |
| commit | fd2ea42d6702b1d5a2e8a0c8a0072c2b3e376a20 (patch) | |
| tree | 470fbba8fd4b8b081111b25b80627cdd6116d3f2 | |
| parent | f8ac65f479a5e877363eccdad80ade7f33a75ba4 (diff) | |
| download | frippy-fd2ea42d6702b1d5a2e8a0c8a0072c2b3e376a20.tar.gz frippy-fd2ea42d6702b1d5a2e8a0c8a0072c2b3e376a20.zip | |
Drop useless titles again
| -rw-r--r-- | src/plugins/url.rs | 34 |
1 files changed, 23 insertions, 11 deletions
```diff
diff --git a/src/plugins/url.rs b/src/plugins/url.rs
index 5999d98..a54cabf 100644
--- a/src/plugins/url.rs
+++ b/src/plugins/url.rs
@@ -24,11 +24,11 @@ pub struct UrlTitles {
 }
 
 #[derive(Clone, Debug)]
-struct Title(String);
+struct Title(String, Option<usize>);
 
 impl From<String> for Title {
     fn from(title: String) -> Self {
-        Title(title)
+        Title(title, None)
     }
 }
 
@@ -72,7 +72,7 @@ impl Title {
     }
 
     // TODO Improve logic
-    pub fn usefulness(&self, url: &str) -> usize {
+    fn get_usefulness(self, url: &str) -> Self {
         let mut usefulness = 0;
         for word in WORD_RE.find_iter(&self.0) {
             let w = word.as_str().to_lowercase();
@@ -81,21 +81,25 @@ impl Title {
             }
         }
 
-        usefulness
+        Title(self.0, Some(usefulness))
+    }
+
+    pub fn usefulness(&self) -> usize {
+        self.1.expect("Usefulness should be calculated already")
     }
 
     fn clean_up(self) -> Self {
-        self.0.trim().replace('\n', "|").replace('\r', "|").into()
+        Title(self.0.trim().replace('\n', "|").replace('\r', "|"), self.1)
     }
 
     pub fn find_clean_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
-        Self::find_ogtitle(body)
-            .map(|t| t.clean_up())
+        let title = Self::find_ogtitle(body)?;
+        Ok(title.get_usefulness(url).clean_up())
     }
 
     pub fn find_clean_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
-        Self::find_title(body)
-            .map(|t| t.clean_up())
+        let title = Self::find_title(body)?;
+        Ok(title.get_usefulness(url).clean_up())
     }
 }
 
@@ -123,7 +127,7 @@ impl UrlTitles {
 
         let title = match (title, og_title) {
             (Ok(title), Ok(og_title)) => {
-                if title.usefulness(url.as_str()) > og_title.usefulness(url.as_str()) {
+                if title.usefulness() > og_title.usefulness() {
                     title
                 } else {
                     og_title
@@ -134,7 +138,11 @@ impl UrlTitles {
             (Err(e), _) => Err(e)?,
         };
 
-        Ok(title.into())
+        if title.usefulness() > 1 {
+            Ok(title.into())
+        } else {
+            Err(ErrorKind::UselessTitle.into())
+        }
     }
 }
 
@@ -194,6 +202,10 @@ pub mod error {
         #[fail(display = "No title was found")]
         MissingTitle,
 
+        /// Useless title error
+        #[fail(display = "The titles found were not useful enough")]
+        UselessTitle,
+
         /// Html decoding error
         #[fail(display = "Failed to decode Html characters")]
         HtmlDecoding,
```
