diff options
| author | Jokler <jokler.contact@gmail.com> | 2018-04-11 18:52:33 +0200 |
|---|---|---|
| committer | Jokler <jokler.contact@gmail.com> | 2018-04-11 18:52:33 +0200 |
| commit | f8ac65f479a5e877363eccdad80ade7f33a75ba4 (patch) | |
| tree | 296a92c509e4a6105799092f3f82764742f91400 /src | |
| parent | 5c45046794e3c93f875d69193bb12d6608d45a8c (diff) | |
| download | frippy-f8ac65f479a5e877363eccdad80ade7f33a75ba4.tar.gz frippy-f8ac65f479a5e877363eccdad80ade7f33a75ba4.zip | |
Take better title instead of first found
Diffstat (limited to 'src')
| -rw-r--r-- | src/plugins/url.rs | 43 |
1 files changed, 18 insertions, 25 deletions
```diff
diff --git a/src/plugins/url.rs b/src/plugins/url.rs
index aba5b0d..5999d98 100644
--- a/src/plugins/url.rs
+++ b/src/plugins/url.rs
@@ -72,38 +72,29 @@ impl Title {
     }
 
     // TODO Improve logic
-    fn is_useful(&self, url: &str) -> bool {
+    pub fn usefulness(&self, url: &str) -> usize {
+        let mut usefulness = 0;
         for word in WORD_RE.find_iter(&self.0) {
             let w = word.as_str().to_lowercase();
             if w.len() > 2 && !url.to_lowercase().contains(&w) {
-                return true;
+                usefulness += 1;
             }
         }
 
-        return false;
-    }
-
-    fn into_useful_title<'a>(self, url: &str) -> Result<Self, UrlError> {
-        if self.is_useful(url) {
-            Ok(self)
-        } else {
-            Err(ErrorKind::UselessTitle)?
-        }
+        usefulness
     }
 
     fn clean_up(self) -> Self {
         self.0.trim().replace('\n', "|").replace('\r', "|").into()
     }
 
-    pub fn find_useful_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
+    pub fn find_clean_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
         Self::find_ogtitle(body)
-            .and_then(|t| t.into_useful_title(url))
             .map(|t| t.clean_up())
     }
 
-    pub fn find_useful_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
+    pub fn find_clean_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
         Self::find_title(body)
-            .and_then(|t| t.into_useful_title(url))
             .map(|t| t.clean_up())
     }
 }
@@ -127,14 +118,20 @@ impl UrlTitles {
             .max_kib(self.max_kib);
         let body = url.request().context(ErrorKind::Download)?;
 
-        let title = match Title::find_useful_ogtitle(&body, url.as_str()) {
-            Ok(t) => t,
-            Err(e) => match e.kind() {
-                ErrorKind::MissingTitle | ErrorKind::UselessTitle => {
-                    Title::find_useful_title(&body, url.as_str())?
+        let title = Title::find_clean_title(&body, url.as_str());
+        let og_title = Title::find_clean_ogtitle(&body, url.as_str());
+
+        let title = match (title, og_title) {
+            (Ok(title), Ok(og_title)) => {
+                if title.usefulness(url.as_str()) > og_title.usefulness(url.as_str()) {
+                    title
+                } else {
+                    og_title
                 }
-                _ => Err(e)?,
             },
+            (Ok(title), _) => title,
+            (_, Ok(title)) => title,
+            (Err(e), _) => Err(e)?,
         };
 
         Ok(title.into())
@@ -197,10 +194,6 @@ pub mod error {
         #[fail(display = "No title was found")]
         MissingTitle,
 
-        /// Useless title error
-        #[fail(display = "Title was not helpful")]
-        UselessTitle,
-
         /// Html decoding error
         #[fail(display = "Failed to decode Html characters")]
         HtmlDecoding,
```
