about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Jokler <jokler.contact@gmail.com> 2018-04-11 18:52:33 +0200
committer Jokler <jokler.contact@gmail.com> 2018-04-11 18:52:33 +0200
commit f8ac65f479a5e877363eccdad80ade7f33a75ba4 (patch)
tree 296a92c509e4a6105799092f3f82764742f91400 /src
parent 5c45046794e3c93f875d69193bb12d6608d45a8c (diff)
download frippy-f8ac65f479a5e877363eccdad80ade7f33a75ba4.tar.gz
frippy-f8ac65f479a5e877363eccdad80ade7f33a75ba4.zip
Take better title instead of first found
Diffstat (limited to 'src')
-rw-r--r-- src/plugins/url.rs 43
1 files changed, 18 insertions, 25 deletions
diff --git a/src/plugins/url.rs b/src/plugins/url.rs
index aba5b0d..5999d98 100644
--- a/src/plugins/url.rs
+++ b/src/plugins/url.rs
@@ -72,38 +72,29 @@ impl Title {
}
// TODO Improve logic
- fn is_useful(&self, url: &str) -> bool {
+ pub fn usefulness(&self, url: &str) -> usize {
+ let mut usefulness = 0;
for word in WORD_RE.find_iter(&self.0) {
let w = word.as_str().to_lowercase();
if w.len() > 2 && !url.to_lowercase().contains(&w) {
- return true;
+ usefulness += 1;
}
}
- return false;
- }
-
- fn into_useful_title<'a>(self, url: &str) -> Result<Self, UrlError> {
- if self.is_useful(url) {
- Ok(self)
- } else {
- Err(ErrorKind::UselessTitle)?
- }
+ usefulness
}
fn clean_up(self) -> Self {
self.0.trim().replace('\n', "|").replace('\r', "|").into()
}
- pub fn find_useful_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
+ pub fn find_clean_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
Self::find_ogtitle(body)
- .and_then(|t| t.into_useful_title(url))
.map(|t| t.clean_up())
}
- pub fn find_useful_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
+ pub fn find_clean_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> {
Self::find_title(body)
- .and_then(|t| t.into_useful_title(url))
.map(|t| t.clean_up())
}
}
@@ -127,14 +118,20 @@ impl UrlTitles {
.max_kib(self.max_kib);
let body = url.request().context(ErrorKind::Download)?;
- let title = match Title::find_useful_ogtitle(&body, url.as_str()) {
- Ok(t) => t,
- Err(e) => match e.kind() {
- ErrorKind::MissingTitle | ErrorKind::UselessTitle => {
- Title::find_useful_title(&body, url.as_str())?
+ let title = Title::find_clean_title(&body, url.as_str());
+ let og_title = Title::find_clean_ogtitle(&body, url.as_str());
+
+ let title = match (title, og_title) {
+ (Ok(title), Ok(og_title)) => {
+ if title.usefulness(url.as_str()) > og_title.usefulness(url.as_str()) {
+ title
+ } else {
+ og_title
}
- _ => Err(e)?,
},
+ (Ok(title), _) => title,
+ (_, Ok(title)) => title,
+ (Err(e), _) => Err(e)?,
};
Ok(title.into())
@@ -197,10 +194,6 @@ pub mod error {
#[fail(display = "No title was found")]
MissingTitle,
- /// Useless title error
- #[fail(display = "Title was not helpful")]
- UselessTitle,
-
/// Html decoding error
#[fail(display = "Failed to decode Html characters")]
HtmlDecoding,