diff options
Diffstat (limited to 'src/plugins/url.rs')
| -rw-r--r-- | src/plugins/url.rs | 148 |
1 files changed, 99 insertions, 49 deletions
diff --git a/src/plugins/url.rs b/src/plugins/url.rs index ec98900..aba5b0d 100644 --- a/src/plugins/url.rs +++ b/src/plugins/url.rs @@ -5,46 +5,49 @@ use irc::client::prelude::*; use regex::Regex; use plugin::*; -use utils; +use utils::Url; use self::error::*; -use error::FrippyError; use error::ErrorKind as FrippyErrorKind; +use error::FrippyError; use failure::Fail; use failure::ResultExt; lazy_static! { - static ref RE: Regex = Regex::new(r"(^|\s)(https?://\S+)").unwrap(); + static ref URL_RE: Regex = Regex::new(r"(^|\s)(https?://\S+)").unwrap(); + static ref WORD_RE: Regex = Regex::new(r"(\w+)").unwrap(); } #[derive(PluginName, Debug)] -pub struct Url { +pub struct UrlTitles { max_kib: usize, } -impl Url { - /// If a file is larger than `max_kib` KiB the download is stopped - pub fn new(max_kib: usize) -> Url { - Url { max_kib: max_kib } - } - - fn grep_url(&self, msg: &str) -> Option<String> { - let captures = RE.captures(msg)?; - debug!("Url captures: {:?}", captures); +#[derive(Clone, Debug)] +struct Title(String); - Some(captures.get(2)?.as_str().to_owned()) +impl From<String> for Title { + fn from(title: String) -> Self { + Title(title) } +} +impl From<Title> for String { + fn from(title: Title) -> Self { + title.0 + } +} - fn get_ogtitle<'a>(&self, body: &str) -> Result<String, UrlError> { - let title = body.find("property=\"og:title\"") +impl Title { + fn find_by_delimiters(body: &str, delimiters: [&str; 3]) -> Result<Self, UrlError> { + let title = body.find(delimiters[0]) .map(|tag| { body[tag..] - .find("content=\"") - .map(|offset| tag + offset + 9) + .find(delimiters[1]) + .map(|offset| tag + offset + delimiters[1].len()) .map(|start| { body[start..] - .find("\"") + .find(delimiters[2]) .map(|offset| start + offset) .map(|end| &body[start..end]) }) @@ -52,53 +55,96 @@ impl Url { .and_then(|s| s.and_then(|s| s)) .ok_or(ErrorKind::MissingTitle)?; - debug!("Title: {:?}", title); + debug!("delimiters: {:?}", delimiters); + debug!("title: {:?}", title); - htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into()) + htmlescape::decode_html(title) + .map(|t| t.into()) + .map_err(|_| ErrorKind::HtmlDecoding.into()) } - fn get_title<'a>(&self, body: &str) -> Result<String, UrlError> { - let title = body.find("<title") - .map(|tag| { - body[tag..] - .find('>') - .map(|offset| tag + offset + 1) - .map(|start| { - body[start..] - .find("</title>") - .map(|offset| start + offset) - .map(|end| &body[start..end]) - }) - }) - .and_then(|s| s.and_then(|s| s)) - .ok_or(ErrorKind::MissingTitle)?; + fn find_ogtitle<'a>(body: &str) -> Result<Self, UrlError> { + Self::find_by_delimiters(body, ["property=\"og:title\"", "content=\"", "\""]) + } - debug!("Title: {:?}", title); + fn find_title<'a>(body: &str) -> Result<Self, UrlError> { + Self::find_by_delimiters(body, ["<title", ">", "</title>"]) + } - htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into()) + // TODO Improve logic + fn is_useful(&self, url: &str) -> bool { + for word in WORD_RE.find_iter(&self.0) { + let w = word.as_str().to_lowercase(); + if w.len() > 2 && !url.to_lowercase().contains(&w) { + return true; + } + } + + return false; + } + + fn into_useful_title<'a>(self, url: &str) -> Result<Self, UrlError> { + if self.is_useful(url) { + Ok(self) + } else { + Err(ErrorKind::UselessTitle)? + } + } + + fn clean_up(self) -> Self { + self.0.trim().replace('\n', "|").replace('\r', "|").into() + } + + pub fn find_useful_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> { + Self::find_ogtitle(body) + .and_then(|t| t.into_useful_title(url)) + .map(|t| t.clean_up()) + } + + pub fn find_useful_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> { + Self::find_title(body) + .and_then(|t| t.into_useful_title(url)) + .map(|t| t.clean_up()) + } +} + +impl UrlTitles { + /// If a file is larger than `max_kib` KiB the download is stopped + pub fn new(max_kib: usize) -> Self { + UrlTitles { max_kib: max_kib } + } + + fn grep_url<'a>(&self, msg: &'a str) -> Option<Url<'a>> { + let captures = URL_RE.captures(msg)?; + debug!("Url captures: {:?}", captures); + + Some(captures.get(2)?.as_str().into()) } fn url(&self, text: &str) -> Result<String, UrlError> { - let url = self.grep_url(text).ok_or(ErrorKind::MissingUrl)?; - let body = utils::download(&url, Some(self.max_kib)).context(ErrorKind::Download)?; + let url = self.grep_url(text) + .ok_or(ErrorKind::MissingUrl)? + .max_kib(self.max_kib); + let body = url.request().context(ErrorKind::Download)?; - let title = match self.get_ogtitle(&body) { + let title = match Title::find_useful_ogtitle(&body, url.as_str()) { Ok(t) => t, - Err(e) => if e.kind() == ErrorKind::MissingTitle { - self.get_title(&body)? - } else { - Err(e)? - } + Err(e) => match e.kind() { + ErrorKind::MissingTitle | ErrorKind::UselessTitle => { + Title::find_useful_title(&body, url.as_str())? + } + _ => Err(e)?, + }, }; - Ok(title.trim().replace('\n', "|").replace('\r', "|")) + Ok(title.into()) } } -impl Plugin for Url { +impl Plugin for UrlTitles { fn execute(&self, _: &IrcClient, message: &Message) -> ExecutionStatus { match message.command { - Command::PRIVMSG(_, ref msg) => if RE.is_match(msg) { + Command::PRIVMSG(_, ref msg) => if URL_RE.is_match(msg) { ExecutionStatus::RequiresThread } else { ExecutionStatus::Done @@ -151,6 +197,10 @@ pub mod error { #[fail(display = "No title was found")] MissingTitle, + /// Useless title error + #[fail(display = "Title was not helpful")] + UselessTitle, + /// Html decoding error #[fail(display = "Failed to decode Html characters")] HtmlDecoding, |
