diff options
| author | Jokler <jokler.contact@gmail.com> | 2018-04-07 18:19:24 +0200 |
|---|---|---|
| committer | Jokler <jokler.contact@gmail.com> | 2018-04-07 18:19:24 +0200 |
| commit | 5c45046794e3c93f875d69193bb12d6608d45a8c (patch) | |
| tree | a446511384f7ba7eb0dba2c70970a955cbfe1403 /src/plugins | |
| parent | 4624f7e153769fa97401f5e906c6d17cf1127083 (diff) | |
| download | frippy-5c45046794e3c93f875d69193bb12d6608d45a8c.tar.gz frippy-5c45046794e3c93f875d69193bb12d6608d45a8c.zip | |
Add usefulness rating function to url titles
Diffstat (limited to 'src/plugins')
| -rw-r--r-- | src/plugins/currency.rs | 2 | ||||
| -rw-r--r-- | src/plugins/emoji.rs | 2 | ||||
| -rw-r--r-- | src/plugins/factoids/database.rs | 10 | ||||
| -rw-r--r-- | src/plugins/factoids/mod.rs | 24 | ||||
| -rw-r--r-- | src/plugins/factoids/utils.rs | 7 | ||||
| -rw-r--r-- | src/plugins/help.rs | 2 | ||||
| -rw-r--r-- | src/plugins/keepnick.rs | 2 | ||||
| -rw-r--r-- | src/plugins/mod.rs | 10 | ||||
| -rw-r--r-- | src/plugins/sed.rs | 10 | ||||
| -rw-r--r-- | src/plugins/tell/database.rs | 8 | ||||
| -rw-r--r-- | src/plugins/tell/mod.rs | 15 | ||||
| -rw-r--r-- | src/plugins/url.rs | 148 |
12 files changed, 149 insertions, 91 deletions
diff --git a/src/plugins/currency.rs b/src/plugins/currency.rs index 99f46c8..e7a802d 100644 --- a/src/plugins/currency.rs +++ b/src/plugins/currency.rs @@ -13,8 +13,8 @@ use self::serde_json::Value; use plugin::*; -use error::FrippyError; use error::ErrorKind as FrippyErrorKind; +use error::FrippyError; use failure::ResultExt; #[derive(PluginName, Default, Debug)] diff --git a/src/plugins/emoji.rs b/src/plugins/emoji.rs index f1d9376..4ec7265 100644 --- a/src/plugins/emoji.rs +++ b/src/plugins/emoji.rs @@ -6,8 +6,8 @@ use irc::client::prelude::*; use plugin::*; -use error::FrippyError; use error::ErrorKind as FrippyErrorKind; +use error::FrippyError; use failure::Fail; use failure::ResultExt; diff --git a/src/plugins/factoids/database.rs b/src/plugins/factoids/database.rs index 7788d7c..321931f 100644 --- a/src/plugins/factoids/database.rs +++ b/src/plugins/factoids/database.rs @@ -1,20 +1,20 @@ #[cfg(feature = "mysql")] extern crate dotenv; +use std::collections::HashMap; #[cfg(feature = "mysql")] use std::sync::Arc; -use std::collections::HashMap; #[cfg(feature = "mysql")] +use diesel::mysql::MysqlConnection; +#[cfg(feature = "mysql")] use diesel::prelude::*; #[cfg(feature = "mysql")] -use diesel::mysql::MysqlConnection; +use failure::ResultExt; #[cfg(feature = "mysql")] use r2d2::Pool; #[cfg(feature = "mysql")] use r2d2_diesel::ConnectionManager; -#[cfg(feature = "mysql")] -use failure::ResultExt; use chrono::NaiveDateTime; @@ -124,8 +124,8 @@ impl Database for Arc<Pool<ConnectionManager<MysqlConnection>>> { } fn delete_factoid(&mut self, name: &str, idx: i32) -> Result<(), FactoidsError> { - use diesel; use self::factoids::columns; + use diesel; let conn = &*self.get().context(ErrorKind::NoConnection)?; match diesel::delete( diff --git a/src/plugins/factoids/mod.rs b/src/plugins/factoids/mod.rs index 10e512a..ba3ee8a 100644 --- a/src/plugins/factoids/mod.rs +++ b/src/plugins/factoids/mod.rs @@ -1,13 +1,13 @@ extern crate rlua; -use std::fmt; -use std::str::FromStr; use self::rlua::prelude::*; -use irc::client::prelude::*; use antidote::RwLock; +use irc::client::prelude::*; +use std::fmt; +use std::str::FromStr; -use time; use chrono::NaiveDateTime; +use time; use plugin::*; pub mod database; @@ -15,11 +15,12 @@ use self::database::Database; mod utils; use self::utils::*; +use utils::Url; -use failure::ResultExt; +use self::error::*; use error::ErrorKind as FrippyErrorKind; use error::FrippyError; -use self::error::*; +use failure::ResultExt; static LUA_SANDBOX: &'static str = include_str!("sandbox.lua"); @@ -52,7 +53,8 @@ impl<T: Database> Factoids<T> { created: NaiveDateTime::from_timestamp(tm.sec, 0u32), }; - Ok(self.factoids.write() + Ok(self.factoids + .write() .insert_factoid(&factoid) .map(|()| "Successfully added!")?) } @@ -75,7 +77,10 @@ impl<T: Database> Factoids<T> { let name = command.tokens.remove(0); let url = &command.tokens[0]; - let content = ::utils::download(url, Some(1024)).context(ErrorKind::Download)?; + let content = Url::from(url.as_ref()) + .max_kib(1024) + .request() + .context(ErrorKind::Download)?; Ok(self.create_factoid(&name, &content, &command.source)?) } @@ -118,7 +123,8 @@ impl<T: Database> Factoids<T> { } }; - let factoid = self.factoids.read() + let factoid = self.factoids + .read() .get_factoid(name, idx) .context(ErrorKind::NotFound)?; diff --git a/src/plugins/factoids/utils.rs b/src/plugins/factoids/utils.rs index 70ac8a7..fd08da1 100644 --- a/src/plugins/factoids/utils.rs +++ b/src/plugins/factoids/utils.rs @@ -3,17 +3,18 @@ extern crate reqwest; use std::thread; use std::time::Duration; -use utils; use super::rlua::prelude::*; +use utils::Url; use self::LuaError::RuntimeError; pub fn download(_: &Lua, url: String) -> Result<String, LuaError> { - match utils::download(&url, Some(1024)) { + let url = Url::from(url).max_kib(1024); + match url.request() { Ok(v) => Ok(v), Err(e) => Err(RuntimeError(format!( "Failed to download {} - {}", - url, + url.as_str(), e.to_string() ))), } diff --git a/src/plugins/help.rs b/src/plugins/help.rs index 7e3658d..9eb152a 100644 --- a/src/plugins/help.rs +++ b/src/plugins/help.rs @@ -2,8 +2,8 @@ use irc::client::prelude::*; use plugin::*; -use error::FrippyError; use error::ErrorKind as FrippyErrorKind; +use error::FrippyError; use failure::ResultExt; #[derive(PluginName, Default, Debug)] diff --git a/src/plugins/keepnick.rs b/src/plugins/keepnick.rs index 58ac167..aa2e485 100644 --- a/src/plugins/keepnick.rs +++ b/src/plugins/keepnick.rs @@ -2,8 +2,8 @@ use irc::client::prelude::*; use plugin::*; -use error::FrippyError; use error::ErrorKind as FrippyErrorKind; +use error::FrippyError; use failure::ResultExt; #[derive(PluginName, Default, Debug)] diff --git a/src/plugins/mod.rs b/src/plugins/mod.rs index 6aed95e..a8fc818 100644 --- a/src/plugins/mod.rs +++ b/src/plugins/mod.rs @@ -1,9 +1,9 @@ //! Collection of plugins included -pub mod help; -pub mod url; -pub mod sed; -pub mod emoji; -pub mod tell; pub mod currency; +pub mod emoji; pub mod factoids; +pub mod help; pub mod keepnick; +pub mod sed; +pub mod tell; +pub mod url; diff --git a/src/plugins/sed.rs b/src/plugins/sed.rs index 8ccb2f7..f766809 100644 --- a/src/plugins/sed.rs +++ b/src/plugins/sed.rs @@ -1,17 +1,17 @@ -use std::collections::HashMap; +use antidote::RwLock; use circular_queue::CircularQueue; use regex::{Regex, RegexBuilder}; -use antidote::RwLock; +use std::collections::HashMap; use irc::client::prelude::*; use plugin::*; -use failure::Fail; -use failure::ResultExt; +use self::error::*; use error::ErrorKind as FrippyErrorKind; use error::FrippyError; -use self::error::*; +use failure::Fail; +use failure::ResultExt; lazy_static! { static ref RE: Regex = Regex::new(r"^s/((?:\\/|[^/])+)/((?:\\/|[^/])*)/(?:(\w+))?\s*$").unwrap(); diff --git a/src/plugins/tell/database.rs b/src/plugins/tell/database.rs index 42c0d88..522df5a 100644 --- a/src/plugins/tell/database.rs +++ b/src/plugins/tell/database.rs @@ -1,15 +1,15 @@ #[cfg(feature = "mysql")] extern crate dotenv; +use std::collections::HashMap; #[cfg(feature = "mysql")] use std::sync::Arc; -use std::collections::HashMap; #[cfg(feature = "mysql")] -use diesel::prelude::*; -#[cfg(feature = "mysql")] use diesel::mysql::MysqlConnection; #[cfg(feature = "mysql")] +use diesel::prelude::*; +#[cfg(feature = "mysql")] use r2d2::Pool; #[cfg(feature = "mysql")] use r2d2_diesel::ConnectionManager; @@ -138,8 +138,8 @@ impl Database for Arc<Pool<ConnectionManager<MysqlConnection>>> { } fn delete_tells(&mut self, receiver: &str) -> Result<(), TellError> { - use diesel; use self::tells::columns; + use diesel; let conn = &*self.get().context(ErrorKind::NoConnection)?; diesel::delete(tells::table.filter(columns::receiver.eq(receiver))) diff --git a/src/plugins/tell/mod.rs b/src/plugins/tell/mod.rs index 42032be..c681d43 100644 --- a/src/plugins/tell/mod.rs +++ b/src/plugins/tell/mod.rs @@ -1,18 +1,18 @@ -use irc::client::prelude::*; use antidote::RwLock; +use irc::client::prelude::*; -use time; -use std::time::Duration; use chrono::NaiveDateTime; use humantime::format_duration; +use std::time::Duration; +use time; use plugin::*; -use failure::Fail; -use failure::ResultExt; +use self::error::*; use error::ErrorKind as FrippyErrorKind; use error::FrippyError; -use self::error::*; +use failure::Fail; +use failure::ResultExt; pub mod database; use self::database::Database; @@ -98,7 +98,8 @@ impl<T: Database> Tell<T> { } fn on_namelist(&self, client: &IrcClient, channel: &str) -> Result<(), FrippyError> { - let receivers = self.tells.read() + let receivers = self.tells + .read() .get_receivers() .context(FrippyErrorKind::Tell)?; diff --git a/src/plugins/url.rs b/src/plugins/url.rs index ec98900..aba5b0d 100644 --- a/src/plugins/url.rs +++ b/src/plugins/url.rs @@ -5,46 +5,49 @@ use irc::client::prelude::*; use regex::Regex; use plugin::*; -use utils; +use utils::Url; use self::error::*; -use error::FrippyError; use error::ErrorKind as FrippyErrorKind; +use error::FrippyError; use failure::Fail; use failure::ResultExt; lazy_static! { - static ref RE: Regex = Regex::new(r"(^|\s)(https?://\S+)").unwrap(); + static ref URL_RE: Regex = Regex::new(r"(^|\s)(https?://\S+)").unwrap(); + static ref WORD_RE: Regex = Regex::new(r"(\w+)").unwrap(); } #[derive(PluginName, Debug)] -pub struct Url { +pub struct UrlTitles { max_kib: usize, } -impl Url { - /// If a file is larger than `max_kib` KiB the download is stopped - pub fn new(max_kib: usize) -> Url { - Url { max_kib: max_kib } - } - - fn grep_url(&self, msg: &str) -> Option<String> { - let captures = RE.captures(msg)?; - debug!("Url captures: {:?}", captures); +#[derive(Clone, Debug)] +struct Title(String); - Some(captures.get(2)?.as_str().to_owned()) +impl From<String> for Title { + fn from(title: String) -> Self { + Title(title) } +} +impl From<Title> for String { + fn from(title: Title) -> Self { + title.0 + } +} - fn get_ogtitle<'a>(&self, body: &str) -> Result<String, UrlError> { - let title = body.find("property=\"og:title\"") +impl Title { + fn find_by_delimiters(body: &str, delimiters: [&str; 3]) -> Result<Self, UrlError> { + let title = body.find(delimiters[0]) .map(|tag| { body[tag..] - .find("content=\"") - .map(|offset| tag + offset + 9) + .find(delimiters[1]) + .map(|offset| tag + offset + delimiters[1].len()) .map(|start| { body[start..] - .find("\"") + .find(delimiters[2]) .map(|offset| start + offset) .map(|end| &body[start..end]) }) @@ -52,53 +55,96 @@ impl Url { .and_then(|s| s.and_then(|s| s)) .ok_or(ErrorKind::MissingTitle)?; - debug!("Title: {:?}", title); + debug!("delimiters: {:?}", delimiters); + debug!("title: {:?}", title); - htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into()) + htmlescape::decode_html(title) + .map(|t| t.into()) + .map_err(|_| ErrorKind::HtmlDecoding.into()) } - fn get_title<'a>(&self, body: &str) -> Result<String, UrlError> { - let title = body.find("<title") - .map(|tag| { - body[tag..] - .find('>') - .map(|offset| tag + offset + 1) - .map(|start| { - body[start..] - .find("</title>") - .map(|offset| start + offset) - .map(|end| &body[start..end]) - }) - }) - .and_then(|s| s.and_then(|s| s)) - .ok_or(ErrorKind::MissingTitle)?; + fn find_ogtitle<'a>(body: &str) -> Result<Self, UrlError> { + Self::find_by_delimiters(body, ["property=\"og:title\"", "content=\"", "\""]) + } - debug!("Title: {:?}", title); + fn find_title<'a>(body: &str) -> Result<Self, UrlError> { + Self::find_by_delimiters(body, ["<title", ">", "</title>"]) + } - htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into()) + // TODO Improve logic + fn is_useful(&self, url: &str) -> bool { + for word in WORD_RE.find_iter(&self.0) { + let w = word.as_str().to_lowercase(); + if w.len() > 2 && !url.to_lowercase().contains(&w) { + return true; + } + } + + return false; + } + + fn into_useful_title<'a>(self, url: &str) -> Result<Self, UrlError> { + if self.is_useful(url) { + Ok(self) + } else { + Err(ErrorKind::UselessTitle)? + } + } + + fn clean_up(self) -> Self { + self.0.trim().replace('\n', "|").replace('\r', "|").into() + } + + pub fn find_useful_ogtitle<'a>(body: &str, url: &str) -> Result<Self, UrlError> { + Self::find_ogtitle(body) + .and_then(|t| t.into_useful_title(url)) + .map(|t| t.clean_up()) + } + + pub fn find_useful_title<'a>(body: &str, url: &str) -> Result<Self, UrlError> { + Self::find_title(body) + .and_then(|t| t.into_useful_title(url)) + .map(|t| t.clean_up()) + } +} + +impl UrlTitles { + /// If a file is larger than `max_kib` KiB the download is stopped + pub fn new(max_kib: usize) -> Self { + UrlTitles { max_kib: max_kib } + } + + fn grep_url<'a>(&self, msg: &'a str) -> Option<Url<'a>> { + let captures = URL_RE.captures(msg)?; + debug!("Url captures: {:?}", captures); + + Some(captures.get(2)?.as_str().into()) } fn url(&self, text: &str) -> Result<String, UrlError> { - let url = self.grep_url(text).ok_or(ErrorKind::MissingUrl)?; - let body = utils::download(&url, Some(self.max_kib)).context(ErrorKind::Download)?; + let url = self.grep_url(text) + .ok_or(ErrorKind::MissingUrl)? + .max_kib(self.max_kib); + let body = url.request().context(ErrorKind::Download)?; - let title = match self.get_ogtitle(&body) { + let title = match Title::find_useful_ogtitle(&body, url.as_str()) { Ok(t) => t, - Err(e) => if e.kind() == ErrorKind::MissingTitle { - self.get_title(&body)? - } else { - Err(e)? - } + Err(e) => match e.kind() { + ErrorKind::MissingTitle | ErrorKind::UselessTitle => { + Title::find_useful_title(&body, url.as_str())? + } + _ => Err(e)?, + }, }; - Ok(title.trim().replace('\n', "|").replace('\r', "|")) + Ok(title.into()) } } -impl Plugin for Url { +impl Plugin for UrlTitles { fn execute(&self, _: &IrcClient, message: &Message) -> ExecutionStatus { match message.command { - Command::PRIVMSG(_, ref msg) => if RE.is_match(msg) { + Command::PRIVMSG(_, ref msg) => if URL_RE.is_match(msg) { ExecutionStatus::RequiresThread } else { ExecutionStatus::Done @@ -151,6 +197,10 @@ pub mod error { #[fail(display = "No title was found")] MissingTitle, + /// Useless title error + #[fail(display = "Title was not helpful")] + UselessTitle, + /// Html decoding error #[fail(display = "Failed to decode Html characters")] HtmlDecoding, |
