summary | refs | log | tree | commit | diff | stats
path: root/src/plugins/url.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/url.rs')
-rw-r--r-- src/plugins/url.rs 199
1 files changed, 151 insertions, 48 deletions
diff --git a/src/plugins/url.rs b/src/plugins/url.rs
index bff840f..a884c66 100644
--- a/src/plugins/url.rs
+++ b/src/plugins/url.rs
@@ -1,50 +1,59 @@
extern crate htmlescape;
-extern crate regex;
+
+use std::marker::PhantomData;
+use std::time::Duration;
use irc::client::prelude::*;
-use self::regex::Regex;
+use regex::Regex;
use plugin::*;
-use utils;
+use utils::Url;
+use FrippyClient;
use self::error::*;
-use error::FrippyError;
use error::ErrorKind as FrippyErrorKind;
+use error::FrippyError;
use failure::Fail;
use failure::ResultExt;
lazy_static! {
- static ref RE: Regex = Regex::new(r"(^|\s)(https?://\S+)").unwrap();
+ static ref URL_RE: Regex = Regex::new(r"(^|\s)(https?://\S+)").unwrap();
+ static ref WORD_RE: Regex = Regex::new(r"(\w+)").unwrap();
}
#[derive(PluginName, Debug)]
-pub struct Url {
+pub struct UrlTitles<C> {
max_kib: usize,
+ phantom: PhantomData<C>,
}
-impl Url {
- /// If a file is larger than `max_kib` KiB the download is stopped
- pub fn new(max_kib: usize) -> Url {
- Url { max_kib: max_kib }
- }
+#[derive(Clone, Debug)]
+struct Title(String, Option<usize>);
- fn grep_url(&self, msg: &str) -> Option<String> {
- let captures = RE.captures(msg)?;
- debug!("Url captures: {:?}", captures);
+impl From<String> for Title {
+ fn from(title: String) -> Self {
+ Title(title, None)
+ }
+}
- Some(captures.get(2)?.as_str().to_owned())
+impl From<Title> for String {
+ fn from(title: Title) -> Self {
+ title.0
}
+}
- fn get_title<'a>(&self, body: &str) -> Result<String, UrlError> {
- let title = body.find("<title")
+impl Title {
+ fn find_by_delimiters(body: &str, delimiters: [&str; 3]) -> Result<Self, UrlError> {
+ let title = body
+ .find(delimiters[0])
.map(|tag| {
body[tag..]
- .find('>')
- .map(|offset| tag + offset + 1)
+ .find(delimiters[1])
+ .map(|offset| tag + offset + delimiters[1].len())
.map(|start| {
body[start..]
- .find("</title>")
+ .find(delimiters[2])
.map(|offset| start + offset)
.map(|end| &body[start..end])
})
@@ -52,55 +61,145 @@ impl Url {
.and_then(|s| s.and_then(|s| s))
.ok_or(ErrorKind::MissingTitle)?;
- debug!("Title: {:?}", title);
+ debug!("Found title {:?} with delimiters {:?}", title, delimiters);
- htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into())
+ htmlescape::decode_html(title)
+ .map(|t| t.into())
+ .map_err(|_| ErrorKind::HtmlDecoding.into())
}
- fn url(&self, text: &str) -> Result<String, UrlError> {
- let url = self.grep_url(text).ok_or(ErrorKind::MissingUrl)?;
- let body = utils::download(&url, Some(self.max_kib)).context(ErrorKind::Download)?;
+ fn find_ogtitle(body: &str) -> Result<Self, UrlError> {
+ Self::find_by_delimiters(body, ["property=\"og:title\"", "content=\"", "\""])
+ }
+
+ fn find_title(body: &str) -> Result<Self, UrlError> {
+ Self::find_by_delimiters(body, ["<title", ">", "</title>"])
+ }
+
+ // TODO Improve logic
+ fn get_usefulness(self, url: &str) -> Self {
+ let mut usefulness = 0;
+ for word in WORD_RE.find_iter(&self.0) {
+ let w = word.as_str().to_lowercase();
+ if w.len() > 2 && !url.to_lowercase().contains(&w) {
+ usefulness += 1;
+ }
+ }
+
+ Title(self.0, Some(usefulness))
+ }
+
+ pub fn usefulness(&self) -> usize {
+ self.1.expect("Usefulness should be calculated already")
+ }
+
+ fn clean_up(self) -> Self {
+ Title(self.0.trim().replace('\n', "|").replace('\r', "|"), self.1)
+ }
+
+ pub fn find_clean_ogtitle(body: &str, url: &str) -> Result<Self, UrlError> {
+ let title = Self::find_ogtitle(body)?;
+ Ok(title.get_usefulness(url).clean_up())
+ }
+
+ pub fn find_clean_title(body: &str, url: &str) -> Result<Self, UrlError> {
+ let title = Self::find_title(body)?;
+ Ok(title.get_usefulness(url).clean_up())
+ }
+}
+
+impl<C: FrippyClient> UrlTitles<C> {
+ /// If a file is larger than `max_kib` KiB the download is stopped
+ pub fn new(max_kib: usize) -> Self {
+ UrlTitles {
+ max_kib,
+ phantom: PhantomData,
+ }
+ }
- let title = self.get_title(&body)?;
+ fn grep_url<'a>(&self, msg: &'a str) -> Option<Url<'a>> {
+ let captures = URL_RE.captures(msg)?;
+ debug!("Url captures: {:?}", captures);
+
+ Some(captures.get(2)?.as_str().into())
+ }
+
+ fn url(&self, text: &str) -> Result<String, UrlError> {
+ let url = self
+ .grep_url(text)
+ .ok_or(ErrorKind::MissingUrl)?
+ .max_kib(self.max_kib)
+ .timeout(Duration::from_secs(5));
+ let body = url.request().context(ErrorKind::Download)?;
+
+ let title = Title::find_clean_title(&body, url.as_str());
+ let og_title = Title::find_clean_ogtitle(&body, url.as_str());
+
+ let title = match (title, og_title) {
+ (Ok(title), Ok(og_title)) => {
+ if title.usefulness() > og_title.usefulness() {
+ title
+ } else {
+ og_title
+ }
+ }
+ (Ok(title), _) => title,
+ (_, Ok(title)) => title,
+ (Err(e), _) => Err(e)?,
+ };
+
+ if title.usefulness() == 0 {
+ Err(ErrorKind::UselessTitle)?;
+ }
- Ok(title.replace('\n', "|").replace('\r', "|"))
+ Ok(title.into())
}
}
-impl Plugin for Url {
- fn execute(&self, _: &IrcClient, message: &Message) -> ExecutionStatus {
+impl<C: FrippyClient> Plugin for UrlTitles<C> {
+ type Client = C;
+ fn execute(&self, _: &Self::Client, message: &Message) -> ExecutionStatus {
match message.command {
- Command::PRIVMSG(_, ref msg) => if RE.is_match(msg) {
- ExecutionStatus::RequiresThread
- } else {
- ExecutionStatus::Done
- },
+ Command::PRIVMSG(_, ref msg) => {
+ if URL_RE.is_match(msg) {
+ ExecutionStatus::RequiresThread
+ } else {
+ ExecutionStatus::Done
+ }
+ }
_ => ExecutionStatus::Done,
}
}
- fn execute_threaded(&self, client: &IrcClient, message: &Message) -> Result<(), FrippyError> {
- Ok(match message.command {
- Command::PRIVMSG(_, ref content) => match self.url(content) {
- Ok(title) => client
- .send_privmsg(message.response_target().unwrap(), &title)
- .context(FrippyErrorKind::Connection)?,
- Err(e) => Err(e).context(FrippyErrorKind::Url)?,
- },
- _ => (),
- })
+ fn execute_threaded(
+ &self,
+ client: &Self::Client,
+ message: &Message,
+ ) -> Result<(), FrippyError> {
+ if let Command::PRIVMSG(_, ref content) = message.command {
+ let title = self.url(content).context(FrippyErrorKind::Url)?;
+ let response = format!("[URL] {}", title);
+
+ client
+ .send_privmsg(message.response_target().unwrap(), &response)
+ .context(FrippyErrorKind::Connection)?;
+ }
+
+ Ok(())
}
- fn command(&self, client: &IrcClient, command: PluginCommand) -> Result<(), FrippyError> {
- Ok(client
+ fn command(&self, client: &Self::Client, command: PluginCommand) -> Result<(), FrippyError> {
+ client
.send_notice(
&command.source,
"This Plugin does not implement any commands.",
)
- .context(FrippyErrorKind::Connection)?)
+ .context(FrippyErrorKind::Connection)?;
+
+ Ok(())
}
- fn evaluate(&self, _: &IrcClient, command: PluginCommand) -> Result<String, String> {
+ fn evaluate(&self, _: &Self::Client, command: PluginCommand) -> Result<String, String> {
self.url(&command.tokens[0])
.map_err(|e| e.cause().unwrap().to_string())
}
@@ -123,6 +222,10 @@ pub mod error {
#[fail(display = "No title was found")]
MissingTitle,
+ /// Useless title error
+ #[fail(display = "The titles found were not useful enough")]
+ UselessTitle,
+
/// Html decoding error
#[fail(display = "Failed to decode Html characters")]
HtmlDecoding,