From 9be7f31ee2d37800c7d23a9fff7d7ab8a2e076ed Mon Sep 17 00:00:00 2001
From: Jokler
Date: Sat, 10 Mar 2018 01:30:13 +0100
Subject: Decode html encoded characters in titles

---
 src/plugins/url.rs | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'src/plugins/url.rs')

diff --git a/src/plugins/url.rs b/src/plugins/url.rs
index 4c33cba..e75d893 100644
--- a/src/plugins/url.rs
+++ b/src/plugins/url.rs
@@ -1,3 +1,4 @@
+extern crate htmlescape;
 extern crate regex;
 
 use irc::client::prelude::*;
@@ -35,23 +36,24 @@ impl Url {
         Some(captures.get(2)?.as_str().to_owned())
     }
 
-    fn get_title<'a>(&self, body: &'a str) -> Option<&'a str> {
+    fn get_title<'a>(&self, body: &str) -> Result<String, UrlError> {
         let title = body.find("<title>")
             .map(|start| body.find("</title>").map(|end| &body[start + 7..end]))
-            .and_then(|s| s);
+            .and_then(|s| s).ok_or(ErrorKind::MissingTitle)?;
+
 
         debug!("Title: {:?}", title);
 
-        title
+        htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into())
     }
 
     fn url(&self, text: &str) -> Result<String, UrlError> {
         let url = self.grep_url(text).ok_or(ErrorKind::MissingUrl)?;
         let body = utils::download(&url, Some(self.max_kib)).context(ErrorKind::Download)?;
 
-        let title = self.get_title(&body).ok_or(ErrorKind::MissingTitle)?;
+        let title = self.get_title(&body)?;
 
-        Ok(title.to_owned())
+        Ok(title.replace('\n', "|").replace('\r', "|"))
     }
 }
 
@@ -110,5 +112,9 @@ pub mod error {
         /// Missing title error
         #[fail(display = "No title was found")]
         MissingTitle,
+
+        /// Html decoding error
+        #[fail(display = "Failed to decode Html characters")]
+        HtmlDecoding,
     }
 }
-- 
cgit v1.2.3-70-g09d2