summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jokler <jokler.contact@gmail.com> 2018-03-10 01:30:13 +0100
committer Jokler <jokler.contact@gmail.com> 2018-03-10 01:30:13 +0100
commit 9be7f31ee2d37800c7d23a9fff7d7ab8a2e076ed (patch)
tree 9c89710c7669cf3b3fbed35a2292f8c142bb35bf
parent e5e7a8d49729601b62e81d28e547d3828e839b28 (diff)
download frippy-9be7f31ee2d37800c7d23a9fff7d7ab8a2e076ed.tar.gz
frippy-9be7f31ee2d37800c7d23a9fff7d7ab8a2e076ed.zip
Decode html encoded characters in titles
-rw-r--r-- Cargo.lock 7
-rw-r--r-- Cargo.toml 1
-rw-r--r-- src/plugins/url.rs 16
3 files changed, 19 insertions, 5 deletions
diff --git a/Cargo.lock b/Cargo.lock
index ef78edb..2b6a8fa 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -373,6 +373,7 @@ dependencies = [
"failure 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"frippy_derive 0.1.0",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
+ "htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"humantime 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"irc 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -441,6 +442,11 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
+name = "htmlescape"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
+[[package]]
name = "httparse"
version = "1.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1481,6 +1487,7 @@ dependencies = [
"checksum gcc 0.3.54 (registry+https://github.com/rust-lang/crates.io-index)" = "5e33ec290da0d127825013597dbdfc28bee4964690c7ce1166cbc2a7bd08b1bb"
"checksum getopts 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "b900c08c1939860ce8b54dc6a89e26e00c04c380fd0e09796799bd7f12861e05"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
+"checksum htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163"
"checksum httparse 1.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "c2f407128745b78abc95c0ffbe4e5d37427fdc0d45470710cfef8c44522a2e37"
"checksum humantime 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5369e01a05e3404c421b5d6dcfea6ecf7d5e65eba8a275948151358cd8282042"
"checksum hyper 0.11.19 (registry+https://github.com/rust-lang/crates.io-index)" = "47659bb1cb7ef3cd7b4f9bd2a11349b8d92097d34f9597a3c09e9bcefaf92b61"
diff --git a/Cargo.toml b/Cargo.toml
index 3f280b6..192939a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,6 +39,7 @@ glob = "0.2.11"
failure = "0.1.1"
frippy_derive = { path = "frippy_derive" }
+htmlescape = "0.3.1"
[dependencies.unicode_names]
git = 'https://github.com/Jokler/unicode_names'
diff --git a/src/plugins/url.rs b/src/plugins/url.rs
index 4c33cba..e75d893 100644
--- a/src/plugins/url.rs
+++ b/src/plugins/url.rs
@@ -1,3 +1,4 @@
+extern crate htmlescape;
extern crate regex;
use irc::client::prelude::*;
@@ -35,23 +36,24 @@ impl Url {
Some(captures.get(2)?.as_str().to_owned())
}
- fn get_title<'a>(&self, body: &'a str) -> Option<&'a str> {
+ fn get_title<'a>(&self, body: &str) -> Result<String, UrlError> {
let title = body.find("<title>")
.map(|start| body.find("</title>").map(|end| &body[start + 7..end]))
- .and_then(|s| s);
+ .and_then(|s| s).ok_or(ErrorKind::MissingTitle)?;
+
debug!("Title: {:?}", title);
- title
+ htmlescape::decode_html(title).map_err(|_| ErrorKind::HtmlDecoding.into())
}
fn url(&self, text: &str) -> Result<String, UrlError> {
let url = self.grep_url(text).ok_or(ErrorKind::MissingUrl)?;
let body = utils::download(&url, Some(self.max_kib)).context(ErrorKind::Download)?;
- let title = self.get_title(&body).ok_or(ErrorKind::MissingTitle)?;
+ let title = self.get_title(&body)?;
- Ok(title.to_owned())
+ Ok(title.replace('\n', "|").replace('\r', "|"))
}
}
@@ -110,5 +112,9 @@ pub mod error {
/// Missing title error
#[fail(display = "No title was found")]
MissingTitle,
+
+ /// Html decoding error
+ #[fail(display = "Failed to decode Html characters")]
+ HtmlDecoding,
}
}