From 9ce77c472bef59918d14b52af1332e454516e486 Mon Sep 17 00:00:00 2001
From: Vika
Date: Sat, 1 Jul 2023 20:33:37 +0300
Subject: New debug utilities to test checking webmentions and parsing mf2

---
 kittybox-rs/Cargo.toml                           |  10 ++
 kittybox-rs/src/bin/kittybox-check-webmention.rs | 169 +++++++++++++++++++++++
 kittybox-rs/src/bin/kittybox-mf2.rs              |  48 ++++++++
 3 files changed, 227 insertions(+)
 create mode 100644 kittybox-rs/src/bin/kittybox-check-webmention.rs
 create mode 100644 kittybox-rs/src/bin/kittybox-mf2.rs

diff --git a/kittybox-rs/Cargo.toml b/kittybox-rs/Cargo.toml
index 8f9dff6..6f7c3c4 100644
--- a/kittybox-rs/Cargo.toml
+++ b/kittybox-rs/Cargo.toml
@@ -36,6 +36,16 @@ name = "kittybox-indieauth-helper"
 path = "src/bin/kittybox-indieauth-helper.rs"
 required-features = ["cli"]
 
+[[bin]]
+name = "kittybox-check-webmention"
+path = "src/bin/kittybox-check-webmention.rs"
+required-features = ["cli"]
+
+[[bin]]
+name = "kittybox-mf2"
+path = "src/bin/kittybox-mf2.rs"
+required-features = ["cli"]
+
 [workspace]
 members = [".", "./util", "./templates", "./indieauth"]
 default-members = [".", "./util", "./templates", "./indieauth"]
diff --git a/kittybox-rs/src/bin/kittybox-check-webmention.rs b/kittybox-rs/src/bin/kittybox-check-webmention.rs
new file mode 100644
index 0000000..5307830
--- /dev/null
+++ b/kittybox-rs/src/bin/kittybox-check-webmention.rs
@@ -0,0 +1,169 @@
+use std::cell::{RefCell, Ref};
+use std::rc::Rc;
+
+use clap::Parser;
+use microformats::types::PropertyValue;
+use microformats::html5ever;
+use microformats::html5ever::tendril::TendrilSink;
+
+/// Errors that can occur while fetching a page and checking it for a mention.
+#[derive(thiserror::Error, Debug)]
+enum Error {
+    #[error("http request error: {0}")]
+    Http(#[from] reqwest::Error),
+    #[error("microformats error: {0}")]
+    Microformats(#[from] microformats::Error),
+    #[error("json error: {0}")]
+    Json(#[from] serde_json::Error),
+    #[error("url parse error: {0}")]
+    UrlParse(#[from] url::ParseError),
+}
+
+/// How a page refers to the link being checked.
+#[derive(Debug)]
+enum MentionType {
+    Reply,
+    Like,
+    Repost,
+    Bookmark,
+    Mention
+}
+
+/// Look for a reference to `link` in `document`, an HTML page located at `base_url`.
+///
+/// For every top-level MF2 item, the `in-reply-to`, `like-of`, `bookmark-of`
+/// and `repost-of` properties are compared against `link` first. Failing
+/// that, the first `content` value is parsed as HTML and searched for an
+/// `<a href="...">` that resolves to `link`; links marked `rel="nofollow"`
+/// are not counted.
+///
+/// Returns `Ok(None)` if nothing in the document points at `link`.
+///
+/// # Errors
+///
+/// Fails if the document cannot be parsed for microformats.
+fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
+    // First, check the document for MF2 markup
+    let document = microformats::from_html(document.as_ref(), base_url.clone())?;
+
+    // Get an iterator of all items
+    let items_iter = document.items.iter()
+        .map(AsRef::as_ref)
+        .map(RefCell::borrow);
+
+    for item in items_iter {
+        let props = item.properties.borrow();
+        for (prop, interaction_type) in [
+            ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like),
+            ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost)
+        ] {
+            if let Some(propvals) = props.get(prop) {
+                for val in propvals {
+                    if let PropertyValue::Url(url) = val {
+                        if url == link {
+                            return Ok(Some(interaction_type))
+                        }
+                    }
+                }
+            }
+        }
+        // Process `content`
+        if let Some(PropertyValue::Fragment(content)) = props.get("content")
+            .map(Vec::as_slice)
+            .unwrap_or_default()
+            .first()
+        {
+            let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default())
+                .from_utf8()
+                .one(content.html.as_bytes())
+                .document;
+
+            // This is a trick to unwrap recursion into a loop
+            //
+            // A list of unprocessed nodes is made. Then, in each
+            // iteration, the list is "taken" and replaced with an
+            // empty list, which is populated with nodes for the next
+            // iteration of the loop.
+            //
+            // Empty list means all nodes were processed.
+            let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect();
+            while !unprocessed_nodes.is_empty() {
+                // "Take" the list out of its memory slot, replace it with an empty list
+                let nodes = std::mem::take(&mut unprocessed_nodes);
+                for node in nodes.into_iter() {
+                    // Add children nodes to the list for the next iteration
+                    unprocessed_nodes.extend(node.children.borrow().iter().cloned());
+
+                    if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data {
+                        // If it's not `<a>`, skip it
+                        if name.local != *"a" { continue; }
+                        let mut is_mention = false;
+                        let mut is_nofollow = false;
+                        // Look at every attribute before deciding: `rel` may
+                        // come either before or after `href`.
+                        for attr in attrs.borrow().iter() {
+                            if attr.name.local == *"rel" {
+                                // Don't count `rel="nofollow"` links — a web crawler should ignore them
+                                // and so for purposes of driving visitors they are useless
+                                if attr.value.as_ref().split([',', ' ']).any(|v| v == "nofollow") {
+                                    is_nofollow = true;
+                                }
+                            } else if attr.name.local == *"href" {
+                                // Be forgiving in parsing URLs, and resolve them against the base URL
+                                if let Ok(url) = base_url.join(attr.value.as_ref()) {
+                                    if &url == link {
+                                        is_mention = true;
+                                    }
+                                }
+                            }
+                        }
+                        if is_mention && !is_nofollow {
+                            return Ok(Some(MentionType::Mention));
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(None)
+}
+
+// Command-line arguments: the page to fetch and the link it should contain.
+#[derive(Parser, Debug)]
+#[clap(
+    name = "kittybox-check-webmention",
+    author = "Vika ",
+    version = env!("CARGO_PKG_VERSION"),
+    about = "Verify an incoming webmention"
+)]
+struct Args {
+    #[clap(value_parser)]
+    url: url::Url,
+    #[clap(value_parser)]
+    link: url::Url
+}
+
+/// Fetch `url`, print the kind of mention of `link` found in it, or exit
+/// with status 1 if there is none.
+#[tokio::main]
+async fn main() -> Result<(), self::Error> {
+    let args = Args::parse();
+
+    let http: reqwest::Client = reqwest::Client::builder()
+        .user_agent(concat!(
+            env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
+        ))
+        .build()?;
+
+    let response = http.get(args.url.clone()).send().await?;
+    let text = response.text().await?;
+
+    if let Some(mention_type) = check_mention(text, &args.url, &args.link)? {
+        println!("{:?}", mention_type);
+
+        Ok(())
+    } else {
+        std::process::exit(1)
+    }
+}
diff --git a/kittybox-rs/src/bin/kittybox-mf2.rs b/kittybox-rs/src/bin/kittybox-mf2.rs
new file mode 100644
index 0000000..4366cb8
--- /dev/null
+++ b/kittybox-rs/src/bin/kittybox-mf2.rs
@@ -0,0 +1,48 @@
+use clap::Parser;
+
+// Command-line arguments: the page to fetch and parse.
+#[derive(Parser, Debug)]
+#[clap(
+    name = "kittybox-mf2",
+    author = "Vika ",
+    version = env!("CARGO_PKG_VERSION"),
+    about = "Fetch HTML and turn it into MF2-JSON"
+)]
+struct Args {
+    #[clap(value_parser)]
+    url: url::Url,
+}
+
+/// Errors that can occur while fetching a page and converting it to MF2-JSON.
+#[derive(thiserror::Error, Debug)]
+enum Error {
+    #[error("http request error: {0}")]
+    Http(#[from] reqwest::Error),
+    #[error("microformats error: {0}")]
+    Microformats(#[from] microformats::Error),
+    #[error("json error: {0}")]
+    Json(#[from] serde_json::Error),
+    #[error("url parse error: {0}")]
+    UrlParse(#[from] url::ParseError),
+}
+
+/// Fetch `url`, parse its microformats and print them as pretty-printed JSON.
+#[tokio::main]
+async fn main() -> Result<(), Error> {
+    let args = Args::parse();
+
+    let http: reqwest::Client = reqwest::Client::builder()
+        .user_agent(concat!(
+            env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
+        ))
+        .build()?;
+
+    let response = http.get(args.url.clone()).send().await?;
+    let text = response.text().await?;
+
+    let mf2 = microformats::from_html(text.as_ref(), args.url)?;
+
+    println!("{}", serde_json::to_string_pretty(&mf2)?);
+
+    Ok(())
+}
-- 
cgit 1.4.1