diff options
author | Vika <vika@fireburn.ru> | 2023-07-29 21:59:56 +0300 |
---|---|---|
committer | Vika <vika@fireburn.ru> | 2023-07-29 21:59:56 +0300 |
commit | 0617663b249f9ca488e5de652108b17d67fbaf45 (patch) | |
tree | 11564b6c8fa37bf9203a0a4cc1c4e9cc088cb1a5 /kittybox-rs/src/bin/kittybox-check-webmention.rs | |
parent | 26c2b79f6a6380ae3224e9309b9f3352f5717bd7 (diff) | |
download | kittybox-0617663b249f9ca488e5de652108b17d67fbaf45.tar.zst |
Moved the entire Kittybox tree into the root
Diffstat (limited to 'kittybox-rs/src/bin/kittybox-check-webmention.rs')
-rw-r--r-- | kittybox-rs/src/bin/kittybox-check-webmention.rs | 152 |
1 files changed, 0 insertions, 152 deletions
diff --git a/kittybox-rs/src/bin/kittybox-check-webmention.rs b/kittybox-rs/src/bin/kittybox-check-webmention.rs deleted file mode 100644 index f02032c..0000000 --- a/kittybox-rs/src/bin/kittybox-check-webmention.rs +++ /dev/null @@ -1,152 +0,0 @@ -use std::cell::{RefCell, Ref}; -use std::rc::Rc; - -use clap::Parser; -use microformats::types::PropertyValue; -use microformats::html5ever; -use microformats::html5ever::tendril::TendrilSink; - -#[derive(thiserror::Error, Debug)] -enum Error { - #[error("http request error: {0}")] - Http(#[from] reqwest::Error), - #[error("microformats error: {0}")] - Microformats(#[from] microformats::Error), - #[error("json error: {0}")] - Json(#[from] serde_json::Error), - #[error("url parse error: {0}")] - UrlParse(#[from] url::ParseError), -} - -use kittybox_util::MentionType; - -fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> { - // First, check the document for MF2 markup - let document = microformats::from_html(document.as_ref(), base_url.clone())?; - - // Get an iterator of all items - let items_iter = document.items.iter() - .map(AsRef::as_ref) - .map(RefCell::borrow); - - for item in items_iter { - let props = item.properties.borrow(); - for (prop, interaction_type) in [ - ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like), - ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost) - ] { - if let Some(propvals) = props.get(prop) { - for val in propvals { - if let PropertyValue::Url(url) = val { - if url == link { - return Ok(Some(interaction_type)) - } - } - } - } - } - // Process `content` - if let Some(PropertyValue::Fragment(content)) = props.get("content") - .map(Vec::as_slice) - .unwrap_or_default() - .first() - { - let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default()) - .from_utf8() - .one(content.html.to_owned().as_bytes()) - .document; - - // This is a trick to unwrap recursion into a loop - // - // A list of unprocessed node is made. Then, in each - // iteration, the list is "taken" and replaced with an - // empty list, which is populated with nodes for the next - // iteration of the loop. - // - // Empty list means all nodes were processed. - let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect(); - while unprocessed_nodes.len() > 0 { - // "Take" the list out of its memory slot, replace it with an empty list - let nodes = std::mem::take(&mut unprocessed_nodes); - 'nodes_loop: for node in nodes.into_iter() { - // Add children nodes to the list for the next iteration - unprocessed_nodes.extend(node.children.borrow().iter().cloned()); - - if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data { - // If it's not `<a>`, skip it - if name.local != *"a" { continue; } - let mut is_mention: bool = false; - for attr in attrs.borrow().iter() { - if attr.name.local == *"rel" { - // Don't count `rel="nofollow"` links — a web crawler should ignore them - // and so for purposes of driving visitors they are useless - if attr.value - .as_ref() - .split([',', ' ']) - .any(|v| v == "nofollow") - { - // Skip the entire node. - continue 'nodes_loop; - } - } - // if it's not `<a href="...">`, skip it - if attr.name.local != *"href" { continue; } - // Be forgiving in parsing URLs, and resolve them against the base URL - if let Ok(url) = base_url.join(attr.value.as_ref()) { - if &url == link { - is_mention = true; - } - } - } - if is_mention { - return Ok(Some(MentionType::Mention)); - } - } - } - } - - } - } - - Ok(None) -} - -#[derive(Parser, Debug)] -#[clap( - name = "kittybox-check-webmention", - author = "Vika <vika@fireburn.ru>", - version = env!("CARGO_PKG_VERSION"), - about = "Verify an incoming webmention" -)] -struct Args { - #[clap(value_parser)] - url: url::Url, - #[clap(value_parser)] - link: url::Url -} - -#[tokio::main] -async fn main() -> Result<(), self::Error> { - let args = Args::parse(); - - let http: reqwest::Client = { - #[allow(unused_mut)] - let mut builder = reqwest::Client::builder() - .user_agent(concat!( - env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION") - )); - - builder.build().unwrap() - }; - - let response = http.get(args.url.clone()).send().await?; - let text = response.text().await?; - - if let Some(mention_type) = check_mention(text, &args.url, &args.link)? { - println!("{:?}", mention_type); - - Ok(()) - } else { - std::process::exit(1) - } -} |