about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- kittybox-rs/Cargo.toml 10
-rw-r--r-- kittybox-rs/src/bin/kittybox-check-webmention.rs 152
-rw-r--r-- kittybox-rs/src/bin/kittybox-mf2.rs 49
3 files changed, 211 insertions, 0 deletions
diff --git a/kittybox-rs/Cargo.toml b/kittybox-rs/Cargo.toml
index 8f9dff6..6f7c3c4 100644
--- a/kittybox-rs/Cargo.toml
+++ b/kittybox-rs/Cargo.toml
@@ -36,6 +36,16 @@ name = "kittybox-indieauth-helper"
 path = "src/bin/kittybox-indieauth-helper.rs"
 required-features = ["cli"]
 
+[[bin]]
+name = "kittybox-check-webmention"
+path = "src/bin/kittybox-check-webmention.rs"
+required-features = ["cli"]
+
+[[bin]]
+name = "kittybox-mf2"
+path = "src/bin/kittybox-mf2.rs"
+required-features = ["cli"]
+
 [workspace]
 members = [".", "./util", "./templates", "./indieauth"]
 default-members = [".", "./util", "./templates", "./indieauth"]
diff --git a/kittybox-rs/src/bin/kittybox-check-webmention.rs b/kittybox-rs/src/bin/kittybox-check-webmention.rs
new file mode 100644
index 0000000..5307830
--- /dev/null
+++ b/kittybox-rs/src/bin/kittybox-check-webmention.rs
@@ -0,0 +1,152 @@
+use std::cell::{RefCell, Ref};
+use std::rc::Rc;
+
+use clap::Parser;
+use microformats::types::PropertyValue;
+use microformats::html5ever;
+use microformats::html5ever::tendril::TendrilSink;
+
+#[derive(thiserror::Error, Debug)]
+enum Error { // Failure causes for this tool; each `#[from]` lets `?` convert the underlying error
+    #[error("http request error: {0}")]
+    Http(#[from] reqwest::Error), // wraps errors reported by `reqwest`
+    #[error("microformats error: {0}")]
+    Microformats(#[from] microformats::Error), // wraps errors reported by the `microformats` parser
+    #[error("json error: {0}")]
+    Json(#[from] serde_json::Error), // NOTE(review): nothing visible in this file produces a `serde_json` error
+    #[error("url parse error: {0}")]
+    UrlParse(#[from] url::ParseError), // NOTE(review): `check_mention` discards URL join failures, so this looks unused here
+}
+
+#[derive(Debug)]
+enum MentionType { // How the checked page refers to the target link; `main` prints it via `Debug`
+    Reply, // an `in-reply-to` property equals the link
+    Like, // a `like-of` property equals the link
+    Repost, // a `repost-of` property equals the link
+    Bookmark, // a `bookmark-of` property equals the link
+    Mention // a plain `<a href>` inside an item's `content` points at the link
+}
+
+/// Checks whether the HTML `document` fetched from `base_url` refers to `link`.
+///
+/// Each item in the parsed document's `items` is checked for `in-reply-to`,
+/// `like-of`, `bookmark-of` and `repost-of` URL values equal to `link`. Failing
+/// that, the item's first `content` fragment is searched for an `<a href>` that
+/// resolves to `link`, which counts as a plain [`MentionType::Mention`]. Anchors
+/// carrying `rel="nofollow"` are ignored.
+///
+/// Returns `Ok(None)` if nothing refers to `link`; fails only if MF2 parsing fails.
+fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
+    // First, check the document for MF2 markup
+    let document = microformats::from_html(document.as_ref(), base_url.clone())?;
+
+    // Get an iterator of all items
+    let items_iter = document.items.iter()
+        .map(AsRef::as_ref)
+        .map(RefCell::borrow);
+
+    for item in items_iter {
+        let props = item.properties.borrow();
+        for (prop, interaction_type) in [
+            ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like),
+            ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost)
+        ] {
+            if let Some(propvals) = props.get(prop) {
+                for val in propvals {
+                    if let PropertyValue::Url(url) = val {
+                        if url == link {
+                            return Ok(Some(interaction_type))
+                        }
+                    }
+                }
+            }
+        }
+        // Process `content`
+        if let Some(PropertyValue::Fragment(content)) = props.get("content")
+            .map(Vec::as_slice)
+            .unwrap_or_default()
+            .first()
+        {
+            let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default())
+                .from_utf8()
+                .one(content.html.as_bytes())
+                .document;
+
+            // Walk the DOM with an explicit stack instead of recursion. Visiting
+            // order is irrelevant: any qualifying link gives the same answer.
+            let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect();
+            while let Some(node) = unprocessed_nodes.pop() {
+                // Queue the children before inspecting the node itself
+                unprocessed_nodes.extend(node.children.borrow().iter().cloned());
+
+                if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data {
+                    // If it's not `<a>`, skip it
+                    if name.local != *"a" { continue; }
+                    // Both attributes are needed before deciding, because `rel`
+                    // may come before or after `href` in the markup.
+                    let mut links_to_target = false;
+                    let mut nofollow = false;
+                    for attr in attrs.borrow().iter() {
+                        let value: &str = attr.value.as_ref();
+                        if attr.name.local == *"href" {
+                            // Be forgiving in parsing URLs, and resolve them against the base URL
+                            links_to_target |= base_url.join(value).map_or(false, |url| &url == link);
+                        } else if attr.name.local == *"rel" {
+                            // `rel` is a token list; commas are tolerated like whitespace
+                            nofollow |= value
+                                .split(|c: char| c == ',' || c.is_ascii_whitespace())
+                                .any(|token| token.eq_ignore_ascii_case("nofollow"));
+                        }
+                    }
+                    // Don't count `rel="nofollow"` links — a web crawler should ignore them
+                    // and so for purposes of driving visitors they are useless
+                    if links_to_target && !nofollow {
+                        return Ok(Some(MentionType::Mention));
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(None)
+}
+
+#[derive(Parser, Debug)]
+#[clap(
+    name = "kittybox-check-webmention",
+    author = "Vika <vika@fireburn.ru>",
+    version = env!("CARGO_PKG_VERSION"),
+    about = "Verify an incoming webmention"
+)]
+struct Args { // Command-line arguments; plain `//` comments are used because clap turns `///` docs into help text
+    #[clap(value_parser)]
+    url: url::Url, // page to fetch; `main` also passes it to `check_mention` as the base URL
+    #[clap(value_parser)]
+    link: url::Url // URL the fetched page is expected to refer to
+}
+
+/// Fetches `url`, checks whether it mentions `link` and prints the mention type.
+///
+/// Exits with status 1 when the page does not refer to the target.
+#[tokio::main]
+async fn main() -> Result<(), self::Error> {
+    let args = Args::parse();
+
+    // Client construction can fail (e.g. TLS setup); report that as an error instead of panicking
+    let http = reqwest::Client::builder()
+        .user_agent(concat!(
+            env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
+        ))
+        .build()?;
+
+    let response = http.get(args.url.clone()).send().await?;
+    let text = response.text().await?;
+
+    match check_mention(text, &args.url, &args.link)? {
+        Some(mention_type) => println!("{:?}", mention_type),
+        // No mention found: signal it through the exit status
+        None => std::process::exit(1),
+    }
+
+    Ok(())
+}
diff --git a/kittybox-rs/src/bin/kittybox-mf2.rs b/kittybox-rs/src/bin/kittybox-mf2.rs
new file mode 100644
index 0000000..4366cb8
--- /dev/null
+++ b/kittybox-rs/src/bin/kittybox-mf2.rs
@@ -0,0 +1,49 @@
+use clap::Parser;
+
+#[derive(Parser, Debug)]
+#[clap(
+    name = "kittybox-mf2",
+    author = "Vika <vika@fireburn.ru>",
+    version = env!("CARGO_PKG_VERSION"),
+    about = "Fetch HTML and turn it into MF2-JSON"
+)]
+struct Args { // Command-line arguments; plain `//` comments are used because clap turns `///` docs into help text
+    #[clap(value_parser)]
+    url: url::Url, // page to fetch; `main` also hands it to the MF2 parser together with the HTML
+}
+
+#[derive(thiserror::Error, Debug)]
+enum Error { // Failure causes for this tool; each `#[from]` lets `?` in `main` convert the underlying error
+    #[error("http request error: {0}")]
+    Http(#[from] reqwest::Error), // wraps errors reported by `reqwest`
+    #[error("microformats error: {0}")]
+    Microformats(#[from] microformats::Error), // wraps errors reported by `microformats::from_html`
+    #[error("json error: {0}")]
+    Json(#[from] serde_json::Error), // wraps errors from serializing the parsed document
+    #[error("url parse error: {0}")]
+    UrlParse(#[from] url::ParseError), // NOTE(review): nothing visible in this file produces a URL parse error
+}
+
+/// Fetches the page at `url` and prints its MF2-JSON representation to stdout.
+#[tokio::main]
+async fn main() -> Result<(), Error> {
+    let args = Args::parse();
+
+    // Client construction can fail (e.g. TLS setup); report that as an error instead of panicking
+    let http = reqwest::Client::builder()
+        .user_agent(concat!(
+            env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
+        ))
+        .build()?;
+
+    // The URL is moved into the parser below, hence the clone for the request
+    let response = http.get(args.url.clone()).send().await?;
+    let text = response.text().await?;
+
+    // NOTE(review): the second argument is presumably the base URL for relative links — confirm
+    let mf2 = microformats::from_html(text.as_ref(), args.url)?;
+
+    println!("{}", serde_json::to_string_pretty(&mf2)?);
+
+    Ok(())
+}