about summary refs log tree commit diff
diff options
context:
space:
mode:
author Vika <vika@fireburn.ru> 2024-08-26 17:42:08 +0300
committer Vika <vika@fireburn.ru> 2024-08-26 17:42:08 +0300
commit 8f78140825380b808270422c9613999a9399c573 (patch)
tree 41f706a6cd2e5d1d40b2b097a5783b0a98ec9a7b
parent 31a0bdad439a4575c1686f690e9e72bd44dde472 (diff)
Fix kittybox-check-webmention CLI tool
Now it uses the same code as Kittybox itself.
-rw-r--r-- src/bin/kittybox-check-webmention.rs 115
-rw-r--r-- src/webmentions/mod.rs 2
2 files changed, 8 insertions, 109 deletions
diff --git a/src/bin/kittybox-check-webmention.rs b/src/bin/kittybox-check-webmention.rs
index 820e7c4..b43980e 100644
--- a/src/bin/kittybox-check-webmention.rs
+++ b/src/bin/kittybox-check-webmention.rs
@@ -1,114 +1,13 @@
-use std::cell::{RefCell, Ref};
-use std::rc::Rc;
-
 use clap::Parser;
-use microformats::types::PropertyValue;
-use microformats::html5ever;
-use microformats::html5ever::tendril::TendrilSink;
 
-#[derive(thiserror::Error, Debug)]
+use kittybox::webmentions::check::{check_mention, Error as WebmentionError};
+
+#[derive(Debug, thiserror::Error)]
 enum Error {
-    #[error("http request error: {0}")]
+    #[error("reqwest error: {0}")]
     Http(#[from] reqwest::Error),
-    #[error("microformats error: {0}")]
-    Microformats(#[from] microformats::Error),
-    #[error("json error: {0}")]
-    Json(#[from] serde_json::Error),
-    #[error("url parse error: {0}")]
-    UrlParse(#[from] url::ParseError),
-}
-
-use kittybox_util::MentionType;
-
-fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
-    // First, check the document for MF2 markup
-    let document = microformats::from_html(document.as_ref(), base_url.clone())?;
-
-    // Get an iterator of all items
-    let items_iter = document.items.iter()
-        .map(AsRef::as_ref)
-        .map(RefCell::borrow);
-
-    for item in items_iter {
-        let props = item.properties.borrow();
-        for (prop, interaction_type) in [
-            ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like),
-            ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost)
-        ] {
-            if let Some(propvals) = props.get(prop) {
-                for val in propvals {
-                    if let PropertyValue::Url(url) = val {
-                        if url == link {
-                            return Ok(Some(interaction_type))
-                        }
-                    }
-                }
-            }
-        }
-        // Process `content`
-        if let Some(PropertyValue::Fragment(content)) = props.get("content")
-            .map(Vec::as_slice)
-            .unwrap_or_default()
-            .first()
-        {
-            let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default())
-                .from_utf8()
-                .one(content.html.to_owned().as_bytes())
-                .document;
-
-            // This is a trick to unwrap recursion into a loop
-            //
-            // A list of unprocessed node is made. Then, in each
-            // iteration, the list is "taken" and replaced with an
-            // empty list, which is populated with nodes for the next
-            // iteration of the loop.
-            //
-            // Empty list means all nodes were processed.
-            let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect();
-            while unprocessed_nodes.len() > 0 {
-                // "Take" the list out of its memory slot, replace it with an empty list
-                let nodes = std::mem::take(&mut unprocessed_nodes);
-                'nodes_loop: for node in nodes.into_iter() {
-                    // Add children nodes to the list for the next iteration
-                    unprocessed_nodes.extend(node.children.borrow().iter().cloned());
-
-                    if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data {
-                        // If it's not `<a>`, skip it
-                        if name.local != *"a" { continue; }
-                        let mut is_mention: bool = false;
-                        for attr in attrs.borrow().iter() {
-                            if attr.name.local == *"rel" {
-                                // Don't count `rel="nofollow"` links — a web crawler should ignore them
-                                // and so for purposes of driving visitors they are useless
-                                if attr.value
-                                    .as_ref()
-                                    .split([',', ' '])
-                                    .any(|v| v == "nofollow")
-                                {
-                                    // Skip the entire node.
-                                    continue 'nodes_loop;
-                                }
-                            }
-                            // if it's not `<a href="...">`, skip it
-                            if attr.name.local != *"href" { continue; }
-                            // Be forgiving in parsing URLs, and resolve them against the base URL
-                            if let Ok(url) = base_url.join(attr.value.as_ref()) {
-                                if &url == link {
-                                    is_mention = true;
-                                }
-                            }
-                        }
-                        if is_mention {
-                            return Ok(Some(MentionType::Mention));
-                        }
-                    }
-                }
-            }
-
-        }
-    }
-
-    Ok(None)
+    #[error("webmention check error: {0}")]
+    Webmention(#[from] WebmentionError)
 }
 
 #[derive(Parser, Debug)]
@@ -126,7 +25,7 @@ struct Args {
 }
 
 #[tokio::main]
-async fn main() -> Result<(), self::Error> {
+async fn main() -> Result<(), Error> {
     let args = Args::parse();
 
     let http: reqwest::Client = {
diff --git a/src/webmentions/mod.rs b/src/webmentions/mod.rs
index 22701b4..91b274b 100644
--- a/src/webmentions/mod.rs
+++ b/src/webmentions/mod.rs
@@ -92,7 +92,7 @@ where
         .unwrap();
 }
 
-mod check;
+pub mod check;
 
 #[derive(thiserror::Error, Debug)]
 enum Error<Q: std::error::Error + std::fmt::Debug + Send + 'static> {