about summary refs log tree commit diff
diff options
context:
space:
mode:
author Vika <vika@fireburn.ru> 2023-07-21 17:40:32 +0300
committer Vika <vika@fireburn.ru> 2023-07-21 17:40:32 +0300
commit f13c60b70e1d9435b5f2803fc48c44eed7be761c (patch)
tree 16894e3116e85155797a752237a69bd0872db25f
parent 94ebc5e653191fcaacfa91dddebf88dca7e7b7fe (diff)
download kittybox-f13c60b70e1d9435b5f2803fc48c44eed7be761c.tar.zst
Move MentionType into util and fix bugs in -check-webmention app
-rw-r--r-- kittybox-rs/src/bin/kittybox-check-webmention.rs 24
-rw-r--r-- kittybox-rs/util/src/lib.rs 16
2 files changed, 28 insertions, 12 deletions
diff --git a/kittybox-rs/src/bin/kittybox-check-webmention.rs b/kittybox-rs/src/bin/kittybox-check-webmention.rs
index 5307830..f02032c 100644
--- a/kittybox-rs/src/bin/kittybox-check-webmention.rs
+++ b/kittybox-rs/src/bin/kittybox-check-webmention.rs
@@ -18,14 +18,7 @@ enum Error {
     UrlParse(#[from] url::ParseError),
 }
 
-#[derive(Debug)]
-enum MentionType {
-    Reply,
-    Like,
-    Repost,
-    Bookmark,
-    Mention
-}
+use kittybox_util::MentionType;
 
 fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
     // First, check the document for MF2 markup
@@ -75,7 +68,7 @@ fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url
             while unprocessed_nodes.len() > 0 {
                 // "Take" the list out of its memory slot, replace it with an empty list
                 let nodes = std::mem::take(&mut unprocessed_nodes);
-                for node in nodes.into_iter() {
+                'nodes_loop: for node in nodes.into_iter() {
                     // Add children nodes to the list for the next iteration
                     unprocessed_nodes.extend(node.children.borrow().iter().cloned());
 
@@ -84,13 +77,20 @@ fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url
                         if name.local != *"a" { continue; }
                         let mut is_mention: bool = false;
                         for attr in attrs.borrow().iter() {
-                            // if it's not `<a href="...">`, skip it 
-                            if attr.name.local != *"href" { continue; }
                             if attr.name.local == *"rel" {
                                 // Don't count `rel="nofollow"` links — a web crawler should ignore them
                                 // and so for purposes of driving visitors they are useless
-                                if attr.value.as_ref().split([',', ' ']).any(|v| v == "nofollow") { continue; }
+                                if attr.value
+                                    .as_ref()
+                                    .split([',', ' '])
+                                    .any(|v| v == "nofollow")
+                                {
+                                    // Skip the entire node.
+                                    continue 'nodes_loop;
+                                }
                             }
+                            // if it's not `<a href="...">`, skip it
+                            if attr.name.local != *"href" { continue; }
                             // Be forgiving in parsing URLs, and resolve them against the base URL
                             if let Ok(url) = base_url.join(attr.value.as_ref()) {
                                 if &url == link {
diff --git a/kittybox-rs/util/src/lib.rs b/kittybox-rs/util/src/lib.rs
index 617ee97..c49bdf5 100644
--- a/kittybox-rs/util/src/lib.rs
+++ b/kittybox-rs/util/src/lib.rs
@@ -24,6 +24,22 @@ pub struct MicropubChannel {
     pub name: String,
 }
 
+#[derive(Debug, Default)]
+/// Common types of webmentions, classified by the kind of link pointing at the target.
+pub enum MentionType {
+    /// Corresponds to a `u-in-reply-to` link.
+    Reply,
+    /// Corresponds to a `u-like-of` link.
+    Like,
+    /// Corresponds to a `u-repost-of` link.
+    Repost,
+    /// Corresponds to a `u-bookmark-of` link.
+    Bookmark,
+    /// A plain link without MF2 annotations. This is the `Default` variant.
+    #[default]
+    Mention
+}
+
 /// Common errors from the IndieWeb protocols that can be reused between modules.
 pub mod error;
 pub use error::{ErrorType, MicropubError};