diff options
author | Vika <vika@fireburn.ru> | 2023-07-29 21:59:56 +0300 |
---|---|---|
committer | Vika <vika@fireburn.ru> | 2023-07-29 21:59:56 +0300 |
commit | 0617663b249f9ca488e5de652108b17d67fbaf45 (patch) | |
tree | 11564b6c8fa37bf9203a0a4cc1c4e9cc088cb1a5 /src/webmentions/check.rs | |
parent | 26c2b79f6a6380ae3224e9309b9f3352f5717bd7 (diff) | |
download | kittybox-0617663b249f9ca488e5de652108b17d67fbaf45.tar.zst |
Moved the entire Kittybox tree into the root
Diffstat (limited to 'src/webmentions/check.rs')
-rw-r--r-- | src/webmentions/check.rs | 113 |
1 files changed, 113 insertions, 0 deletions
diff --git a/src/webmentions/check.rs b/src/webmentions/check.rs new file mode 100644 index 0000000..f7322f7 --- /dev/null +++ b/src/webmentions/check.rs @@ -0,0 +1,113 @@ +use std::{cell::RefCell, rc::Rc}; +use microformats::{types::PropertyValue, html5ever::{self, tendril::TendrilSink}}; +use kittybox_util::MentionType; + +#[derive(thiserror::Error, Debug)] +pub enum Error { + #[error("microformats error: {0}")] + Microformats(#[from] microformats::Error), + // #[error("json error: {0}")] + // Json(#[from] serde_json::Error), + #[error("url parse error: {0}")] + UrlParse(#[from] url::ParseError), +} + +#[tracing::instrument] +pub fn check_mention(document: impl AsRef<str> + std::fmt::Debug, base_url: &url::Url, link: &url::Url) -> Result<Option<(MentionType, serde_json::Value)>, Error> { + tracing::debug!("Parsing MF2 markup..."); + // First, check the document for MF2 markup + let document = microformats::from_html(document.as_ref(), base_url.clone())?; + + // Get an iterator of all items + let items_iter = document.items.iter() + .map(AsRef::as_ref) + .map(RefCell::borrow); + + for item in items_iter { + tracing::debug!("Processing item: {:?}", item); + + let props = item.properties.borrow(); + for (prop, interaction_type) in [ + ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like), + ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost) + ] { + if let Some(propvals) = props.get(prop) { + tracing::debug!("Has a u-{} property", prop); + for val in propvals { + if let PropertyValue::Url(url) = val { + if url == link { + tracing::debug!("URL matches! Webmention is valid"); + return Ok(Some((interaction_type, serde_json::to_value(&*item).unwrap()))) + } + } + } + } + } + // Process `content` + tracing::debug!("Processing e-content..."); + if let Some(PropertyValue::Fragment(content)) = props.get("content") + .map(Vec::as_slice) + .unwrap_or_default() + .first() + { + tracing::debug!("Parsing HTML data..."); + let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default()) + .from_utf8() + .one(content.html.to_owned().as_bytes()) + .document; + + // This is a trick to unwrap recursion into a loop + // + // A list of unprocessed node is made. Then, in each + // iteration, the list is "taken" and replaced with an + // empty list, which is populated with nodes for the next + // iteration of the loop. + // + // Empty list means all nodes were processed. + let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect(); + while !unprocessed_nodes.is_empty() { + // "Take" the list out of its memory slot, replace it with an empty list + let nodes = std::mem::take(&mut unprocessed_nodes); + tracing::debug!("Processing list of {} nodes", nodes.len()); + 'nodes_loop: for node in nodes.into_iter() { + // Add children nodes to the list for the next iteration + unprocessed_nodes.extend(node.children.borrow().iter().cloned()); + + if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data { + // If it's not `<a>`, skip it + if name.local != *"a" { continue; } + let mut is_mention: bool = false; + for attr in attrs.borrow().iter() { + if attr.name.local == *"rel" { + // Don't count `rel="nofollow"` links — a web crawler should ignore them + // and so for purposes of driving visitors they are useless + if attr.value + .as_ref() + .split([',', ' ']) + .any(|v| v == "nofollow") + { + // Skip the entire node. + continue 'nodes_loop; + } + } + // if it's not `<a href="...">`, skip it + if attr.name.local != *"href" { continue; } + // Be forgiving in parsing URLs, and resolve them against the base URL + if let Ok(url) = base_url.join(attr.value.as_ref()) { + if &url == link { + is_mention = true; + } + } + } + if is_mention { + return Ok(Some((MentionType::Mention, serde_json::to_value(&*item).unwrap()))); + } + } + } + } + + } + } + + Ok(None) +} |