about summary refs log tree commit diff
path: root/src/bin/kittybox-check-webmention.rs
blob: f02032ce8b902893a3f66f233b20a403df8b4fea (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
use std::cell::{RefCell, Ref};
use std::rc::Rc;

use clap::Parser;
use microformats::types::PropertyValue;
use microformats::html5ever;
use microformats::html5ever::tendril::TendrilSink;

#[derive(thiserror::Error, Debug)]
enum Error {
    #[error("http request error: {0}")]
    Http(#[from] reqwest::Error),
    #[error("microformats error: {0}")]
    Microformats(#[from] microformats::Error),
    #[error("json error: {0}")]
    Json(#[from] serde_json::Error),
    #[error("url parse error: {0}")]
    UrlParse(#[from] url::ParseError),
}

use kittybox_util::MentionType;

fn check_mention(document: impl AsRef<str>, base_url: &url::Url, link: &url::Url) -> Result<Option<MentionType>, Error> {
    // First, check the document for MF2 markup
    let document = microformats::from_html(document.as_ref(), base_url.clone())?;

    // Get an iterator of all items
    let items_iter = document.items.iter()
        .map(AsRef::as_ref)
        .map(RefCell::borrow);

    for item in items_iter {
        let props = item.properties.borrow();
        for (prop, interaction_type) in [
            ("in-reply-to", MentionType::Reply), ("like-of", MentionType::Like),
            ("bookmark-of", MentionType::Bookmark), ("repost-of", MentionType::Repost)
        ] {
            if let Some(propvals) = props.get(prop) {
                for val in propvals {
                    if let PropertyValue::Url(url) = val {
                        if url == link {
                            return Ok(Some(interaction_type))
                        }
                    }
                }
            }
        }
        // Process `content`
        if let Some(PropertyValue::Fragment(content)) = props.get("content")
            .map(Vec::as_slice)
            .unwrap_or_default()
            .first()
        {
            let root = html5ever::parse_document(html5ever::rcdom::RcDom::default(), Default::default())
                .from_utf8()
                .one(content.html.to_owned().as_bytes())
                .document;

            // This is a trick to unwrap recursion into a loop
            //
            // A list of unprocessed node is made. Then, in each
            // iteration, the list is "taken" and replaced with an
            // empty list, which is populated with nodes for the next
            // iteration of the loop.
            //
            // Empty list means all nodes were processed.
            let mut unprocessed_nodes: Vec<Rc<html5ever::rcdom::Node>> = root.children.borrow().iter().cloned().collect();
            while unprocessed_nodes.len() > 0 {
                // "Take" the list out of its memory slot, replace it with an empty list
                let nodes = std::mem::take(&mut unprocessed_nodes);
                'nodes_loop: for node in nodes.into_iter() {
                    // Add children nodes to the list for the next iteration
                    unprocessed_nodes.extend(node.children.borrow().iter().cloned());

                    if let html5ever::rcdom::NodeData::Element { ref name, ref attrs, .. } = node.data {
                        // If it's not `<a>`, skip it
                        if name.local != *"a" { continue; }
                        let mut is_mention: bool = false;
                        for attr in attrs.borrow().iter() {
                            if attr.name.local == *"rel" {
                                // Don't count `rel="nofollow"` links — a web crawler should ignore them
                                // and so for purposes of driving visitors they are useless
                                if attr.value
                                    .as_ref()
                                    .split([',', ' '])
                                    .any(|v| v == "nofollow")
                                {
                                    // Skip the entire node.
                                    continue 'nodes_loop;
                                }
                            }
                            // if it's not `<a href="...">`, skip it
                            if attr.name.local != *"href" { continue; }
                            // Be forgiving in parsing URLs, and resolve them against the base URL
                            if let Ok(url) = base_url.join(attr.value.as_ref()) {
                                if &url == link {
                                    is_mention = true;
                                }
                            }
                        }
                        if is_mention {
                            return Ok(Some(MentionType::Mention));
                        }
                    }
                }
            }
            
        }
    }

    Ok(None)
}

#[derive(Parser, Debug)]
#[clap(
    name = "kittybox-check-webmention",
    author = "Vika <vika@fireburn.ru>",
    version = env!("CARGO_PKG_VERSION"),
    about = "Verify an incoming webmention"
)]
struct Args {
    #[clap(value_parser)]
    url: url::Url,
    #[clap(value_parser)]
    link: url::Url
}

#[tokio::main]
async fn main() -> Result<(), self::Error> {
    let args = Args::parse();
    
    let http: reqwest::Client = {
        #[allow(unused_mut)]
        let mut builder = reqwest::Client::builder()
            .user_agent(concat!(
                env!("CARGO_PKG_NAME"), "/", env!("CARGO_PKG_VERSION")
            ));

        builder.build().unwrap()
    };

    let response = http.get(args.url.clone()).send().await?;
    let text = response.text().await?;
    
    if let Some(mention_type) = check_mention(text, &args.url, &args.link)? {
        println!("{:?}", mention_type);

        Ok(())
    } else {
        std::process::exit(1)
    }
}