use crate::database::Storage; use crate::indieauth::User; use crate::ApplicationState; use chrono::prelude::*; use core::iter::Iterator; use futures::stream; use futures::StreamExt; use http_types::Mime; use log::{error, info, warn}; use newbase60::num_to_sxg; use std::convert::TryInto; use std::str::FromStr; use tide::prelude::json; use tide::{Request, Response, Result}; static DEFAULT_CHANNEL_PATH: &str = "/feeds/main"; static DEFAULT_CHANNEL_NAME: &str = "Main feed"; static CONTACTS_CHANNEL_PATH: &str = "/feeds/vcards"; static CONTACTS_CHANNEL_NAME: &str = "My address book"; macro_rules! response { ($($code:expr, $json:tt)+) => { $( Ok(Response::builder($code).body(json!($json)).build()) )+ }; } macro_rules! error_json { ($($code:expr, $error:expr, $error_desc:expr)+) => { $( response!($code, { "error": $error, "error_description": $error_desc }) )+ } } fn get_folder_from_type(post_type: &str) -> String { (match post_type { "h-feed" => "feeds/", "h-card" => "vcards/", "h-event" => "events/", "h-food" => "food/", _ => "posts/", }) .to_string() } pub fn normalize_mf2(mut body: serde_json::Value, user: &User) -> (String, serde_json::Value) { // Normalize the MF2 object here. let me = &user.me; let published: DateTime; let folder = get_folder_from_type(body["type"][0].as_str().unwrap()); if let Some(dt) = body["properties"]["published"][0].as_str() { // Check if the datetime is parsable. match DateTime::parse_from_rfc3339(dt) { Ok(dt) => { published = dt; } Err(_) => { // Reset the datetime to a proper datetime. // Do not attempt to recover the information. // Do not pass GO. Do not collect $200. let curtime: DateTime = Local::now(); body["properties"]["published"] = serde_json::Value::Array(vec![serde_json::Value::String(curtime.to_rfc3339())]); published = chrono::DateTime::from(curtime); } } } else { // Set the datetime. let curtime: DateTime = Local::now(); body["properties"]["published"] = serde_json::Value::Array(vec![serde_json::Value::String(curtime.to_rfc3339())]); published = chrono::DateTime::from(curtime); } match body["properties"]["uid"][0].as_str() { None => { let uid = serde_json::Value::String( me.join( &(folder.clone() + &num_to_sxg(published.timestamp_millis().try_into().unwrap())), ) .unwrap() .to_string(), ); body["properties"]["uid"] = serde_json::Value::Array(vec![uid.clone()]); match body["properties"]["url"].as_array_mut() { Some(array) => array.push(uid), None => body["properties"]["url"] = body["properties"]["uid"].clone(), } } Some(uid_str) => { let uid = uid_str.to_string(); match body["properties"]["url"].as_array_mut() { Some(array) => { if !array.iter().any(|i| i.as_str().unwrap_or("") == uid) { array.push(serde_json::Value::String(uid)) } } None => body["properties"]["url"] = body["properties"]["uid"].clone(), } } } if let Some(slugs) = body["properties"]["mp-slug"].as_array() { let new_urls = slugs .iter() .map(|i| i.as_str().unwrap_or("")) .filter(|i| i != &"") .map(|i| me.join(&((&folder).clone() + i)).unwrap().to_string()) .collect::>(); let urls = body["properties"]["url"].as_array_mut().unwrap(); new_urls.iter().for_each(|i| urls.push(json!(i))); } let props = body["properties"].as_object_mut().unwrap(); props.remove("mp-slug"); if body["properties"]["content"][0].is_string() { // Convert the content to HTML using the `markdown` crate body["properties"]["content"] = json!([{ "html": markdown::to_html(body["properties"]["content"][0].as_str().unwrap()), "value": body["properties"]["content"][0] }]) } if body["properties"]["channel"][0].as_str().is_none() { if body["type"][0] == "h-entry" { // Set the channel to the main channel... let default_channel = me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); body["properties"]["channel"] = json!([default_channel]); } else if body["type"][0] == "h-card" { let default_channel = me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); body["properties"]["channel"] = json!([default_channel]); } else { body["properties"]["channel"] = json!([]); } // TODO: Sort other types of posts into channels too } body["properties"]["posted-with"] = json!([user.client_id]); if body["properties"]["author"][0].as_str().is_none() { body["properties"]["author"] = json!([me.as_str()]) } // TODO: maybe highlight #hashtags? // Find other processing to do and insert it here return ( body["properties"]["uid"][0].as_str().unwrap().to_string(), body, ); } pub async fn new_post( req: Request>, body: serde_json::Value, ) -> Result { // First, check for rights. let user = req.ext::().unwrap(); let storage = &req.state().storage; if !user.check_scope("create") { return error_json!( 401, "invalid_scope", "Not enough privileges to post. Try a token with a \"create\" scope instead." ); } let (uid, post) = normalize_mf2(body, user); // Security check! // This software might also be used in a multi-user setting // where several users or identities share one Micropub server // (maybe a family website or a shitpost sideblog?) if !post["properties"]["uid"][0] .as_str() .unwrap() .starts_with(user.me.as_str()) || post["properties"]["channel"] .as_array() .unwrap() .iter() .any(|url| !url.as_str().unwrap().starts_with(user.me.as_str())) { return error_json!( 403, "forbidden", "You're trying to post to someone else's website..." ); } match storage.post_exists(&uid).await { Ok(exists) => { if exists { return error_json!( 409, "already_exists", format!( "A post with the exact same UID already exists in the database: {}", uid ) ); } } Err(err) => return Ok(err.into()), } if let Err(err) = storage.put_post(&post, user.me.as_str()).await { return error_json!(500, "database_error", format!("{}", err)); } // It makes sense to use a loop here, because you wouldn't post to a hundred channels at once // Mostly one or two, and even those ones will be the ones picked for you by software for channel in post["properties"]["channel"] .as_array() .unwrap() .iter() .map(|i| i.as_str().unwrap_or("").to_string()) .filter(|i| !i.is_empty()) .collect::>() { let default_channel = user.me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); let vcards_channel = user.me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); match storage.post_exists(&channel).await { Ok(exists) => { if exists { if let Err(err) = storage .update_post( &channel, json!({ "add": { "children": [uid] } }), ) .await { return error_json!( 500, "database_error", format!( "Couldn't insert post into the channel due to a database error: {}", err ) ); } } else if channel == default_channel || channel == vcards_channel { if let Err(err) = create_feed(storage, &uid, &channel, &user).await { return error_json!( 500, "database_error", format!("Couldn't save feed: {}", err) ); } } else { warn!( "Ignoring request to post to a non-existent feed: {}", channel ); } } Err(err) => return error_json!(500, "database_error", err), } } // END WRITE BOUNDARY // do background processing on the post async_std::task::spawn(post_process_new_post(req, post)); Ok(Response::builder(202) .header("Location", &uid) .body(json!({"status": "accepted", "location": &uid})) .build()) } async fn create_feed( storage: &impl Storage, uid: &str, channel: &str, user: &User, ) -> crate::database::Result<()> { let path = url::Url::parse(channel).unwrap().path().to_string(); let (name, slug) = if path == DEFAULT_CHANNEL_PATH { (DEFAULT_CHANNEL_NAME, "main") } else if path == CONTACTS_CHANNEL_PATH { (CONTACTS_CHANNEL_NAME, "vcards") } else { panic!("Tried to create an unknown default feed!"); }; let (_, feed) = normalize_mf2( json!({ "type": ["h-feed"], "properties": { "name": [name], "mp-slug": [slug], }, "children": [uid] }), &user, ); storage.put_post(&feed, user.me.as_str()).await } async fn post_process_new_post( req: Request>, post: serde_json::Value, ) { // TODO: Post-processing the post (aka second write pass) // - [-] Download rich reply contexts // - [-] Syndicate the post if requested, add links to the syndicated copies // - [ ] Send WebSub notifications to the hub (if we happen to have one) // - [x] Send webmentions let http = &req.state().http_client; let uid = post["properties"]["uid"][0].as_str().unwrap().to_string(); // 1. Download rich reply contexts // This needs to be done first, because at this step we can also determine webmention endpoints // and save them for later use. Additionally, the richer our content is, the better. // This needs to be done asynchronously, so the posting experience for the author will be as fast // as possible without making them wait for potentially slow downstream websites to load // 1.1. Collect the list of contextually-significant post to load context from. // This will include reply-tos, liked, reposted and bookmarked content // // TODO: Fetch links mentioned in a post, since we need to send webmentions to those as mentions let mut contextually_significant_posts: Vec = vec![]; for prop in &["in-reply-to", "like-of", "repost-of", "bookmark-of"] { if let Some(array) = post["properties"][prop].as_array() { contextually_significant_posts.extend( array .iter() .filter_map(|v| v.as_str().and_then(|v| surf::Url::parse(v).ok())), ); } } // 1.2. Deduplicate the list contextually_significant_posts.sort_unstable(); contextually_significant_posts.dedup(); // 1.3. Fetch the posts with their bodies and save them in a new Vec<(surf::Url, String)> let posts_with_bodies: Vec<(surf::Url, surf::Response, String)> = stream::iter(contextually_significant_posts.into_iter()) .filter_map(|v: surf::Url| async move { if let Ok(res) = http.get(&v).send().await { if res.status() != 200 { None } else { Some((v, res)) } } else { None } }) .filter_map(|(v, mut res): (surf::Url, surf::Response)| async move { if let Ok(body) = res.body_string().await { Some((v, res, body)) } else { None } }) .collect() .await; // 1.4. Parse the bodies and include them in relevant places on the MF2 struct // This requires an MF2 parser, and there are none for Rust at the moment. // // TODO: integrate https://gitlab.com/vikanezrimaya/mf2-parser when it's ready // 2. Syndicate the post let syndicated_copies: Vec; if let Some(syndication_targets) = post["properties"]["syndicate-to"].as_array() { syndicated_copies = stream::iter( syndication_targets .iter() .filter_map(|v| v.as_str()) .filter_map(|t| surf::Url::parse(t).ok()) .collect::>() .into_iter() .map(|_t: surf::Url| async move { // TODO: Define supported syndication methods // and syndicate the endpoint there // Possible ideas: // - indieweb.xyz (might need a lot of space for the buttons though, investigate proposing grouping syndication targets) // - news.indieweb.org (IndieNews - needs a category linking to #indienews) // - Twitter via brid.gy (do I really need Twitter syndication tho?) if false { Some("") } else { None } }), ) .buffer_unordered(3) .filter_map(|v| async move { v }) .map(|v| serde_json::Value::String(v.to_string())) .collect::>() .await; } else { syndicated_copies = vec![] } // Save the post a second time here after syndication // We use update_post here to prevent race conditions since its required to be atomic let mut update = json!({ "action": "update", "url": &uid }); if !syndicated_copies.is_empty() { update["add"] = json!({}); update["add"]["syndication"] = serde_json::Value::Array(syndicated_copies); } if !posts_with_bodies.is_empty() { error!("Replacing context links with parsed MF2-JSON data is not yet implemented (but it's ok! it'll just be less pretty)") /* TODO: Replace context links with parsed MF2-JSON data * / update["replace"] = {} update["replace"]["like-of"] = [] update["replace"]["in-reply-to"] = [] update["replace"]["bookmark-of"] = [] update["replace"]["repost-of"] = [] // */ } // We don't need the original copy of the post anymore... I hope! // This will act as a safeguard so I can't read stale data by accident anymore... drop(post); if let Err(err) = req.state().storage.update_post(&uid, update).await { error!("Encountered error while post-processing a post: {}", err) // At this point, we can still continue, we just won't have rich data for the post // I wonder why could it even happen except in case of a database disconnection? } // 3. Send WebSub notifications // TODO WebSub support // 4. Send webmentions // We'll need the bodies here to get their endpoints let source = &uid; stream::iter(posts_with_bodies.into_iter()) .filter_map( |(url, response, body): (surf::Url, surf::Response, String)| async move { // Check Link headers first // the first webmention endpoint will be returned if let Some(values) = response.header("Link") { let iter = values.iter().flat_map(|i| i.as_str().split(',')); for link in iter { let mut split = link.split(';'); match split.next() { Some(uri) => { if let Some(uri) = uri.strip_prefix('<') { if let Some(uri) = uri.strip_suffix('>') { for prop in split { let lowercased = prop.to_ascii_lowercase(); if &lowercased == "rel=\"webmention\"" || &lowercased == "rel=webmention" { if let Ok(endpoint) = url.join(uri) { return Some((url, endpoint)); } } } } } } None => continue, } } } // TODO: Replace this function once the MF2 parser is ready // A compliant parser's output format includes rels, // we could just find a Webmention one in there let pattern = easy_scraper::Pattern::new(r#""#) .expect("Pattern for webmentions couldn't be parsed"); let matches = pattern.matches(&body); if matches.is_empty() { return None; } let endpoint = &matches[0]["url"]; if let Ok(endpoint) = url.join(endpoint) { Some((url, endpoint)) } else { None } }, ) .map(|(target, endpoint)| async move { info!( "Sending webmention to {} about {}", source, &target.to_string() ); let response = http .post(&endpoint) .content_type("application/x-www-form-urlencoded") .body( serde_urlencoded::to_string(vec![ ("source", source), ("target", &target.to_string()), ]) .expect("Couldn't construct webmention form"), ) .send() .await; match response { Ok(response) => { if response.status() == 200 || response.status() == 201 || response.status() == 202 { info!("Sent webmention for {} to {}", target, endpoint); Ok(()) } else { error!( "Sending webmention for {} to {} failed: Endpoint replied with HTTP {}", target, endpoint, response.status() ); Err(()) } } Err(err) => { error!( "Sending webmention for {} to {} failed: {}", target, endpoint, err ); Err(()) } } }) .buffer_unordered(3) .collect::>() .await; } async fn process_json( req: Request>, body: serde_json::Value, ) -> Result { let is_action = body["action"].is_string() && body["url"].is_string(); if is_action { // This could be an update, a deletion or an undeletion request. // Process it separately. let action = body["action"].as_str().unwrap(); let url = body["url"].as_str().unwrap(); let user = req.ext::().unwrap(); match action { "delete" => { if !user.check_scope("delete") { return error_json!( 401, "insufficient_scope", "You need a `delete` scope to delete posts." ); } // This special scope is not available through a token endpoint, since the // authorization endpoint is supposed to reject any auth request trying to get this // scope. It is intended for TRUSTED external services that need to modify the // database while ignoring any access controls if (url::Url::parse(url)?.origin().ascii_serialization() + "/") != user.me.as_str() && !user.check_scope("kittybox_internal:do_what_thou_wilt") { return error_json!( 403, "forbidden", "You're not allowed to delete someone else's posts." ); } if let Err(error) = req.state().storage.delete_post(&url).await { return Ok(error.into()); } Ok(Response::builder(200).build()) } "update" => { if !user.check_scope("update") { return error_json!( 401, "insufficient_scope", "You need an `update` scope to update posts." ); } if (url::Url::parse(url)?.origin().ascii_serialization() + "/") != user.me.as_str() && !user.check_scope("kittybox_internal:do_what_thou_wilt") { return error_json!( 403, "forbidden", "You're not allowed to delete someone else's posts." ); } if let Err(error) = req.state().storage.update_post(&url, body.clone()).await { Ok(error.into()) } else { Ok(Response::builder(204).build()) } } _ => return error_json!(400, "invalid_request", "This action is not supported."), } } else if body["type"][0].is_string() { // This is definitely an h-entry or something similar. Check if it has properties? if body["properties"].is_object() { // Ok, this is definitely a new h-entry. Let's save it. return new_post(req, body).await; } else { return error_json!( 400, "invalid_request", "This MF2-JSON object has a type, but not properties. This makes no sense to post." ); } } else { return error_json!( 400, "invalid_request", "Try sending MF2-structured data or an object with an \"action\" and \"url\" keys." ); } } fn convert_form_to_mf2_json(form: Vec<(String, String)>) -> serde_json::Value { let mut mf2 = json!({"type": [], "properties": {}}); for (k, v) in form { if k == "h" { mf2["type"] .as_array_mut() .unwrap() .push(json!("h-".to_string() + &v)); } else if k != "access_token" { let key = k.strip_suffix("[]").unwrap_or(&k); match mf2["properties"][key].as_array_mut() { Some(prop) => prop.push(json!(v)), None => mf2["properties"][key] = json!([v]), } } } if mf2["type"].as_array().unwrap().is_empty() { mf2["type"].as_array_mut().unwrap().push(json!("h-entry")); } mf2 } async fn process_form( req: Request>, form: Vec<(String, String)>, ) -> Result { if let Some((_, v)) = form.iter().find(|(k, _)| k == "action") { if v == "delete" { let user = req.ext::().unwrap(); if !user.check_scope("delete") { return error_json!( 401, "insufficient_scope", "You cannot delete posts without a `delete` scope." ); } match form.iter().find(|(k, _)| k == "url") { Some((_, url)) => { if (url::Url::parse(url)?.origin().ascii_serialization() + "/") != user.me.as_str() && !user.check_scope("kittybox_internal:do_what_thou_wilt") { return error_json!( 403, "forbidden", "You're not allowed to delete someone else's posts." ); } if let Err(error) = req.state().storage.delete_post(&url).await { return error_json!(500, "database_error", error); } return Ok(Response::builder(200).build()); } None => { return error_json!( 400, "invalid_request", "Please provide an `url` to delete." ) } } } else { return error_json!(400, "invalid_request", "This action is not supported in form-encoded mode. (JSON requests support more actions, use JSON!)"); } } let mf2 = convert_form_to_mf2_json(form); if mf2["properties"].as_object().unwrap().keys().len() > 0 { return new_post(req, mf2).await; } return error_json!( 400, "invalid_request", "Try sending h=entry&content=something%20interesting" ); } pub async fn post_handler(mut req: Request>) -> Result { match req.content_type() { Some(value) => { if value == Mime::from_str("application/json").unwrap() { match req.body_json::().await { Ok(parsed) => return process_json(req, parsed).await, Err(err) => { return error_json!( 400, "invalid_request", format!("Parsing JSON failed: {:?}", err) ) } } } else if value == Mime::from_str("application/x-www-form-urlencoded").unwrap() { match req.body_form::>().await { Ok(parsed) => return process_form(req, parsed).await, Err(err) => { return error_json!( 400, "invalid_request", format!("Parsing form failed: {:?}", err) ) } } } else { return error_json!( 415, "unsupported_media_type", "What's this? Try sending JSON instead. (urlencoded form also works but is less cute)" ); } } _ => { return error_json!( 415, "unsupported_media_type", "You didn't send a Content-Type header, so we don't know how to parse your request." ); } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_no_replace_uid() { let mf2 = json!({ "type": ["h-card"], "properties": { "uid": ["https://fireburn.ru/"], "name": ["Vika Nezrimaya"], "note": ["A crazy programmer girl who wants some hugs"] } }); let (uid, normalized) = normalize_mf2( mf2.clone(), &User::new( "https://fireburn.ru/", "https://quill.p3k.io/", "create update media", ), ); assert_eq!( normalized["properties"]["uid"][0], mf2["properties"]["uid"][0], "UID was replaced" ); assert_eq!( normalized["properties"]["uid"][0], uid, "Returned post location doesn't match UID" ); } #[test] fn test_form_to_mf2() { use serde_urlencoded::from_str; assert_eq!( convert_form_to_mf2_json(from_str("h=entry&content=something%20interesting").unwrap()), json!({ "type": ["h-entry"], "properties": { "content": ["something interesting"] } }) ) } #[test] fn test_normalize_mf2() { let mf2 = json!({ "type": ["h-entry"], "properties": { "content": ["This is content!"] } }); let (uid, post) = normalize_mf2( mf2, &User::new( "https://fireburn.ru/", "https://quill.p3k.io/", "create update media", ), ); assert_eq!( post["properties"]["published"] .as_array() .expect("post['published'] is undefined") .len(), 1, "Post doesn't have a published time" ); DateTime::parse_from_rfc3339(post["properties"]["published"][0].as_str().unwrap()) .expect("Couldn't parse date from rfc3339"); assert!( !post["properties"]["url"] .as_array() .expect("post['url'] is undefined") .is_empty(), "Post doesn't have any URLs" ); assert_eq!( post["properties"]["uid"] .as_array() .expect("post['uid'] is undefined") .len(), 1, "Post doesn't have a single UID" ); assert_eq!( post["properties"]["uid"][0], uid, "UID of a post and its supposed location don't match" ); assert!( uid.starts_with("https://fireburn.ru/posts/"), "The post namespace is incorrect" ); assert_eq!( post["properties"]["content"][0]["html"] .as_str() .expect("Post doesn't have a rich content object") .trim(), "

This is content!

", "Parsed Markdown content doesn't match expected HTML" ); assert_eq!( post["properties"]["channel"][0], "https://fireburn.ru/feeds/main", "Post isn't posted to the main channel" ); assert_eq!( post["properties"]["author"][0], "https://fireburn.ru/", "Post author is unknown" ); } #[test] fn test_mp_slug() { let mf2 = json!({ "type": ["h-entry"], "properties": { "content": ["This is content!"], "mp-slug": ["hello-post"] }, }); let (_, post) = normalize_mf2( mf2, &User::new( "https://fireburn.ru/", "https://quill.p3k.io/", "create update media", ), ); assert!( post["properties"]["url"] .as_array() .unwrap() .iter() .map(|i| i.as_str().unwrap()) .any(|i| i == "https://fireburn.ru/posts/hello-post"), "Didn't found an URL pointing to the location expected by the mp-slug semantics" ); assert!( post["properties"]["mp-slug"].as_array().is_none(), "mp-slug wasn't deleted from the array!" ) } #[test] fn test_normalize_feed() { let mf2 = json!({ "type": ["h-feed"], "properties": { "name": "Main feed", "mp-slug": ["main"] } }); let (uid, post) = normalize_mf2( mf2, &User::new( "https://fireburn.ru/", "https://quill.p3k.io/", "create update media", ), ); assert_eq!( post["properties"]["uid"][0], uid, "UID of a post and its supposed location don't match" ); assert_eq!(post["properties"]["author"][0], "https://fireburn.ru/"); assert!( post["properties"]["url"] .as_array() .unwrap() .iter() .map(|i| i.as_str().unwrap()) .any(|i| i == "https://fireburn.ru/feeds/main"), "Didn't found an URL pointing to the location expected by the mp-slug semantics" ); assert!( post["properties"]["mp-slug"].as_array().is_none(), "mp-slug wasn't deleted from the array!" ) } }