use core::iter::Iterator; use std::str::FromStr; use std::convert::TryInto; use log::{warn, error}; use futures::stream; use futures::StreamExt; use chrono::prelude::*; use http_types::Mime; use tide::prelude::json; use tide::{Request, Response, Result}; use newbase60::num_to_sxg; use crate::ApplicationState; use crate::database::{Storage}; use crate::indieauth::User; static DEFAULT_CHANNEL_PATH: &str = "/feeds/main"; static DEFAULT_CHANNEL_NAME: &str = "Main feed"; static CONTACTS_CHANNEL_PATH: &str = "/feeds/vcards"; static CONTACTS_CHANNEL_NAME: &str = "My address book"; macro_rules! response { ($($code:expr, $json:tt)+) => { $( Ok(Response::builder($code).body(json!($json)).build()) )+ }; } macro_rules! error_json { ($($code:expr, $error:expr, $error_desc:expr)+) => { $( response!($code, { "error": $error, "error_description": $error_desc }) )+ } } fn get_folder_from_type(post_type: &str) -> String { (match post_type { "h-feed" => "feeds/", "h-card" => "vcards/", "h-event" => "events/", "h-food" => "food/", _ => "posts/" }).to_string() } fn normalize_mf2(mut body: serde_json::Value, user: &User) -> (String, serde_json::Value) { // Normalize the MF2 object here. let me = &user.me; let published: DateTime; let folder = get_folder_from_type(body["type"][0].as_str().unwrap()); if let Some(dt) = body["properties"]["published"][0].as_str() { // Check if the datetime is parsable. match DateTime::parse_from_rfc3339(dt) { Ok(dt) => { published = dt; } Err(_) => { // Reset the datetime to a proper datetime. // Do not attempt to recover the information. // Do not pass GO. Do not collect $200. let curtime: DateTime = Local::now(); body["properties"]["published"] = serde_json::Value::Array(vec![ serde_json::Value::String(curtime.to_rfc3339()) ]); published = chrono::DateTime::from(curtime); } } } else { // Set the datetime. let curtime: DateTime = Local::now(); body["properties"]["published"] = serde_json::Value::Array(vec![ serde_json::Value::String(curtime.to_rfc3339()) ]); published = chrono::DateTime::from(curtime); } match body["properties"]["uid"][0].as_str() { None => { let uid = serde_json::Value::String( me.join( &(folder.clone() + &num_to_sxg(published.timestamp_millis().try_into().unwrap())) ).unwrap().to_string()); body["properties"]["uid"] = serde_json::Value::Array(vec![uid.clone()]); match body["properties"]["url"].as_array_mut() { Some(array) => { array.push(uid) } None => { body["properties"]["url"] = body["properties"]["uid"].clone() } } } Some(uid_str) => { let uid = uid_str.to_string(); match body["properties"]["url"].as_array_mut() { Some(array) => { if !array.iter().any(|i| i.as_str().unwrap_or("") == uid) { array.push(serde_json::Value::String(uid)) } } None => { body["properties"]["url"] = body["properties"]["uid"].clone() } } } } if let Some(slugs) = body["properties"]["mp-slug"].as_array() { let new_urls = slugs.iter() .map(|i| i.as_str().unwrap_or("")) .filter(|i| i != &"") .map(|i| me.join(&((&folder).clone() + i)).unwrap().to_string()) .collect::>(); let urls = body["properties"]["url"].as_array_mut().unwrap(); new_urls.iter().for_each(|i| urls.push(json!(i))); } let props = body["properties"].as_object_mut().unwrap(); props.remove("mp-slug"); if body["properties"]["content"][0].is_string() { // Convert the content to HTML using the `markdown` crate body["properties"]["content"] = json!([{ "html": markdown::to_html(body["properties"]["content"][0].as_str().unwrap()), "value": body["properties"]["content"][0] }]) } if body["properties"]["channel"][0].as_str().is_none() { if body["type"][0] == "h-entry" { // Set the channel to the main channel... let default_channel = me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); body["properties"]["channel"] = json!([default_channel]); } else if body["type"][0] == "h-card" { let default_channel = me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); body["properties"]["channel"] = json!([default_channel]); } else { body["properties"]["channel"] = json!([]); } // TODO: Sort other types of posts into channels too } body["properties"]["posted-with"] = json!([user.client_id]); if body["properties"]["author"][0].as_str().is_none() { body["properties"]["author"] = json!([me.as_str()]) } // TODO: maybe highlight #hashtags? // Find other processing to do and insert it here return (body["properties"]["uid"][0].as_str().unwrap().to_string(), body) } async fn new_post(req: Request>, body: serde_json::Value) -> Result { // First, check for rights. let user = req.ext::().unwrap(); if !user.check_scope("create") { return error_json!(401, "invalid_scope", "Not enough privileges to post. Try a token with a \"create\" scope instead.") } let (uid, post) = normalize_mf2(body, user); // Security check! // This software might also be used in a multi-user setting // where several users or identities share one Micropub server // (maybe a family website or a shitpost sideblog?) if post["properties"]["url"].as_array().unwrap().iter().any(|url| !url.as_str().unwrap().starts_with(user.me.as_str())) || !post["properties"]["uid"][0].as_str().unwrap().starts_with(user.me.as_str()) || post["properties"]["channel"].as_array().unwrap().iter().any(|url| !url.as_str().unwrap().starts_with(user.me.as_str())) { return error_json!(403, "forbidden", "You're trying to post to someone else's website...") } let storage = &req.state().storage; match storage.post_exists(&uid).await { Ok(exists) => if exists { return error_json!(409, "already_exists", format!("A post with the exact same UID already exists in the database: {}", uid)) }, Err(err) => return Ok(err.into()) } // WARNING: WRITE BOUNDARY //let mut storage = RwLockUpgradableReadGuard::upgrade(storage).await; if let Err(err) = storage.put_post(&post).await { return error_json!(500, "database_error", format!("{}", err)) } for channel in post["properties"]["channel"] .as_array().unwrap().iter() .map(|i| i.as_str().unwrap_or("").to_string()) .filter(|i| !i.is_empty()) .collect::>() { let default_channel = user.me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); let vcards_channel = user.me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); match storage.post_exists(&channel).await { Ok(exists) => if exists { if let Err(err) = storage.update_post(&channel, json!({ "add": { "children": [uid] } })).await { return error_json!(500, "database_error", format!("Couldn't insert post into the channel due to a database error: {}", err)) } } else if channel == default_channel || channel == vcards_channel { if let Err(err) = create_feed(storage, &uid, &channel, &user).await { return error_json!(500, "database_error", format!("Couldn't save feed: {}", err)) } } else { warn!("Ignoring request to post to a non-existent feed: {}", channel); }, Err(err) => return error_json!(500, "database_error", err) } } // END WRITE BOUNDARY // do background processing on the post async_std::task::spawn(post_process_new_post(req, post)); return Ok(Response::builder(202) .header("Location", &uid) .body(json!({"status": "accepted", "location": &uid})) .build()); } async fn create_feed(storage: &impl Storage, uid: &str, channel: &str, user: &User) -> crate::database::Result<()> { let path = url::Url::parse(channel).unwrap().path().to_string(); let (name, slug) = if path == DEFAULT_CHANNEL_PATH { (DEFAULT_CHANNEL_NAME, "main") } else if path == CONTACTS_CHANNEL_PATH { (CONTACTS_CHANNEL_NAME, "vcards") } else { panic!("Tried to create an unknown default feed!"); }; let (_, feed) = normalize_mf2(json!({ "type": ["h-feed"], "properties": { "name": [name], "mp-slug": [slug], }, "children": [uid] }), &user); storage.put_post(&feed).await } async fn post_process_new_post(req: Request>, post: serde_json::Value) { // TODO: Post-processing the post (aka second write pass) // - [-] Download rich reply contexts // - [-] Syndicate the post if requested, add links to the syndicated copies // - [ ] Send WebSub notifications to the hub (if we happen to have one) // - [x] Send webmentions let http = &req.state().http_client; let uid = post["properties"]["uid"][0].as_str().unwrap().to_string(); // 1. Download rich reply contexts // This needs to be done first, because at this step we can also determine webmention endpoints // and save them for later use. Additionally, the richer our content is, the better. // This needs to be done asynchronously, so the posting experience for the author will be as fast // as possible without making them wait for potentially slow downstream websites to load // 1.1. Collect the list of contextually-significant post to load context from. // This will include reply-tos, liked, reposted and bookmarked content // // TODO: Fetch links mentioned in a post, since we need to send webmentions to those as mentions let mut contextually_significant_posts: Vec = vec![]; for prop in &["in-reply-to", "like-of", "repost-of", "bookmark-of"] { if let Some(array) = post["properties"][prop].as_array() { contextually_significant_posts.extend( array.iter() .filter_map(|v| v.as_str() .and_then(|v| surf::Url::parse(v).ok() ) ) ); } } // 1.2. Deduplicate the list contextually_significant_posts.sort_unstable(); contextually_significant_posts.dedup(); // 1.3. Fetch the posts with their bodies and save them in a new Vec<(surf::Url, String)> let posts_with_bodies: Vec<(surf::Url, String)> = stream::iter(contextually_significant_posts.into_iter()) .filter_map(|v: surf::Url| async move { if let Ok(res) = http.get(&v).send().await { if res.status() != 200 { return None } else { return Some((v, res)) } } else { return None } }) .filter_map(|(v, mut res): (surf::Url, surf::Response)| async move { if let Ok(body) = res.body_string().await { return Some((v, body)) } else { return None } }) .collect().await; // 1.4. Parse the bodies and include them in relevant places on the MF2 struct // This requires an MF2 parser, and there are none for Rust at the moment. // // TODO: integrate https://gitlab.com/vikanezrimaya/mf2-parser when it's ready // 2. Syndicate the post let syndicated_copies: Vec; if let Some(syndication_targets) = post["properties"]["syndicate-to"].as_array() { syndicated_copies = stream::iter(syndication_targets.into_iter() .filter_map(|v| v.as_str()) .filter_map(|t| surf::Url::parse(t).ok()) .collect::>().into_iter() .map(|_t: surf::Url| async move { // TODO: Define supported syndication methods // and syndicate the endpoint there // Possible ideas: // - indieweb.xyz (might need a lot of space for the buttons though, investigate proposing grouping syndication targets) // - news.indieweb.org (IndieNews - needs a category linking to #indienews) // - Twitter via brid.gy (do I really need Twitter syndication tho?) if false { Some("") } else { None } }) ).buffer_unordered(3).filter_map(|v| async move { v }).map(|v| serde_json::Value::String(v.to_string())).collect::>().await; } else { syndicated_copies = vec![] } // Save the post a second time here after syndication // We use update_post here to prevent race conditions since its required to be atomic let mut update = json!({ "action": "update", "url": &uid }); if !syndicated_copies.is_empty() { update["add"] = json!({}); update["add"]["syndication"] = serde_json::Value::Array(syndicated_copies); } if !posts_with_bodies.is_empty() { error!("Replacing context links with parsed MF2-JSON data is not yet implemented (but it's ok! it'll just be less pretty)") /* TODO: Replace context links with parsed MF2-JSON data * / update["replace"] = {} update["replace"]["like-of"] = [] update["replace"]["in-reply-to"] = [] update["replace"]["bookmark-of"] = [] update["replace"]["repost-of"] = [] // */ } // We don't need the original copy of the post anymore... I hope! // This will act as a safeguard so I can't read stale data by accident anymore... drop(post); if let Err(err) = req.state().storage.update_post(&uid, update).await { error!("Encountered error while post-processing a post: {}", err) // At this point, we can still continue, we just won't have rich data for the post // I wonder why could it even happen except in case of a database disconnection? } // 3. Send WebSub notifications // TODO // 4. Send webmentions // We'll need the bodies here to get their endpoints let source = &uid; stream::iter(posts_with_bodies.into_iter()) .filter_map(|(url, body): (surf::Url, String)| async move { // TODO: Replace this function once the MF2 parser is ready // A compliant parser's output format includes rels, // we could just find a Webmention one in there let pattern = easy_scraper::Pattern::new(r#""#).expect("Pattern for webmentions couldn't be parsed"); let matches = pattern.matches(&body); if matches.is_empty() { return None } let endpoint = &matches[0]["url"]; if let Ok(endpoint) = url.join(endpoint) { Some((url, endpoint)) } else { None } }) .map(|(target, endpoint)| async move { let response = http.post(&endpoint) .content_type("application/x-www-form-urlencoded") .body( serde_urlencoded::to_string(vec![("source", source), ("target", &target.to_string())]) .expect("Couldn't construct webmention form") ).send().await; // TODO improve error handling if let Ok(response) = response { if response.status() == 200 || response.status() == 201 || response.status() == 202 { Ok(()) } else { Err(()) } } else { Err(()) } }).buffer_unordered(3).collect::>().await; } async fn process_json(req: Request>, body: serde_json::Value) -> Result { let is_action = body["action"].is_string() && body["url"].is_string(); if is_action { // This could be an update, a deletion or an undeletion request. // Process it separately. let action = body["action"].as_str().unwrap(); let url = body["url"].as_str().unwrap(); let user = req.ext::().unwrap(); match action { "delete" => { if !user.check_scope("delete") { return error_json!(401, "insufficient_scope", "You need a `delete` scope to delete posts.") } if let Err(error) = req.state().storage.delete_post(&url).await { return Ok(error.into()) } return Ok(Response::builder(200).build()); }, "update" => { if !user.check_scope("update") { return error_json!(401, "insufficient_scope", "You need an `update` scope to update posts.") } if let Err(error) = req.state().storage.update_post(&url, body.clone()).await { return Ok(error.into()) } else { return Ok(Response::builder(204).build()) } }, _ => { return error_json!(400, "invalid_request", "This action is not supported.") } } } else if body["type"][0].is_string() { // This is definitely an h-entry or something similar. Check if it has properties? if body["properties"].is_object() { // Ok, this is definitely a new h-entry. Let's save it. return new_post(req, body).await } else { return error_json!(400, "invalid_request", "This MF2-JSON object has a type, but not properties. This makes no sense to post.") } } else { return error_json!(400, "invalid_request", "Try sending MF2-structured data or an object with an \"action\" and \"url\" keys.") } } fn convert_form_to_mf2_json(form: Vec<(String, String)>) -> serde_json::Value { let mut mf2 = json!({"type": [], "properties": {}}); for (k, v) in form { if k == "h" { mf2["type"].as_array_mut().unwrap().push(json!("h-".to_string() + &v)); } else if k != "access_token" { let key = k.strip_suffix("[]").unwrap_or(&k); match mf2["properties"][key].as_array_mut() { Some(prop) => prop.push(json!(v)), None => mf2["properties"][key] = json!([v]) } } } if mf2["type"].as_array().unwrap().is_empty() { mf2["type"].as_array_mut().unwrap().push(json!("h-entry")); } mf2 } async fn process_form(req: Request>, form: Vec<(String, String)>) -> Result { if let Some((_, v)) = form.iter().find(|(k, _)| k == "action") { if v == "delete" { let user = req.ext::().unwrap(); if !user.check_scope("delete") { return error_json!(401, "insufficient_scope", "You cannot delete posts without a `delete` scope.") } match form.iter().find(|(k, _)| k == "url") { Some((_, url)) => { if let Err(error) = req.state().storage.delete_post(&url).await { return error_json!(500, "database_error", error) } return Ok(Response::builder(200).build()) }, None => return error_json!(400, "invalid_request", "Please provide an `url` to delete.") } } else { return error_json!(400, "invalid_request", "This action is not supported in form-encoded mode. (JSON requests support more actions, use them!)") } } let mf2 = convert_form_to_mf2_json(form); if mf2["properties"].as_object().unwrap().keys().len() > 0 { return new_post(req, mf2).await; } return error_json!(400, "invalid_request", "Try sending h=entry&content=something%20interesting"); } pub async fn post_handler(mut req: Request>) -> Result { match req.content_type() { Some(value) => { if value == Mime::from_str("application/json").unwrap() { match req.body_json::().await { Ok(parsed) => { return process_json(req, parsed).await }, Err(err) => return error_json!( 400, "invalid_request", format!("Parsing JSON failed: {:?}", err) ) } } else if value == Mime::from_str("application/x-www-form-urlencoded").unwrap() { match req.body_form::>().await { Ok(parsed) => { return process_form(req, parsed).await }, Err(err) => return error_json!( 400, "invalid_request", format!("Parsing form failed: {:?}", err) ) } } else { return error_json!( 415, "unsupported_media_type", "What's this? Try sending JSON instead. (urlencoded form also works but is less cute)" ) } } _ => { return error_json!( 415, "unsupported_media_type", "You didn't send a Content-Type header, so we don't know how to parse your request." ); } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_no_replace_uid() { let mf2 = json!({ "type": ["h-card"], "properties": { "uid": ["https://fireburn.ru/"], "name": ["Vika Nezrimaya"], "note": ["A crazy programmer girl who wants some hugs"] } }); let (uid, normalized) = normalize_mf2(mf2.clone(), &User::new("https://fireburn.ru/", "https://quill.p3k.io/", "create update media")); assert_eq!(normalized["properties"]["uid"][0], mf2["properties"]["uid"][0], "UID was replaced"); assert_eq!(normalized["properties"]["uid"][0], uid, "Returned post location doesn't match UID"); } #[test] fn test_form_to_mf2() { use serde_urlencoded::from_str; assert_eq!( convert_form_to_mf2_json(from_str("h=entry&content=something%20interesting").unwrap()), json!({ "type": ["h-entry"], "properties": { "content": ["something interesting"] } }) ) } #[test] fn test_normalize_mf2() { let mf2 = json!({ "type": ["h-entry"], "properties": { "content": ["This is content!"] } }); let (uid, post) = normalize_mf2(mf2, &User::new("https://fireburn.ru/", "https://quill.p3k.io/", "create update media")); assert_eq!(post["properties"]["published"].as_array().expect("post['published'] is undefined").len(), 1, "Post doesn't have a published time"); DateTime::parse_from_rfc3339(post["properties"]["published"][0].as_str().unwrap()).expect("Couldn't parse date from rfc3339"); assert!(post["properties"]["url"].as_array().expect("post['url'] is undefined").len() > 0, "Post doesn't have any URLs"); assert_eq!(post["properties"]["uid"].as_array().expect("post['uid'] is undefined").len(), 1, "Post doesn't have a single UID"); assert_eq!(post["properties"]["uid"][0], uid, "UID of a post and its supposed location don't match"); assert!(uid.starts_with("https://fireburn.ru/posts/"), "The post namespace is incorrect"); assert_eq!(post["properties"]["content"][0]["html"].as_str().expect("Post doesn't have a rich content object").trim(), "

This is content!

", "Parsed Markdown content doesn't match expected HTML"); assert_eq!(post["properties"]["channel"][0], "https://fireburn.ru/feeds/main", "Post isn't posted to the main channel"); assert_eq!(post["properties"]["author"][0], "https://fireburn.ru/", "Post author is unknown"); } #[test] fn test_mp_slug() { let mf2 = json!({ "type": ["h-entry"], "properties": { "content": ["This is content!"], "mp-slug": ["hello-post"] }, }); let (_, post) = normalize_mf2(mf2, &User::new("https://fireburn.ru/", "https://quill.p3k.io/", "create update media")); assert!(post["properties"]["url"] .as_array() .unwrap() .iter() .map(|i| i.as_str().unwrap()) .any(|i| i == "https://fireburn.ru/posts/hello-post"), "Didn't found an URL pointing to the location expected by the mp-slug semantics"); assert!(post["properties"]["mp-slug"].as_array().is_none(), "mp-slug wasn't deleted from the array!") } #[test] fn test_normalize_feed() { let mf2 = json!({ "type": ["h-feed"], "properties": { "name": "Main feed", "mp-slug": ["main"] } }); let (uid, post) = normalize_mf2(mf2, &User::new("https://fireburn.ru/", "https://quill.p3k.io/", "create update media")); assert_eq!(post["properties"]["uid"][0], uid, "UID of a post and its supposed location don't match"); assert_eq!(post["properties"]["author"][0], "https://fireburn.ru/"); assert!(post["properties"]["url"] .as_array() .unwrap() .iter() .map(|i| i.as_str().unwrap()) .any(|i| i == "https://fireburn.ru/feeds/main"), "Didn't found an URL pointing to the location expected by the mp-slug semantics"); assert!(post["properties"]["mp-slug"].as_array().is_none(), "mp-slug wasn't deleted from the array!") } }