diff options
Diffstat (limited to 'src/micropub')
-rw-r--r-- | src/micropub/get.rs | 82 | ||||
-rw-r--r-- | src/micropub/mod.rs | 846 | ||||
-rw-r--r-- | src/micropub/util.rs | 444 |
3 files changed, 1372 insertions, 0 deletions
diff --git a/src/micropub/get.rs b/src/micropub/get.rs new file mode 100644 index 0000000..718714a --- /dev/null +++ b/src/micropub/get.rs @@ -0,0 +1,82 @@ +use crate::database::{MicropubChannel, Storage}; +use crate::indieauth::User; +use crate::ApplicationState; +use tide::prelude::{json, Deserialize}; +use tide::{Request, Response, Result}; + +#[derive(Deserialize)] +struct QueryOptions { + q: String, + url: Option<String>, +} + +pub async fn get_handler<Backend>(req: Request<ApplicationState<Backend>>) -> Result +where + Backend: Storage + Send + Sync, +{ + let user = req.ext::<User>().unwrap(); + let backend = &req.state().storage; + let media_endpoint = &req.state().media_endpoint; + let query = req.query::<QueryOptions>().unwrap_or(QueryOptions { + q: "".to_string(), + url: None, + }); + match &*query.q { + "config" => { + let channels: Vec<MicropubChannel>; + match backend.get_channels(user.me.as_str()).await { + Ok(chans) => channels = chans, + Err(err) => return Ok(err.into()) + } + Ok(Response::builder(200).body(json!({ + "q": ["source", "config", "channel"], + "channels": channels, + "media-endpoint": media_endpoint + })).build()) + }, + "channel" => { + let channels: Vec<MicropubChannel>; + match backend.get_channels(user.me.as_str()).await { + Ok(chans) => channels = chans, + Err(err) => return Ok(err.into()) + } + Ok(Response::builder(200).body(json!(channels)).build()) + } + "source" => { + if user.check_scope("create") || user.check_scope("update") || user.check_scope("delete") || user.check_scope("undelete") { + if let Some(url) = query.url { + match backend.get_post(&url).await { + Ok(post) => if let Some(post) = post { + Ok(Response::builder(200).body(post).build()) + } else { + Ok(Response::builder(404).build()) + }, + Err(err) => Ok(err.into()) + } + } else { + Ok(Response::builder(400).body(json!({ + "error": "invalid_request", + "error_description": "Please provide `url`." + })).build()) + } + } else { + Ok(Response::builder(401).body(json!({ + "error": "insufficient_scope", + "error_description": "You don't have the required scopes to proceed.", + "scope": "update" + })).build()) + } + }, + // TODO: ?q=food, ?q=geo, ?q=contacts + // Depends on indexing posts + // Errors + "" => Ok(Response::builder(400).body(json!({ + "error": "invalid_request", + "error_description": "No ?q= parameter specified. Try ?q=config maybe?" + })).build()), + _ => Ok(Response::builder(400).body(json!({ + "error": "invalid_request", + "error_description": "Unsupported ?q= query. Try ?q=config and see the q array for supported values." + })).build()) + } +} diff --git a/src/micropub/mod.rs b/src/micropub/mod.rs new file mode 100644 index 0000000..02eee6e --- /dev/null +++ b/src/micropub/mod.rs @@ -0,0 +1,846 @@ +use std::collections::HashMap; +use std::sync::Arc; + +use crate::database::{MicropubChannel, Storage, StorageError}; +use crate::indieauth::backend::AuthBackend; +use crate::indieauth::User; +use crate::micropub::util::form_to_mf2_json; +use axum::extract::{BodyStream, Query, Host}; +use axum::headers::ContentType; +use axum::response::{IntoResponse, Response}; +use axum::TypedHeader; +use axum::{http::StatusCode, Extension}; +use serde::{Deserialize, Serialize}; +use serde_json::json; +use tokio::sync::Mutex; +use tokio::task::JoinSet; +use tracing::{debug, error, info, warn}; +use kittybox_indieauth::{Scope, TokenData}; +use kittybox_util::{MicropubError, ErrorType}; + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[serde(rename_all = "kebab-case")] +enum QueryType { + Source, + Config, + Channel, + SyndicateTo, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct MicropubQuery { + q: QueryType, + url: Option<String>, +} + +impl From<StorageError> for MicropubError { + fn from(err: StorageError) -> Self { + Self { + error: match err.kind() { + crate::database::ErrorKind::NotFound => ErrorType::NotFound, + _ => ErrorType::InternalServerError, + }, + error_description: format!("Backend error: {}", err), + } + } +} + +mod util; +pub(crate) use util::normalize_mf2; + +#[derive(Debug)] +struct FetchedPostContext { + url: url::Url, + mf2: serde_json::Value, + webmention: Option<url::Url>, +} + +fn populate_reply_context( + mf2: &serde_json::Value, + prop: &str, + ctxs: &[FetchedPostContext], +) -> Option<Vec<serde_json::Value>> { + mf2["properties"][prop].as_array().map(|array| { + array + .iter() + // TODO: This seems to be O(n^2) and I don't like it. + // Switching `ctxs` to a hashmap might speed it up to O(n) + // The key would be the URL/UID + .map(|i| ctxs + .iter() + .find(|ctx| Some(ctx.url.as_str()) == i.as_str()) + .and_then(|ctx| ctx.mf2["items"].get(0)) + .unwrap_or(i)) + .cloned() + .collect::<Vec<serde_json::Value>>() + }) +} + +#[tracing::instrument(skip(db))] +async fn background_processing<D: 'static + Storage>( + db: D, + mf2: serde_json::Value, + http: reqwest::Client, +) -> () { + // TODO: Post-processing the post (aka second write pass) + // - [x] Download rich reply contexts + // - [ ] Syndicate the post if requested, add links to the syndicated copies + // - [ ] Send WebSub notifications to the hub (if we happen to have one) + // - [x] Send webmentions + + use futures_util::StreamExt; + + let uid: &str = mf2["properties"]["uid"][0].as_str().unwrap(); + + let context_props = ["in-reply-to", "like-of", "repost-of", "bookmark-of"]; + let mut context_urls: Vec<url::Url> = vec![]; + for prop in &context_props { + if let Some(array) = mf2["properties"][prop].as_array() { + context_urls.extend( + array + .iter() + .filter_map(|v| v.as_str()) + .filter_map(|v| v.parse::<url::Url>().ok()), + ); + } + } + // TODO parse HTML in e-content and add links found here + context_urls.sort_unstable_by_key(|u| u.to_string()); + context_urls.dedup(); + + // TODO: Make a stream to fetch all these posts and convert them to MF2 + let post_contexts = { + let http = &http; + tokio_stream::iter(context_urls.into_iter()) + .then(move |url: url::Url| http.get(url).send()) + .filter_map(|response| futures::future::ready(response.ok())) + .filter(|response| futures::future::ready(response.status() == 200)) + .filter_map(|response: reqwest::Response| async move { + // 1. We need to preserve the URL + // 2. We need to get the HTML for MF2 processing + // 3. We need to get the webmention endpoint address + // All of that can be done in one go. + let url = response.url().clone(); + // TODO parse link headers + let links = response + .headers() + .get_all(hyper::http::header::LINK) + .iter() + .cloned() + .collect::<Vec<hyper::http::HeaderValue>>(); + let html = response.text().await; + if html.is_err() { + return None; + } + let html = html.unwrap(); + let mf2 = microformats::from_html(&html, url.clone()).unwrap(); + // TODO use first Link: header if available + let webmention: Option<url::Url> = mf2 + .rels + .by_rels() + .get("webmention") + .and_then(|i| i.first().cloned()); + + dbg!(Some(FetchedPostContext { + url, + mf2: serde_json::to_value(mf2).unwrap(), + webmention + })) + }) + .collect::<Vec<FetchedPostContext>>() + .await + }; + + let mut update = MicropubUpdate { + replace: Some(Default::default()), + ..Default::default() + }; + for prop in context_props { + if let Some(json) = populate_reply_context(&mf2, prop, &post_contexts) { + update.replace.as_mut().unwrap().insert(prop.to_owned(), json); + } + } + if !update.replace.as_ref().unwrap().is_empty() { + if let Err(err) = db.update_post(uid, update).await { + error!("Failed to update post with rich reply contexts: {}", err); + } + } + + // At this point we can start syndicating the post. + // Currently we don't really support any syndication endpoints, but still! + /*if let Some(syndicate_to) = mf2["properties"]["mp-syndicate-to"].as_array() { + let http = &http; + tokio_stream::iter(syndicate_to) + .filter_map(|i| futures::future::ready(i.as_str())) + .for_each_concurrent(3, |s: &str| async move { + #[allow(clippy::match_single_binding)] + match s { + _ => { + todo!("Syndicate to generic webmention-aware service {}", s); + } + // TODO special handling for non-webmention-aware services like the birdsite + } + }) + .await; + }*/ + + { + let http = &http; + tokio_stream::iter( + post_contexts + .into_iter() + .filter(|ctx| ctx.webmention.is_some()), + ) + .for_each_concurrent(2, |ctx| async move { + let mut map = std::collections::HashMap::new(); + map.insert("source", uid); + map.insert("target", ctx.url.as_str()); + + match http + .post(ctx.webmention.unwrap().clone()) + .form(&map) + .send() + .await + { + Ok(res) => { + if !res.status().is_success() { + warn!( + "Failed to send a webmention for {}: got HTTP {}", + ctx.url, + res.status() + ); + } else { + info!( + "Sent a webmention to {}, got HTTP {}", + ctx.url, + res.status() + ) + } + } + Err(err) => warn!("Failed to send a webmention for {}: {}", ctx.url, err), + } + }) + .await; + } +} + +// TODO actually save the post to the database and schedule post-processing +pub(crate) async fn _post<D: 'static + Storage>( + user: &TokenData, + uid: String, + mf2: serde_json::Value, + db: D, + http: reqwest::Client, + jobset: Arc<Mutex<JoinSet<()>>>, +) -> Result<Response, MicropubError> { + // Here, we have the following guarantees: + // - The MF2-JSON document is normalized (guaranteed by normalize_mf2) + // - The MF2-JSON document contains a UID + // - The MF2-JSON document's URL list contains its UID + // - The MF2-JSON document's "content" field contains an HTML blob, if present + // - The MF2-JSON document's publishing datetime is present + // - The MF2-JSON document's target channels are set + // - The MF2-JSON document's author is set + + // Security check! Do we have an OAuth2 scope to proceed? + if !user.check_scope(&Scope::Create) { + return Err(MicropubError { + error: ErrorType::InvalidScope, + error_description: "Not enough privileges - try acquiring the \"create\" scope." + .to_owned(), + }); + } + + // Security check #2! Are we posting to our own website? + if !uid.starts_with(user.me.as_str()) + || mf2["properties"]["channel"] + .as_array() + .unwrap_or(&vec![]) + .iter() + .any(|url| !url.as_str().unwrap().starts_with(user.me.as_str())) + { + return Err(MicropubError { + error: ErrorType::Forbidden, + error_description: "You're posting to a website that's not yours.".to_owned(), + }); + } + + // Security check #3! Are we overwriting an existing document? + if db.post_exists(&uid).await? { + return Err(MicropubError { + error: ErrorType::AlreadyExists, + error_description: "UID clash was detected, operation aborted.".to_owned(), + }); + } + let user_domain = format!( + "{}{}", + user.me.host_str().unwrap(), + user.me.port() + .map(|port| format!(":{}", port)) + .unwrap_or_default() + ); + // Save the post + tracing::debug!("Saving post to database..."); + db.put_post(&mf2, &user_domain).await?; + + let mut channels = mf2["properties"]["channel"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap_or("")) + .filter(|i| !i.is_empty()); + + let default_channel = user + .me + .join(util::DEFAULT_CHANNEL_PATH) + .unwrap() + .to_string(); + let vcards_channel = user + .me + .join(util::CONTACTS_CHANNEL_PATH) + .unwrap() + .to_string(); + let food_channel = user.me.join(util::FOOD_CHANNEL_PATH).unwrap().to_string(); + let default_channels = vec![default_channel, vcards_channel, food_channel]; + + for chan in &mut channels { + debug!("Adding post {} to channel {}", uid, chan); + if db.post_exists(chan).await? { + db.add_to_feed(chan, &uid).await?; + } else if default_channels.iter().any(|i| chan == i) { + util::create_feed(&db, &uid, chan, user).await?; + } else { + warn!("Ignoring non-existent channel: {}", chan); + } + } + + let reply = + IntoResponse::into_response((StatusCode::ACCEPTED, [("Location", uid.as_str())])); + + #[cfg(not(tokio_unstable))] + jobset.lock().await.spawn(background_processing(db, mf2, http)); + #[cfg(tokio_unstable)] + jobset.lock().await.build_task() + .name(format!("Kittybox background processing for post {}", uid.as_str()).as_str()) + .spawn(background_processing(db, mf2, http)); + + Ok(reply) +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "snake_case")] +enum ActionType { + Delete, + Update, +} + +#[derive(Serialize, Deserialize, Debug)] +#[serde(untagged)] +pub enum MicropubPropertyDeletion { + Properties(Vec<String>), + Values(HashMap<String, Vec<serde_json::Value>>) +} +#[derive(Serialize, Deserialize)] +struct MicropubFormAction { + action: ActionType, + url: String, +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct MicropubAction { + action: ActionType, + url: String, + #[serde(flatten)] + #[serde(skip_serializing_if = "Option::is_none")] + update: Option<MicropubUpdate> +} + +#[derive(Serialize, Deserialize, Debug, Default)] +pub struct MicropubUpdate { + #[serde(skip_serializing_if = "Option::is_none")] + pub replace: Option<HashMap<String, Vec<serde_json::Value>>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub add: Option<HashMap<String, Vec<serde_json::Value>>>, + #[serde(skip_serializing_if = "Option::is_none")] + pub delete: Option<MicropubPropertyDeletion>, + +} + +impl From<MicropubFormAction> for MicropubAction { + fn from(a: MicropubFormAction) -> Self { + debug_assert!(matches!(a.action, ActionType::Delete)); + Self { + action: a.action, + url: a.url, + update: None + } + } +} + +#[tracing::instrument(skip(db))] +async fn post_action<D: Storage, A: AuthBackend>( + action: MicropubAction, + db: D, + user: User<A>, +) -> Result<(), MicropubError> { + let uri = if let Ok(uri) = action.url.parse::<hyper::Uri>() { + uri + } else { + return Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Your URL doesn't parse properly.".to_owned(), + }); + }; + + if uri.authority().unwrap() + != user + .me + .as_str() + .parse::<hyper::Uri>() + .unwrap() + .authority() + .unwrap() + { + return Err(MicropubError { + error: ErrorType::Forbidden, + error_description: "Don't tamper with others' posts!".to_owned(), + }); + } + + match action.action { + ActionType::Delete => { + if !user.check_scope(&Scope::Delete) { + return Err(MicropubError { + error: ErrorType::InvalidScope, + error_description: "You need a \"delete\" scope for this.".to_owned(), + }); + } + + db.delete_post(&action.url).await? + } + ActionType::Update => { + if !user.check_scope(&Scope::Update) { + return Err(MicropubError { + error: ErrorType::InvalidScope, + error_description: "You need an \"update\" scope for this.".to_owned(), + }); + } + + db.update_post( + &action.url, + action.update.ok_or(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Update request is not set.".to_owned(), + })? + ) + .await? + } + } + + Ok(()) +} + +enum PostBody { + Action(MicropubAction), + MF2(serde_json::Value), +} + +#[tracing::instrument] +async fn dispatch_body( + mut body: BodyStream, + content_type: ContentType, +) -> Result<PostBody, MicropubError> { + let body: Vec<u8> = { + debug!("Buffering body..."); + use tokio_stream::StreamExt; + let mut buf = Vec::default(); + + while let Some(chunk) = body.next().await { + buf.extend_from_slice(&chunk.unwrap()) + } + + buf + }; + + debug!("Content-Type: {:?}", content_type); + if content_type == ContentType::json() { + if let Ok(action) = serde_json::from_slice::<MicropubAction>(&body) { + Ok(PostBody::Action(action)) + } else if let Ok(body) = serde_json::from_slice::<serde_json::Value>(&body) { + // quick sanity check + if !body.is_object() || !body["type"].is_array() { + return Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Invalid MF2-JSON detected: `.` should be an object, `.type` should be an array of MF2 types".to_owned() + }); + } + + Ok(PostBody::MF2(body)) + } else { + Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Invalid JSON object passed.".to_owned(), + }) + } + } else if content_type == ContentType::form_url_encoded() { + if let Ok(body) = serde_urlencoded::from_bytes::<MicropubFormAction>(&body) { + Ok(PostBody::Action(body.into())) + } else if let Ok(body) = serde_urlencoded::from_bytes::<Vec<(String, String)>>(&body) { + Ok(PostBody::MF2(form_to_mf2_json(body))) + } else { + Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Invalid form-encoded data. Try h=entry&content=Hello!" + .to_owned(), + }) + } + } else { + Err(MicropubError::new( + ErrorType::UnsupportedMediaType, + "This Content-Type is not recognized. Try application/json instead?", + )) + } +} + +#[tracing::instrument(skip(db, http))] +pub(crate) async fn post<D: Storage + 'static, A: AuthBackend>( + Extension(db): Extension<D>, + Extension(http): Extension<reqwest::Client>, + Extension(jobset): Extension<Arc<Mutex<JoinSet<()>>>>, + TypedHeader(content_type): TypedHeader<ContentType>, + user: User<A>, + body: BodyStream, +) -> axum::response::Response { + match dispatch_body(body, content_type).await { + Ok(PostBody::Action(action)) => match post_action(action, db, user).await { + Ok(()) => Response::default(), + Err(err) => err.into_response(), + }, + Ok(PostBody::MF2(mf2)) => { + let (uid, mf2) = normalize_mf2(mf2, &user); + match _post(&user, uid, mf2, db, http, jobset).await { + Ok(response) => response, + Err(err) => err.into_response(), + } + } + Err(err) => err.into_response(), + } +} + +#[tracing::instrument(skip(db))] +pub(crate) async fn query<D: Storage, A: AuthBackend>( + Extension(db): Extension<D>, + query: Option<Query<MicropubQuery>>, + Host(host): Host, + user: User<A>, +) -> axum::response::Response { + // We handle the invalid query case manually to return a + // MicropubError instead of HTTP 422 + let query = if let Some(Query(query)) = query { + query + } else { + return MicropubError::new( + ErrorType::InvalidRequest, + "Invalid query provided. Try ?q=config to see what you can do." + ).into_response(); + }; + + if axum::http::Uri::try_from(user.me.as_str()) + .unwrap() + .authority() + .unwrap() + != &host + { + return MicropubError::new( + ErrorType::NotAuthorized, + "This website doesn't belong to you.", + ) + .into_response(); + } + + let user_domain = format!( + "{}{}", + user.me.host_str().unwrap(), + user.me.port() + .map(|port| format!(":{}", port)) + .unwrap_or_default() + ); + match query.q { + QueryType::Config => { + let channels: Vec<MicropubChannel> = match db.get_channels(user.me.as_str()).await { + Ok(chans) => chans, + Err(err) => { + return MicropubError::new( + ErrorType::InternalServerError, + &format!("Error fetching channels: {}", err), + ) + .into_response() + } + }; + + axum::response::Json(json!({ + "q": [ + QueryType::Source, + QueryType::Config, + QueryType::Channel, + QueryType::SyndicateTo + ], + "channels": channels, + "_kittybox_authority": user.me.as_str(), + "syndicate-to": [], + "media-endpoint": user.me.join("/.kittybox/media").unwrap().as_str() + })) + .into_response() + } + QueryType::Source => { + match query.url { + Some(url) => { + match db.get_post(&url).await { + Ok(some) => match some { + Some(post) => axum::response::Json(&post).into_response(), + None => MicropubError::new( + ErrorType::NotFound, + "The specified MF2 object was not found in database.", + ) + .into_response(), + }, + Err(err) => MicropubError::new( + ErrorType::InternalServerError, + &format!("Backend error: {}", err), + ) + .into_response(), + } + } + None => { + // Here, one should probably attempt to query at least the main feed and collect posts + // Using a pre-made query function can't be done because it does unneeded filtering + // Don't implement for now, this is optional + MicropubError::new( + ErrorType::InvalidRequest, + "Querying for post list is not implemented yet.", + ) + .into_response() + } + } + } + QueryType::Channel => match db.get_channels(&user_domain).await { + Ok(chans) => axum::response::Json(json!({ "channels": chans })).into_response(), + Err(err) => MicropubError::new( + ErrorType::InternalServerError, + &format!("Error fetching channels: {}", err), + ) + .into_response(), + }, + QueryType::SyndicateTo => { + axum::response::Json(json!({ "syndicate-to": [] })).into_response() + } + } +} + +#[must_use] +pub fn router<S, A>( + storage: S, + http: reqwest::Client, + auth: A, + jobset: Arc<Mutex<JoinSet<()>>> +) -> axum::routing::MethodRouter +where + S: Storage + 'static, + A: AuthBackend +{ + axum::routing::get(query::<S, A>) + .post(post::<S, A>) + .layer::<_, _, std::convert::Infallible>(tower_http::cors::CorsLayer::new() + .allow_methods([ + axum::http::Method::GET, + axum::http::Method::POST, + ]) + .allow_origin(tower_http::cors::Any)) + .layer::<_, _, std::convert::Infallible>(axum::Extension(storage)) + .layer::<_, _, std::convert::Infallible>(axum::Extension(http)) + .layer::<_, _, std::convert::Infallible>(axum::Extension(auth)) + .layer::<_, _, std::convert::Infallible>(axum::Extension(jobset)) +} + +#[cfg(test)] +#[allow(dead_code)] +impl MicropubQuery { + fn config() -> Self { + Self { + q: QueryType::Config, + url: None, + } + } + + fn source(url: &str) -> Self { + Self { + q: QueryType::Source, + url: Some(url.to_owned()), + } + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use crate::{database::Storage, micropub::MicropubError}; + use hyper::body::HttpBody; + use serde_json::json; + use tokio::sync::Mutex; + + use super::FetchedPostContext; + use kittybox_indieauth::{Scopes, Scope, TokenData}; + use axum::extract::Host; + + #[test] + fn test_populate_reply_context() { + let already_expanded_reply_ctx = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"] + } + }); + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "like-of": [ + "https://fireburn.ru/posts/example", + already_expanded_reply_ctx, + "https://fireburn.ru/posts/non-existent" + ] + } + }); + let test_ctx = json!({ + "type": ["h-entry"], + "properties": { + "content": ["This is a post which was reacted to."] + } + }); + let reply_contexts = vec![FetchedPostContext { + url: "https://fireburn.ru/posts/example".parse().unwrap(), + mf2: json!({ "items": [test_ctx] }), + webmention: None, + }]; + + let like_of = super::populate_reply_context(&mf2, "like-of", &reply_contexts).unwrap(); + + assert_eq!(like_of[0], test_ctx); + assert_eq!(like_of[1], already_expanded_reply_ctx); + assert_eq!(like_of[2], "https://fireburn.ru/posts/non-existent"); + } + + #[tokio::test] + async fn test_post_reject_scope() { + let db = crate::database::MemoryStorage::new(); + + let post = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"] + } + }); + let user = TokenData { + me: "https://localhost:8080/".parse().unwrap(), + client_id: "https://kittybox.fireburn.ru/".parse().unwrap(), + scope: Scopes::new(vec![Scope::Profile]), + iat: None, exp: None + }; + let (uid, mf2) = super::normalize_mf2(post, &user); + + let err = super::_post(&user, uid, mf2, db.clone(), reqwest::Client::new(), Arc::new(Mutex::new(tokio::task::JoinSet::new()))) + .await + .unwrap_err(); + + assert_eq!(err.error, super::ErrorType::InvalidScope); + + let hashmap = db.mapping.read().await; + assert!(hashmap.is_empty()); + } + + #[tokio::test] + async fn test_post_reject_different_user() { + let db = crate::database::MemoryStorage::new(); + + let post = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"], + "uid": ["https://fireburn.ru/posts/hello"], + "url": ["https://fireburn.ru/posts/hello"] + } + }); + let user = TokenData { + me: "https://aaronparecki.com/".parse().unwrap(), + client_id: "https://kittybox.fireburn.ru/".parse().unwrap(), + scope: Scopes::new(vec![Scope::Profile, Scope::Create, Scope::Update, Scope::Media]), + iat: None, exp: None + }; + let (uid, mf2) = super::normalize_mf2(post, &user); + + let err = super::_post(&user, uid, mf2, db.clone(), reqwest::Client::new(), Arc::new(Mutex::new(tokio::task::JoinSet::new()))) + .await + .unwrap_err(); + + assert_eq!(err.error, super::ErrorType::Forbidden); + + let hashmap = db.mapping.read().await; + assert!(hashmap.is_empty()); + } + + #[tokio::test] + async fn test_post_mf2() { + let db = crate::database::MemoryStorage::new(); + + let post = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"] + } + }); + let user = TokenData { + me: "https://localhost:8080/".parse().unwrap(), + client_id: "https://kittybox.fireburn.ru/".parse().unwrap(), + scope: Scopes::new(vec![Scope::Profile, Scope::Create]), + iat: None, exp: None + }; + let (uid, mf2) = super::normalize_mf2(post, &user); + + let res = super::_post(&user, uid, mf2, db.clone(), reqwest::Client::new(), Arc::new(Mutex::new(tokio::task::JoinSet::new()))) + .await + .unwrap(); + + assert!(res.headers().contains_key("Location")); + let location = res.headers().get("Location").unwrap(); + assert!(db.post_exists(location.to_str().unwrap()).await.unwrap()); + assert!(db + .post_exists("https://localhost:8080/feeds/main") + .await + .unwrap()); + } + + #[tokio::test] + async fn test_query_foreign_url() { + let mut res = super::query( + axum::Extension(crate::database::MemoryStorage::new()), + Some(axum::extract::Query(super::MicropubQuery::source( + "https://aaronparecki.com/feeds/main", + ))), + Host("aaronparecki.com".to_owned()), + crate::indieauth::User::<crate::indieauth::backend::fs::FileBackend>( + TokenData { + me: "https://fireburn.ru/".parse().unwrap(), + client_id: "https://kittybox.fireburn.ru/".parse().unwrap(), + scope: Scopes::new(vec![Scope::Profile, Scope::Create, Scope::Update, Scope::Media]), + iat: None, exp: None + }, std::marker::PhantomData + ) + ) + .await; + + assert_eq!(res.status(), 401); + let body = res.body_mut().data().await.unwrap().unwrap(); + let json: MicropubError = serde_json::from_slice(&body as &[u8]).unwrap(); + assert_eq!(json.error, super::ErrorType::NotAuthorized); + } +} diff --git a/src/micropub/util.rs b/src/micropub/util.rs new file mode 100644 index 0000000..940d7c3 --- /dev/null +++ b/src/micropub/util.rs @@ -0,0 +1,444 @@ +use crate::database::Storage; +use kittybox_indieauth::TokenData; +use chrono::prelude::*; +use core::iter::Iterator; +use newbase60::num_to_sxg; +use serde_json::json; +use std::convert::TryInto; + +pub(crate) const DEFAULT_CHANNEL_PATH: &str = "/feeds/main"; +const DEFAULT_CHANNEL_NAME: &str = "Main feed"; +pub(crate) const CONTACTS_CHANNEL_PATH: &str = "/feeds/vcards"; +const CONTACTS_CHANNEL_NAME: &str = "My address book"; +pub(crate) const FOOD_CHANNEL_PATH: &str = "/feeds/food"; +const FOOD_CHANNEL_NAME: &str = "My recipe book"; + +fn get_folder_from_type(post_type: &str) -> String { + (match post_type { + "h-feed" => "feeds/", + "h-card" => "vcards/", + "h-event" => "events/", + "h-food" => "food/", + _ => "posts/", + }) + .to_string() +} + +/// Reset the datetime to a proper datetime. +/// Do not attempt to recover the information. +/// Do not pass GO. Do not collect $200. +fn reset_dt(post: &mut serde_json::Value) -> DateTime<FixedOffset> { + let curtime: DateTime<Local> = Local::now(); + post["properties"]["published"] = json!([curtime.to_rfc3339()]); + chrono::DateTime::from(curtime) +} + +pub fn normalize_mf2(mut body: serde_json::Value, user: &TokenData) -> (String, serde_json::Value) { + // Normalize the MF2 object here. + let me = &user.me; + let folder = get_folder_from_type(body["type"][0].as_str().unwrap()); + let published: DateTime<FixedOffset> = + if let Some(dt) = body["properties"]["published"][0].as_str() { + // Check if the datetime is parsable. + match DateTime::parse_from_rfc3339(dt) { + Ok(dt) => dt, + Err(_) => reset_dt(&mut body), + } + } else { + // Set the datetime. + // Note: this code block duplicates functionality with the above failsafe. + // Consider refactoring it to a helper function? + reset_dt(&mut body) + }; + match body["properties"]["uid"][0].as_str() { + None => { + let uid = serde_json::Value::String( + me.join( + &(folder.clone() + + &num_to_sxg(published.timestamp_millis().try_into().unwrap())), + ) + .unwrap() + .to_string(), + ); + body["properties"]["uid"] = serde_json::Value::Array(vec![uid.clone()]); + match body["properties"]["url"].as_array_mut() { + Some(array) => array.push(uid), + None => body["properties"]["url"] = body["properties"]["uid"].clone(), + } + } + Some(uid_str) => { + let uid = uid_str.to_string(); + match body["properties"]["url"].as_array_mut() { + Some(array) => { + if !array.iter().any(|i| i.as_str().unwrap_or("") == uid) { + array.push(serde_json::Value::String(uid)) + } + } + None => body["properties"]["url"] = body["properties"]["uid"].clone(), + } + } + } + if let Some(slugs) = body["properties"]["mp-slug"].as_array() { + let new_urls = slugs + .iter() + .map(|i| i.as_str().unwrap_or("")) + .filter(|i| i != &"") + .map(|i| me.join(&((&folder).clone() + i)).unwrap().to_string()) + .collect::<Vec<String>>(); + let urls = body["properties"]["url"].as_array_mut().unwrap(); + new_urls.iter().for_each(|i| urls.push(json!(i))); + } + let props = body["properties"].as_object_mut().unwrap(); + props.remove("mp-slug"); + + if body["properties"]["content"][0].is_string() { + // Convert the content to HTML using the `markdown` crate + body["properties"]["content"] = json!([{ + "html": markdown::to_html(body["properties"]["content"][0].as_str().unwrap()), + "value": body["properties"]["content"][0] + }]) + } + // TODO: apply this normalization to editing too + if body["properties"]["mp-channel"].is_array() { + let mut additional_channels = body["properties"]["mp-channel"].as_array().unwrap().clone(); + if let Some(array) = body["properties"]["channel"].as_array_mut() { + array.append(&mut additional_channels); + } else { + body["properties"]["channel"] = json!(additional_channels) + } + body["properties"] + .as_object_mut() + .unwrap() + .remove("mp-channel"); + } else if body["properties"]["mp-channel"].is_string() { + let chan = body["properties"]["mp-channel"] + .as_str() + .unwrap() + .to_owned(); + if let Some(array) = body["properties"]["channel"].as_array_mut() { + array.push(json!(chan)) + } else { + body["properties"]["channel"] = json!([chan]); + } + body["properties"] + .as_object_mut() + .unwrap() + .remove("mp-channel"); + } + if body["properties"]["channel"][0].as_str().is_none() { + match body["type"][0].as_str() { + Some("h-entry") => { + // Set the channel to the main channel... + // TODO find like posts and move them to separate private channel + let default_channel = me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); + + body["properties"]["channel"] = json!([default_channel]); + } + Some("h-card") => { + let default_channel = me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); + + body["properties"]["channel"] = json!([default_channel]); + } + Some("h-food") => { + let default_channel = me.join(FOOD_CHANNEL_PATH).unwrap().to_string(); + + body["properties"]["channel"] = json!([default_channel]); + } + // TODO h-event + /*"h-event" => { + let default_channel + },*/ + _ => { + body["properties"]["channel"] = json!([]); + } + } + } + body["properties"]["posted-with"] = json!([user.client_id]); + if body["properties"]["author"][0].as_str().is_none() { + body["properties"]["author"] = json!([me.as_str()]) + } + // TODO: maybe highlight #hashtags? + // Find other processing to do and insert it here + return ( + body["properties"]["uid"][0].as_str().unwrap().to_string(), + body, + ); +} + +pub(crate) fn form_to_mf2_json(form: Vec<(String, String)>) -> serde_json::Value { + let mut mf2 = json!({"type": [], "properties": {}}); + for (k, v) in form { + if k == "h" { + mf2["type"] + .as_array_mut() + .unwrap() + .push(json!("h-".to_string() + &v)); + } else if k != "access_token" { + let key = k.strip_suffix("[]").unwrap_or(&k); + match mf2["properties"][key].as_array_mut() { + Some(prop) => prop.push(json!(v)), + None => mf2["properties"][key] = json!([v]), + } + } + } + if mf2["type"].as_array().unwrap().is_empty() { + mf2["type"].as_array_mut().unwrap().push(json!("h-entry")); + } + mf2 +} + +pub(crate) async fn create_feed( + storage: &impl Storage, + uid: &str, + channel: &str, + user: &TokenData, +) -> crate::database::Result<()> { + let path = url::Url::parse(channel).unwrap().path().to_string(); + + let name = match path.as_str() { + DEFAULT_CHANNEL_PATH => DEFAULT_CHANNEL_NAME, + CONTACTS_CHANNEL_PATH => CONTACTS_CHANNEL_NAME, + FOOD_CHANNEL_PATH => FOOD_CHANNEL_NAME, + _ => panic!("Tried to create an unknown default feed!"), + }; + + let (_, feed) = normalize_mf2( + json!({ + "type": ["h-feed"], + "properties": { + "name": [name], + "uid": [channel] + }, + }), + user, + ); + storage.put_post(&feed, user.me.as_str()).await?; + storage.add_to_feed(channel, uid).await +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + fn token_data() -> TokenData { + TokenData { + me: "https://fireburn.ru/".parse().unwrap(), + client_id: "https://quill.p3k.io/".parse().unwrap(), + scope: kittybox_indieauth::Scopes::new(vec![kittybox_indieauth::Scope::Create]), + exp: Some(u64::MAX), + iat: Some(0) + } + } + + #[test] + fn test_form_to_mf2() { + assert_eq!( + super::form_to_mf2_json( + serde_urlencoded::from_str("h=entry&content=something%20interesting").unwrap() + ), + json!({ + "type": ["h-entry"], + "properties": { + "content": ["something interesting"] + } + }) + ) + } + + #[test] + fn test_no_replace_uid() { + let mf2 = json!({ + "type": ["h-card"], + "properties": { + "uid": ["https://fireburn.ru/"], + "name": ["Vika Nezrimaya"], + "note": ["A crazy programmer girl who wants some hugs"] + } + }); + + let (uid, normalized) = normalize_mf2( + mf2.clone(), + &token_data(), + ); + assert_eq!( + normalized["properties"]["uid"][0], mf2["properties"]["uid"][0], + "UID was replaced" + ); + assert_eq!( + normalized["properties"]["uid"][0], uid, + "Returned post location doesn't match UID" + ); + } + + #[test] + fn test_mp_channel() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "uid": ["https://fireburn.ru/posts/test"], + "content": [{"html": "<p>Hello world!</p>"}], + "mp-channel": ["https://fireburn.ru/feeds/test"] + } + }); + + let (_, normalized) = normalize_mf2( + mf2.clone(), + &token_data(), + ); + + assert_eq!( + normalized["properties"]["channel"], + mf2["properties"]["mp-channel"] + ); + } + + #[test] + fn test_mp_channel_as_string() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "uid": ["https://fireburn.ru/posts/test"], + "content": [{"html": "<p>Hello world!</p>"}], + "mp-channel": "https://fireburn.ru/feeds/test" + } + }); + + let (_, normalized) = normalize_mf2( + mf2.clone(), + &token_data(), + ); + + assert_eq!( + normalized["properties"]["channel"][0], + mf2["properties"]["mp-channel"] + ); + } + + #[test] + fn test_normalize_mf2() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "content": ["This is content!"] + } + }); + + let (uid, post) = normalize_mf2( + mf2, + &token_data(), + ); + assert_eq!( + post["properties"]["published"] + .as_array() + .expect("post['published'] is undefined") + .len(), + 1, + "Post doesn't have a published time" + ); + DateTime::parse_from_rfc3339(post["properties"]["published"][0].as_str().unwrap()) + .expect("Couldn't parse date from rfc3339"); + assert!( + !post["properties"]["url"] + .as_array() + .expect("post['url'] is undefined") + .is_empty(), + "Post doesn't have any URLs" + ); + assert_eq!( + post["properties"]["uid"] + .as_array() + .expect("post['uid'] is undefined") + .len(), + 1, + "Post doesn't have a single UID" + ); + assert_eq!( + post["properties"]["uid"][0], uid, + "UID of a post and its supposed location don't match" + ); + assert!( + uid.starts_with("https://fireburn.ru/posts/"), + "The post namespace is incorrect" + ); + assert_eq!( + post["properties"]["content"][0]["html"] + .as_str() + .expect("Post doesn't have a rich content object") + .trim(), + "<p>This is content!</p>", + "Parsed Markdown content doesn't match expected HTML" + ); + assert_eq!( + post["properties"]["channel"][0], "https://fireburn.ru/feeds/main", + "Post isn't posted to the main channel" + ); + assert_eq!( + post["properties"]["author"][0], "https://fireburn.ru/", + "Post author is unknown" + ); + } + + #[test] + fn test_mp_slug() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "content": ["This is content!"], + "mp-slug": ["hello-post"] + }, + }); + + let (_, post) = normalize_mf2( + mf2, + &token_data(), + ); + assert!( + post["properties"]["url"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap()) + .any(|i| i == "https://fireburn.ru/posts/hello-post"), + "Didn't found an URL pointing to the location expected by the mp-slug semantics" + ); + assert!( + post["properties"]["mp-slug"].as_array().is_none(), + "mp-slug wasn't deleted from the array!" + ) + } + + #[test] + fn test_normalize_feed() { + let mf2 = json!({ + "type": ["h-feed"], + "properties": { + "name": "Main feed", + "mp-slug": ["main"] + } + }); + + let (uid, post) = normalize_mf2( + mf2, + &token_data(), + ); + assert_eq!( + post["properties"]["uid"][0], uid, + "UID of a post and its supposed location don't match" + ); + assert_eq!(post["properties"]["author"][0], "https://fireburn.ru/"); + assert!( + post["properties"]["url"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap()) + .any(|i| i == "https://fireburn.ru/feeds/main"), + "Didn't found an URL pointing to the location expected by the mp-slug semantics" + ); + assert!( + post["properties"]["mp-slug"].as_array().is_none(), + "mp-slug wasn't deleted from the array!" + ) + } +} |