Diffstat (limited to 'kittybox-rs/src')
22 files changed, 6078 insertions, 0 deletions
diff --git a/kittybox-rs/src/bin/kittybox_bulk_import.rs b/kittybox-rs/src/bin/kittybox_bulk_import.rs
new file mode 100644
index 0000000..7e1f6af
--- /dev/null
+++ b/kittybox-rs/src/bin/kittybox_bulk_import.rs
@@ -0,0 +1,66 @@
+use anyhow::{anyhow, bail, Context, Result};
+use std::fs::File;
+use std::io;
+
+#[async_std::main]
+async fn main() -> Result<()> {
+    let args = std::env::args().collect::<Vec<String>>();
+    if args.iter().skip(1).any(|s| s == "--help") {
+        println!("Usage: {} <url> [file]", args[0]);
+        println!("\nIf launched with no arguments, reads from stdin.");
+        println!(
+            "\nUse KITTYBOX_AUTH_TOKEN environment variable to authorize to the Micropub endpoint."
+        );
+        std::process::exit(0);
+    }
+
+    let token = std::env::var("KITTYBOX_AUTH_TOKEN")
+        .map_err(|_| anyhow!("No auth token found! Use KITTYBOX_AUTH_TOKEN env variable."))?;
+    let data: Vec<serde_json::Value> = (if args.len() == 2 || (args.len() == 3 && args[2] == "-") {
+        serde_json::from_reader(io::stdin())
+    } else if args.len() == 3 {
+        serde_json::from_reader(File::open(&args[2]).with_context(|| "Error opening input file")?)
+    } else {
+        bail!("See `{} --help` for usage.", args[0]);
+    })
+    .with_context(|| "Error while loading the input file")?;
+
+    let url = surf::Url::parse(&args[1])?;
+    let client = surf::Client::new();
+
+    let iter = data.into_iter();
+
+    for post in iter {
+        println!(
+            "Processing {}...",
+            post["properties"]["url"][0]
+                .as_str()
+                .or_else(|| post["properties"]["published"][0]
+                    .as_str()
+                    .or_else(|| post["properties"]["name"][0]
+                        .as_str()
+                        .or(Some("<unidentified post>"))))
+                .unwrap()
+        );
+        match client
+            .post(&url)
+            .body(surf::http::Body::from_string(serde_json::to_string(&post)?))
+            .header("Content-Type", "application/json")
+            .header("Authorization", format!("Bearer {}", &token))
+            .send()
+            .await
+        {
+            Ok(mut response) => {
+                if response.status() == 201 || response.status() == 202 {
+                    println!("Posted at {}", response.header("location").unwrap().last());
+                } else {
+                    println!("Error: {:?}", response.body_string().await);
+                }
+            }
+            Err(err) => {
+                println!("{}", err);
+            }
+        }
+    }
+    Ok(())
+}
diff --git a/kittybox-rs/src/bin/kittybox_database_converter.rs b/kittybox-rs/src/bin/kittybox_database_converter.rs
new file mode 100644
index 0000000..bc355c9
--- /dev/null
+++ b/kittybox-rs/src/bin/kittybox_database_converter.rs
@@ -0,0 +1,106 @@
+use anyhow::{anyhow, Context};
+use kittybox::database::FileStorage;
+use kittybox::database::Storage;
+use redis::{self, AsyncCommands};
+use std::collections::HashMap;
+
+/// Convert from a Redis storage to a new storage new_storage.
+async fn convert_from_redis<S: Storage>(from: String, new_storage: S) -> anyhow::Result<()> {
+    let db = redis::Client::open(from).context("Failed to open the Redis connection")?;
+
+    let mut conn = db
+        .get_async_std_connection()
+        .await
+        .context("Failed to connect to Redis")?;
+
+    // Rebinding to convince the borrow checker we're not smuggling stuff outta scope
+    let storage = &new_storage;
+
+    let mut stream = conn.hscan::<_, String>("posts").await?;
+
+    while let Some(key) = stream.next_item().await {
+        let value = serde_json::from_str::<serde_json::Value>(
+            &stream
+                .next_item()
+                .await
+                .ok_or(anyhow!("Failed to find a corresponding value for the key"))?,
+        )?;
+
+        println!("{}, {:?}", key, value);
+
+        if value["see_other"].is_string() {
+            continue;
+        }
+
+        let user = &(url::Url::parse(value["properties"]["uid"][0].as_str().unwrap())
+            .unwrap()
+            .origin()
+            .ascii_serialization()
+            .clone()
+            + "/");
+        if let Err(err) = storage.clone().put_post(&value, user).await {
+            eprintln!("Error saving post: {}", err);
+        }
+    }
+
+    let mut stream: redis::AsyncIter<String> = conn.scan_match("settings_*").await?;
+    while let Some(key) = stream.next_item().await {
+        let mut conn = db
+            .get_async_std_connection()
+            .await
+            .context("Failed to connect to Redis")?;
+        let user = key.strip_prefix("settings_").unwrap();
+        match conn
+            .hgetall::<&str, HashMap<String, String>>(&key)
+            .await
+            .context(format!("Failed getting settings from key {}", key))
+        {
+            Ok(settings) => {
+                for (k, v) in settings.iter() {
+                    if let Err(e) = storage
+                        .set_setting(k, user, v)
+                        .await
+                        .with_context(|| format!("Failed setting {} for {}", k, user))
+                    {
+                        eprintln!("{}", e);
+                    }
+                }
+            }
+            Err(e) => {
+                eprintln!("{}", e);
+            }
+        }
+    }
+
+    Ok(())
+}
+
+#[async_std::main]
+async fn main() -> anyhow::Result<()> {
+    let mut args = std::env::args();
+    args.next(); // skip argv[0]
+    let old_uri = args
+        .next()
+        .ok_or_else(|| anyhow!("No import source is provided."))?;
+    let new_uri = args
+        .next()
+        .ok_or_else(|| anyhow!("No import destination is provided."))?;
+
+    let storage = if new_uri.starts_with("file:") {
+        let folder = new_uri.strip_prefix("file://").unwrap();
+        let path = std::path::PathBuf::from(folder);
+        Box::new(
+            FileStorage::new(path)
+                .await
+                .context("Failed to construct the file storage")?,
+        )
+    } else {
+        anyhow::bail!("Cannot construct the storage abstraction for destination storage. Check the storage type?");
+    };
+
+    if old_uri.starts_with("redis") {
+        convert_from_redis(old_uri, *storage).await?
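        // Illustrative invocation of this converter (hypothetical URIs, not
        // part of the commit itself); the first argument is the Redis source,
        // the second the file-backed destination:
        //
        //     kittybox_database_converter redis://127.0.0.1:6379 file:///var/lib/kittybox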
+ } + + Ok(()) +} diff --git a/kittybox-rs/src/bin/pyindieblog_to_kittybox.rs b/kittybox-rs/src/bin/pyindieblog_to_kittybox.rs new file mode 100644 index 0000000..38590c3 --- /dev/null +++ b/kittybox-rs/src/bin/pyindieblog_to_kittybox.rs @@ -0,0 +1,68 @@ +use anyhow::{anyhow, Context, Result}; + +use redis::AsyncCommands; +use serde::{Deserialize, Serialize}; +use std::collections::HashMap; +use std::fs::File; + +#[derive(Default, Serialize, Deserialize)] +struct PyindieblogData { + posts: Vec<serde_json::Value>, + cards: Vec<serde_json::Value>, +} + +#[async_std::main] +async fn main() -> Result<()> { + let mut args = std::env::args(); + args.next(); // skip argv[0] which is the name + let redis_uri = args + .next() + .ok_or_else(|| anyhow!("No Redis URI provided"))?; + let client = redis::Client::open(redis_uri.as_str()) + .with_context(|| format!("Failed to construct Redis client on {}", redis_uri))?; + + let filename = args + .next() + .ok_or_else(|| anyhow!("No filename provided for export"))?; + + let mut data: Vec<serde_json::Value>; + + let file = File::create(filename)?; + + let mut conn = client + .get_async_std_connection() + .await + .with_context(|| "Failed to connect to the Redis server")?; + + data = conn + .hgetall::<&str, HashMap<String, String>>("posts") + .await? + .values() + .map(|s| { + serde_json::from_str::<serde_json::Value>(s) + .with_context(|| format!("Failed to parse the following entry: {:?}", s)) + }) + .collect::<std::result::Result<Vec<serde_json::Value>, anyhow::Error>>() + .with_context(|| "Failed to export h-entries from pyindieblog")?; + data.extend( + conn.hgetall::<&str, HashMap<String, String>>("hcards") + .await? + .values() + .map(|s| { + serde_json::from_str::<serde_json::Value>(s) + .with_context(|| format!("Failed to parse the following card: {:?}", s)) + }) + .collect::<std::result::Result<Vec<serde_json::Value>, anyhow::Error>>() + .with_context(|| "Failed to export h-cards from pyindieblog")?, + ); + + data.sort_by_key(|v| { + v["properties"]["published"][0] + .as_str() + .map(|s| s.to_string()) + }); + + serde_json::to_writer(file, &data)?; + + Ok(()) +} diff --git a/kittybox-rs/src/database/file/mod.rs b/kittybox-rs/src/database/file/mod.rs new file mode 100644 index 0000000..1e7aa96 --- /dev/null +++ b/kittybox-rs/src/database/file/mod.rs @@ -0,0 +1,619 @@ +//#![warn(clippy::unwrap_used)] +use crate::database::{filter_post, ErrorKind, Result, Storage, StorageError, Settings}; +use std::io::ErrorKind as IOErrorKind; +use tokio::fs::{File, OpenOptions}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::task::spawn_blocking; +use async_trait::async_trait; +use futures::{stream, StreamExt, TryStreamExt}; +use log::debug; +use serde_json::json; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +impl From<std::io::Error> for StorageError { + fn from(source: std::io::Error) -> Self { + Self::with_source( + match source.kind() { + IOErrorKind::NotFound => ErrorKind::NotFound, + IOErrorKind::AlreadyExists => ErrorKind::Conflict, + _ => ErrorKind::Backend, + }, + "file I/O error", + Box::new(source), + ) + } +} + +impl From<tokio::time::error::Elapsed> for StorageError { + fn from(source: tokio::time::error::Elapsed) -> Self { + Self::with_source( + ErrorKind::Backend, + "timeout on I/O operation", + Box::new(source) + ) + } +} + +// Copied from https://stackoverflow.com/questions/39340924 +// This routine is adapted from the *old* Path's `path_relative_from` +// function, which works differently from the new 
`relative_from` function. +// In particular, this handles the case on unix where both paths are +// absolute but with only the root as the common directory. +fn path_relative_from(path: &Path, base: &Path) -> Option<PathBuf> { + use std::path::Component; + + if path.is_absolute() != base.is_absolute() { + if path.is_absolute() { + Some(PathBuf::from(path)) + } else { + None + } + } else { + let mut ita = path.components(); + let mut itb = base.components(); + let mut comps: Vec<Component> = vec![]; + loop { + match (ita.next(), itb.next()) { + (None, None) => break, + (Some(a), None) => { + comps.push(a); + comps.extend(ita.by_ref()); + break; + } + (None, _) => comps.push(Component::ParentDir), + (Some(a), Some(b)) if comps.is_empty() && a == b => (), + (Some(a), Some(b)) if b == Component::CurDir => comps.push(a), + (Some(_), Some(b)) if b == Component::ParentDir => return None, + (Some(a), Some(_)) => { + comps.push(Component::ParentDir); + for _ in itb { + comps.push(Component::ParentDir); + } + comps.push(a); + comps.extend(ita.by_ref()); + break; + } + } + } + Some(comps.iter().map(|c| c.as_os_str()).collect()) + } +} + +#[allow(clippy::unwrap_used, clippy::expect_used)] +#[cfg(test)] +mod tests { + #[test] + fn test_relative_path_resolving() { + let path1 = std::path::Path::new("/home/vika/Projects/kittybox"); + let path2 = std::path::Path::new("/home/vika/Projects/nixpkgs"); + let relative_path = super::path_relative_from(path2, path1).unwrap(); + + assert_eq!(relative_path, std::path::Path::new("../nixpkgs")) + } +} + +// TODO: Check that the path ACTUALLY IS INSIDE THE ROOT FOLDER +// This could be checked by completely resolving the path +// and checking if it has a common prefix +fn url_to_path(root: &Path, url: &str) -> PathBuf { + let path = url_to_relative_path(url).to_logical_path(root); + if !path.starts_with(root) { + // TODO: handle more gracefully + panic!("Security error: {:?} is not a prefix of {:?}", path, root) + } else { + path + } +} + +fn url_to_relative_path(url: &str) -> relative_path::RelativePathBuf { + let url = warp::http::Uri::try_from(url).expect("Couldn't parse a URL"); + let mut path = relative_path::RelativePathBuf::new(); + path.push(url.authority().unwrap().to_string() + url.path() + ".json"); + + path +} + +fn modify_post(post: &serde_json::Value, update: &serde_json::Value) -> Result<serde_json::Value> { + let mut add_keys: HashMap<String, Vec<serde_json::Value>> = HashMap::new(); + let mut remove_keys: Vec<String> = vec![]; + let mut remove_values: HashMap<String, Vec<serde_json::Value>> = HashMap::new(); + let mut post = post.clone(); + + if let Some(delete) = update["delete"].as_array() { + remove_keys.extend( + delete + .iter() + .filter_map(|v| v.as_str()) + .map(|v| v.to_string()), + ); + } else if let Some(delete) = update["delete"].as_object() { + for (k, v) in delete { + if let Some(v) = v.as_array() { + remove_values + .entry(k.to_string()) + .or_default() + .extend(v.clone()); + } else { + return Err(StorageError::new( + ErrorKind::BadRequest, + "Malformed update object", + )); + } + } + } + if let Some(add) = update["add"].as_object() { + for (k, v) in add { + if let Some(v) = v.as_array() { + add_keys.insert(k.to_string(), v.clone()); + } else { + return Err(StorageError::new( + ErrorKind::BadRequest, + "Malformed update object", + )); + } + } + } + if let Some(replace) = update["replace"].as_object() { + for (k, v) in replace { + remove_keys.push(k.to_string()); + if let Some(v) = v.as_array() { + add_keys.insert(k.to_string(), 
v.clone()); + } else { + return Err(StorageError::new(ErrorKind::BadRequest, "Malformed update object")); + } + } + } + + if let Some(props) = post["properties"].as_object_mut() { + for k in remove_keys { + props.remove(&k); + } + } + for (k, v) in remove_values { + let k = &k; + let props = if k == "children" { + &mut post + } else { + &mut post["properties"] + }; + v.iter().for_each(|v| { + if let Some(vec) = props[k].as_array_mut() { + if let Some(index) = vec.iter().position(|w| w == v) { + vec.remove(index); + } + } + }); + } + for (k, v) in add_keys { + let props = if k == "children" { + &mut post + } else { + &mut post["properties"] + }; + let k = &k; + if let Some(prop) = props[k].as_array_mut() { + if k == "children" { + v.into_iter() + .rev() + .for_each(|v| prop.insert(0, v)); + } else { + prop.extend(v.into_iter()); + } + } else { + post["properties"][k] = serde_json::Value::Array(v) + } + } + Ok(post) +} + +#[derive(Clone, Debug)] +/// A backend using a folder with JSON files as a backing store. +/// Uses symbolic links to represent a many-to-one mapping of URLs to a post. +pub struct FileStorage { + root_dir: PathBuf, +} + +impl FileStorage { + /// Create a new storage wrapping a folder specified by root_dir. + pub async fn new(root_dir: PathBuf) -> Result<Self> { + // TODO check if the dir is writable + Ok(Self { root_dir }) + } +} + +async fn hydrate_author<S: Storage>( + feed: &mut serde_json::Value, + user: &'_ Option<String>, + storage: &S, +) { + let url = feed["properties"]["uid"][0] + .as_str() + .expect("MF2 value should have a UID set! Check if you used normalize_mf2 before recording the post!"); + if let Some(author) = feed["properties"]["author"].as_array().cloned() { + if !feed["type"] + .as_array() + .expect("MF2 value should have a type set!") + .iter() + .any(|i| i == "h-card") + { + let author_list: Vec<serde_json::Value> = stream::iter(author.iter()) + .then(|i| async move { + if let Some(i) = i.as_str() { + match storage.get_post(i).await { + Ok(post) => match post { + Some(post) => match filter_post(post, user) { + Some(author) => author, + None => json!(i), + }, + None => json!(i), + }, + Err(e) => { + log::error!("Error while hydrating post {}: {}", url, e); + json!(i) + } + } + } else { + i.clone() + } + }) + .collect::<Vec<_>>() + .await; + if let Some(props) = feed["properties"].as_object_mut() { + props["author"] = json!(author_list); + } else { + feed["properties"] = json!({"author": author_list}); + } + } + } +} + +#[async_trait] +impl Storage for FileStorage { + async fn post_exists(&self, url: &str) -> Result<bool> { + let path = url_to_path(&self.root_dir, url); + debug!("Checking if {:?} exists...", path); + /*let result = match tokio::fs::metadata(path).await { + Ok(metadata) => { + Ok(true) + }, + Err(err) => { + if err.kind() == IOErrorKind::NotFound { + Ok(false) + } else { + Err(err.into()) + } + } + };*/ + #[allow(clippy::unwrap_used)] // JoinHandle captures panics, this closure shouldn't panic + Ok(spawn_blocking(move || path.is_file()).await.unwrap()) + } + + async fn get_post(&self, url: &str) -> Result<Option<serde_json::Value>> { + let path = url_to_path(&self.root_dir, url); + // TODO: check that the path actually belongs to the dir of user who requested it + // it's not like you CAN access someone else's private posts with it + // so it's not exactly a security issue, but it's still not good + debug!("Opening {:?}", path); + + match File::open(&path).await { + Ok(mut file) => { + let mut content = String::new(); + // Typechecks 
because OS magic acts on references + // to FDs as if they were behind a mutex + AsyncReadExt::read_to_string(&mut file, &mut content).await?; + debug!("Read {} bytes successfully from {:?}", content.as_bytes().len(), &path); + Ok(Some(serde_json::from_str(&content)?)) + }, + Err(err) => { + if err.kind() == IOErrorKind::NotFound { + Ok(None) + } else { + Err(err.into()) + } + } + } + } + + async fn put_post(&self, post: &'_ serde_json::Value, user: &'_ str) -> Result<()> { + let key = post["properties"]["uid"][0] + .as_str() + .expect("Tried to save a post without UID"); + let path = url_to_path(&self.root_dir, key); + let tempfile = (&path).with_extension("tmp"); + debug!("Creating {:?}", path); + + let parent = path.parent().expect("Parent for this directory should always exist").to_owned(); + if !parent.is_dir() { + tokio::fs::create_dir_all(parent).await?; + } + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&tempfile).await?; + + file.write_all(post.to_string().as_bytes()).await?; + file.flush().await?; + drop(file); + tokio::fs::rename(&tempfile, &path).await?; + + if let Some(urls) = post["properties"]["url"].as_array() { + for url in urls + .iter() + .map(|i| i.as_str().unwrap()) + { + if url != key && url.starts_with(user) { + let link = url_to_path(&self.root_dir, url); + debug!("Creating a symlink at {:?}", link); + let orig = path.clone(); + // We're supposed to have a parent here. + let basedir = link.parent().ok_or_else(|| { + StorageError::new( + ErrorKind::Backend, + "Failed to calculate parent directory when creating a symlink", + ) + })?; + let relative = path_relative_from(&orig, basedir).unwrap(); + println!("{:?} - {:?} = {:?}", &orig, &basedir, &relative); + tokio::fs::symlink(relative, link).await?; + } + } + } + + if post["type"] + .as_array() + .unwrap() + .iter() + .any(|s| s.as_str() == Some("h-feed")) + { + println!("Adding to channel list..."); + // Add the h-feed to the channel list + let mut path = relative_path::RelativePathBuf::new(); + path.push(warp::http::Uri::try_from(user.to_string()).unwrap().authority().unwrap().to_string()); + path.push("channels"); + + let path = path.to_path(&self.root_dir); + let tempfilename = (&path).with_extension("tmp"); + let channel_name = post["properties"]["name"][0] + .as_str() + .map(|s| s.to_string()) + .unwrap_or_else(String::default); + let key = key.to_string(); + + let mut tempfile = OpenOptions::new() + .write(true) + .create_new(true) + .open(&tempfilename).await?; + let mut file = OpenOptions::new() + .read(true) + .write(true) + .truncate(false) + .create(true) + .open(&path).await?; + + let mut content = String::new(); + file.read_to_string(&mut content).await?; + drop(file); + let mut channels: Vec<super::MicropubChannel> = if !content.is_empty() { + serde_json::from_str(&content)? 
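            // The channels file at "<authority>/channels" holds a JSON array of
            // MicropubChannel entries; an illustrative (hypothetical) value:
            //     [{"uid": "https://example.com/feeds/main", "name": "Main Page"}]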
+ } else { + Vec::default() + }; + + channels.push(super::MicropubChannel { + uid: key.to_string(), + name: channel_name, + }); + + tempfile.write_all(serde_json::to_string(&channels)?.as_bytes()).await?; + tempfile.flush().await?; + drop(tempfile); + tokio::fs::rename(tempfilename, path).await?; + } + Ok(()) + } + + async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()> { + let path = url_to_path(&self.root_dir, url); + let tempfilename = path.with_extension("tmp"); + #[allow(unused_variables)] + let (old_json, new_json) = { + let mut temp = OpenOptions::new() + .write(true) + .create_new(true) + .open(&tempfilename) + .await?; + let mut file = OpenOptions::new() + .read(true) + .open(&path) + .await?; + + let mut content = String::new(); + file.read_to_string(&mut content).await?; + let json: serde_json::Value = serde_json::from_str(&content)?; + drop(file); + // Apply the editing algorithms + let new_json = modify_post(&json, &update)?; + + temp.write_all(new_json.to_string().as_bytes()).await?; + temp.flush().await?; + drop(temp); + tokio::fs::rename(tempfilename, path).await?; + + (json, new_json) + }; + // TODO check if URLs changed between old and new JSON + Ok(()) + } + + async fn get_channels(&self, user: &'_ str) -> Result<Vec<super::MicropubChannel>> { + let mut path = relative_path::RelativePathBuf::new(); + path.push(warp::http::Uri::try_from(user.to_string()).unwrap().authority().unwrap().to_string()); + path.push("channels"); + + let path = path.to_path(&self.root_dir); + match File::open(&path).await { + Ok(mut f) => { + let mut content = String::new(); + f.read_to_string(&mut content).await?; + // This should not happen, but if it does, handle it gracefully + if content.is_empty() { + return Ok(vec![]); + } + let channels: Vec<super::MicropubChannel> = serde_json::from_str(&content)?; + Ok(channels) + } + Err(e) => { + if e.kind() == IOErrorKind::NotFound { + Ok(vec![]) + } else { + Err(e.into()) + } + } + } + } + + async fn read_feed_with_limit( + &self, + url: &'_ str, + after: &'_ Option<String>, + limit: usize, + user: &'_ Option<String>, + ) -> Result<Option<serde_json::Value>> { + if let Some(feed) = self.get_post(url).await? { + if let Some(mut feed) = filter_post(feed, user) { + if feed["children"].is_array() { + // This code contains several clones. It looks + // like the borrow checker thinks it is preventing + // me from doing something incredibly stupid. The + // borrow checker may or may not be right. + let children = feed["children"].as_array().unwrap().clone(); + let mut posts_iter = children + .into_iter() + .map(|s: serde_json::Value| s.as_str().unwrap().to_string()); + // Note: we can't actually use skip_while here because we end up emitting `after`. + // This imperative snippet consumes after instead of emitting it, allowing the + // stream of posts to return only those items that truly come *after* + if let Some(after) = after { + for s in posts_iter.by_ref() { + if &s == after { + break + } + } + }; + let posts = stream::iter(posts_iter) + .map(|url: String| async move { self.get_post(&url).await }) + .buffered(std::cmp::min(3, limit)) + // Hack to unwrap the Option and sieve out broken links + // Broken links return None, and Stream::filter_map skips Nones. 
+ .try_filter_map(|post: Option<serde_json::Value>| async move { Ok(post) }) + .try_filter_map(|post| async move { Ok(filter_post(post, user)) }) + .and_then(|mut post| async move { + hydrate_author(&mut post, user, self).await; + Ok(post) + }) + .take(limit); + + match posts.try_collect::<Vec<serde_json::Value>>().await { + Ok(posts) => feed["children"] = serde_json::json!(posts), + Err(err) => { + return Err(StorageError::with_source( + ErrorKind::Other, + "Feed assembly error", + Box::new(err), + )); + } + } + } + hydrate_author(&mut feed, user, self).await; + Ok(Some(feed)) + } else { + Err(StorageError::new( + ErrorKind::PermissionDenied, + "specified user cannot access this post", + )) + } + } else { + Ok(None) + } + } + + async fn delete_post(&self, url: &'_ str) -> Result<()> { + let path = url_to_path(&self.root_dir, url); + if let Err(e) = tokio::fs::remove_file(path).await { + Err(e.into()) + } else { + // TODO check for dangling references in the channel list + Ok(()) + } + } + + async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result<String> { + log::debug!("User for getting settings: {}", user); + let url = warp::http::Uri::try_from(user).expect("Couldn't parse a URL"); + let mut path = relative_path::RelativePathBuf::new(); + path.push(url.authority().unwrap().to_string()); + path.push("settings"); + + let path = path.to_path(&self.root_dir); + log::debug!("Getting settings from {:?}", &path); + let setting = setting.to_string(); + let mut file = File::open(path).await?; + let mut content = String::new(); + file.read_to_string(&mut content).await?; + + let settings: HashMap<String, String> = serde_json::from_str(&content)?; + // XXX consider returning string slices instead of cloning a string every time + // it might come with a performance hit and/or memory usage inflation + settings + .get(&setting) + .cloned() + .ok_or_else(|| StorageError::new(ErrorKind::Backend, "Setting not set")) + } + + async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()> { + let url = warp::http::Uri::try_from(user).expect("Couldn't parse a URL"); + let mut path = relative_path::RelativePathBuf::new(); + path.push(url.authority().unwrap().to_string()); + path.push("settings"); + + let path = path.to_path(&self.root_dir); + let temppath = path.with_extension("tmp"); + + let parent = path.parent().unwrap().to_owned(); + if !spawn_blocking(move || parent.is_dir()).await.unwrap() { + tokio::fs::create_dir_all(path.parent().unwrap()).await?; + } + + let (setting, value) = (setting.to_string(), value.to_string()); + + let mut tempfile = OpenOptions::new() + .write(true) + .create_new(true) + .open(&temppath) + .await?; + + let mut settings: HashMap<String, String> = match File::open(&path).await { + Ok(mut f) => { + let mut content = String::new(); + f.read_to_string(&mut content).await?; + if content.is_empty() { + HashMap::default() + } else { + serde_json::from_str(&content)? 
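                // The settings file is a flat JSON map from the serialized setting
                // name to its value; an illustrative (hypothetical) value:
                //     {"site_name": "Vika's Hideout"}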
+ } + } + Err(err) => if err.kind() == IOErrorKind::NotFound { + HashMap::default() + } else { + return Err(err.into()) + } + }; + settings.insert(setting, value); + tempfile.write_all(serde_json::to_string(&settings)?.as_bytes()).await?; + drop(tempfile); + tokio::fs::rename(temppath, path).await?; + Ok(()) + } +} diff --git a/kittybox-rs/src/database/memory.rs b/kittybox-rs/src/database/memory.rs new file mode 100644 index 0000000..786466c --- /dev/null +++ b/kittybox-rs/src/database/memory.rs @@ -0,0 +1,200 @@ +#![allow(clippy::todo)] +use async_trait::async_trait; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::sync::RwLock; +use futures_util::FutureExt; +use serde_json::json; + +use crate::database::{Storage, Result, StorageError, ErrorKind, MicropubChannel, Settings}; + +#[derive(Clone, Debug)] +pub struct MemoryStorage { + pub mapping: Arc<RwLock<HashMap<String, serde_json::Value>>>, + pub channels: Arc<RwLock<HashMap<String, Vec<String>>>> +} + +#[async_trait] +impl Storage for MemoryStorage { + async fn post_exists(&self, url: &str) -> Result<bool> { + return Ok(self.mapping.read().await.contains_key(url)) + } + + async fn get_post(&self, url: &str) ->Result<Option<serde_json::Value>> { + let mapping = self.mapping.read().await; + match mapping.get(url) { + Some(val) => { + if let Some(new_url) = val["see_other"].as_str() { + match mapping.get(new_url) { + Some(val) => Ok(Some(val.clone())), + None => { + drop(mapping); + self.mapping.write().await.remove(url); + Ok(None) + } + } + } else { + Ok(Some(val.clone())) + } + }, + _ => Ok(None) + } + } + + async fn put_post(&self, post: &'_ serde_json::Value, _user: &'_ str) -> Result<()> { + let mapping = &mut self.mapping.write().await; + let key: &str = match post["properties"]["uid"][0].as_str() { + Some(uid) => uid, + None => return Err(StorageError::new(ErrorKind::Other, "post doesn't have a UID")) + }; + mapping.insert(key.to_string(), post.clone()); + if post["properties"]["url"].is_array() { + for url in post["properties"]["url"].as_array().unwrap().iter().map(|i| i.as_str().unwrap().to_string()) { + if url != key { + mapping.insert(url, json!({"see_other": key})); + } + } + } + if post["type"].as_array().unwrap().iter().any(|i| i == "h-feed") { + // This is a feed. Add it to the channels array if it's not already there. 
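        // The in-memory channel index maps an author URL to the UIDs of that
        // author's feeds; an illustrative (hypothetical) value:
        //     {"https://example.com/": ["https://example.com/feeds/main"]}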
+ println!("{:#}", post); + self.channels.write().await.entry(post["properties"]["author"][0].as_str().unwrap().to_string()).or_insert_with(Vec::new).push(key.to_string()) + } + Ok(()) + } + + async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()> { + let mut add_keys: HashMap<String, serde_json::Value> = HashMap::new(); + let mut remove_keys: Vec<String> = vec![]; + let mut remove_values: HashMap<String, Vec<serde_json::Value>> = HashMap::new(); + + if let Some(delete) = update["delete"].as_array() { + remove_keys.extend(delete.iter().filter_map(|v| v.as_str()).map(|v| v.to_string())); + } else if let Some(delete) = update["delete"].as_object() { + for (k, v) in delete { + if let Some(v) = v.as_array() { + remove_values.entry(k.to_string()).or_default().extend(v.clone()); + } else { + return Err(StorageError::new(ErrorKind::BadRequest, "Malformed update object")); + } + } + } + if let Some(add) = update["add"].as_object() { + for (k, v) in add { + if v.is_array() { + add_keys.insert(k.to_string(), v.clone()); + } else { + return Err(StorageError::new(ErrorKind::BadRequest, "Malformed update object")); + } + } + } + if let Some(replace) = update["replace"].as_object() { + for (k, v) in replace { + remove_keys.push(k.to_string()); + add_keys.insert(k.to_string(), v.clone()); + } + } + let mut mapping = self.mapping.write().await; + if let Some(mut post) = mapping.get(url) { + if let Some(url) = post["see_other"].as_str() { + if let Some(new_post) = mapping.get(url) { + post = new_post + } else { + return Err(StorageError::new(ErrorKind::NotFound, "The post you have requested is not found in the database.")); + } + } + let mut post = post.clone(); + for k in remove_keys { + post["properties"].as_object_mut().unwrap().remove(&k); + } + for (k, v) in remove_values { + let k = &k; + let props = if k == "children" { + &mut post + } else { + &mut post["properties"] + }; + v.iter().for_each(|v| { + if let Some(vec) = props[k].as_array_mut() { + if let Some(index) = vec.iter().position(|w| w == v) { + vec.remove(index); + } + } + }); + } + for (k, v) in add_keys { + let props = if k == "children" { + &mut post + } else { + &mut post["properties"] + }; + let k = &k; + if let Some(prop) = props[k].as_array_mut() { + if k == "children" { + v.as_array().unwrap().iter().cloned().rev().for_each(|v| prop.insert(0, v)); + } else { + prop.extend(v.as_array().unwrap().iter().cloned()); + } + } else { + post["properties"][k] = v + } + } + mapping.insert(post["properties"]["uid"][0].as_str().unwrap().to_string(), post); + } else { + return Err(StorageError::new(ErrorKind::NotFound, "The designated post wasn't found in the database.")); + } + Ok(()) + } + + async fn get_channels(&self, user: &'_ str) -> Result<Vec<MicropubChannel>> { + match self.channels.read().await.get(user) { + Some(channels) => Ok(futures_util::future::join_all(channels.iter() + .map(|channel| self.get_post(channel) + .map(|result| result.unwrap()) + .map(|post: Option<serde_json::Value>| { + post.map(|post| MicropubChannel { + uid: post["properties"]["uid"][0].as_str().unwrap().to_string(), + name: post["properties"]["name"][0].as_str().unwrap().to_string() + }) + }) + ).collect::<Vec<_>>()).await.into_iter().flatten().collect::<Vec<_>>()), + None => Ok(vec![]) + } + + } + + #[allow(unused_variables)] + async fn read_feed_with_limit(&self, url: &'_ str, after: &'_ Option<String>, limit: usize, user: &'_ Option<String>) -> Result<Option<serde_json::Value>> { + todo!() + } + + async fn delete_post(&self, url: &'_ 
str) -> Result<()> { + self.mapping.write().await.remove(url); + Ok(()) + } + + #[allow(unused_variables)] + async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result<String> { + todo!() + } + + #[allow(unused_variables)] + async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()> { + todo!() + } +} + +impl Default for MemoryStorage { + fn default() -> Self { + Self::new() + } +} + +impl MemoryStorage { + pub fn new() -> Self { + Self { + mapping: Arc::new(RwLock::new(HashMap::new())), + channels: Arc::new(RwLock::new(HashMap::new())) + } + } +} diff --git a/kittybox-rs/src/database/mod.rs b/kittybox-rs/src/database/mod.rs new file mode 100644 index 0000000..6bf5409 --- /dev/null +++ b/kittybox-rs/src/database/mod.rs @@ -0,0 +1,539 @@ +#![warn(missing_docs)] +use async_trait::async_trait; +use serde::{Deserialize, Serialize}; + +mod file; +pub use crate::database::file::FileStorage; +#[cfg(test)] +mod memory; +#[cfg(test)] +pub use crate::database::memory::MemoryStorage; + +pub use kittybox_util::MicropubChannel; + +/// Enum representing different errors that might occur during the database query. +#[derive(Debug, Clone, Copy)] +pub enum ErrorKind { + /// Backend error (e.g. database connection error) + Backend, + /// Error due to insufficient contextual permissions for the query + PermissionDenied, + /// Error due to the database being unable to parse JSON returned from the backing storage. + /// Usually indicative of someone fiddling with the database manually instead of using proper tools. + JsonParsing, + /// - ErrorKind::NotFound - equivalent to a 404 error. Note, some requests return an Option, + /// in which case None is also equivalent to a 404. + NotFound, + /// The user's query or request to the database was malformed. Used whenever the database processes + /// the user's query directly, such as when editing posts inside of the database (e.g. Redis backend) + BadRequest, + /// the user's query collided with an in-flight request and needs to be retried + Conflict, + /// - ErrorKind::Other - when something so weird happens that it becomes undescribable. + Other, +} + +/// Enum representing settings that might be stored in the site's database. +#[derive(Deserialize, Serialize, Debug, Clone, Copy)] +#[serde(rename_all = "snake_case")] +pub enum Settings { + /// The name of the website -- displayed in the header and the browser titlebar. + SiteName, +} + +impl std::string::ToString for Settings { + fn to_string(&self) -> String { + serde_variant::to_variant_name(self).unwrap().to_string() + } +} + +/// Error signalled from the database. 
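// A StorageError displays as a kind prefix followed by its message (see the
// Display impl below); an illustrative rendering:
//     "not found: The post you have requested is not found in the database."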
+#[derive(Debug)] +pub struct StorageError { + msg: String, + source: Option<Box<dyn std::error::Error + Send + Sync>>, + kind: ErrorKind, +} + +impl warp::reject::Reject for StorageError {} + +impl std::error::Error for StorageError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.source + .as_ref() + .map(|e| e.as_ref() as &dyn std::error::Error) + } +} +impl From<serde_json::Error> for StorageError { + fn from(err: serde_json::Error) -> Self { + Self { + msg: format!("{}", err), + source: Some(Box::new(err)), + kind: ErrorKind::JsonParsing, + } + } +} +impl std::fmt::Display for StorageError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match match self.kind { + ErrorKind::Backend => write!(f, "backend error: "), + ErrorKind::JsonParsing => write!(f, "error while parsing JSON: "), + ErrorKind::PermissionDenied => write!(f, "permission denied: "), + ErrorKind::NotFound => write!(f, "not found: "), + ErrorKind::BadRequest => write!(f, "bad request: "), + ErrorKind::Conflict => write!(f, "conflict with an in-flight request or existing data: "), + ErrorKind::Other => write!(f, "generic storage layer error: "), + } { + Ok(_) => write!(f, "{}", self.msg), + Err(err) => Err(err), + } + } +} +impl serde::Serialize for StorageError { + fn serialize<S: serde::Serializer>( + &self, + serializer: S, + ) -> std::result::Result<S::Ok, S::Error> { + serializer.serialize_str(&self.to_string()) + } +} +impl StorageError { + /// Create a new StorageError of an ErrorKind with a message. + fn new(kind: ErrorKind, msg: &str) -> Self { + Self { + msg: msg.to_string(), + source: None, + kind, + } + } + /// Create a StorageError using another arbitrary Error as a source. + fn with_source( + kind: ErrorKind, + msg: &str, + source: Box<dyn std::error::Error + Send + Sync>, + ) -> Self { + Self { + msg: msg.to_string(), + source: Some(source), + kind, + } + } + /// Get the kind of an error. + pub fn kind(&self) -> ErrorKind { + self.kind + } + /// Get the message as a string slice. + pub fn msg(&self) -> &str { + &self.msg + } +} + +/// A special Result type for the Micropub backing storage. +pub type Result<T> = std::result::Result<T, StorageError>; + +/// Filter the post according to the value of `user`. 
+/// +/// Anonymous users cannot view private posts and protected locations; +/// Logged-in users can only view private posts targeted at them; +/// Logged-in users can't view private location data +pub fn filter_post( + mut post: serde_json::Value, + user: &'_ Option<String>, +) -> Option<serde_json::Value> { + if post["properties"]["deleted"][0].is_string() { + return Some(serde_json::json!({ + "type": post["type"], + "properties": { + "deleted": post["properties"]["deleted"] + } + })); + } + let empty_vec: Vec<serde_json::Value> = vec![]; + let author = post["properties"]["author"] + .as_array() + .unwrap_or(&empty_vec) + .iter() + .map(|i| i.as_str().unwrap().to_string()); + let visibility = post["properties"]["visibility"][0] + .as_str() + .unwrap_or("public"); + let mut audience = author.chain( + post["properties"]["audience"] + .as_array() + .unwrap_or(&empty_vec) + .iter() + .map(|i| i.as_str().unwrap().to_string()), + ); + if (visibility == "private" && !audience.any(|i| Some(i) == *user)) + || (visibility == "protected" && user.is_none()) + { + return None; + } + if post["properties"]["location"].is_array() { + let location_visibility = post["properties"]["location-visibility"][0] + .as_str() + .unwrap_or("private"); + let mut author = post["properties"]["author"] + .as_array() + .unwrap_or(&empty_vec) + .iter() + .map(|i| i.as_str().unwrap().to_string()); + if (location_visibility == "private" && !author.any(|i| Some(i) == *user)) + || (location_visibility == "protected" && user.is_none()) + { + post["properties"] + .as_object_mut() + .unwrap() + .remove("location"); + } + } + Some(post) +} + +/// A storage backend for the Micropub server. +/// +/// Implementations should note that all methods listed on this trait MUST be fully atomic +/// or lock the database so that write conflicts or reading half-written data should not occur. +#[async_trait] +pub trait Storage: std::fmt::Debug + Clone + Send + Sync { + /// Check if a post exists in the database. + async fn post_exists(&self, url: &str) -> Result<bool>; + + /// Load a post from the database in MF2-JSON format, deserialized from JSON. + async fn get_post(&self, url: &str) -> Result<Option<serde_json::Value>>; + + /// Save a post to the database as an MF2-JSON structure. + /// + /// Note that the `post` object MUST have `post["properties"]["uid"][0]` defined. + async fn put_post(&self, post: &'_ serde_json::Value, user: &'_ str) -> Result<()>; + + /// Modify a post using an update object as defined in the Micropub spec. + /// + /// Note to implementors: the update operation MUST be atomic and + /// SHOULD lock the database to prevent two clients overwriting + /// each other's changes or simply corrupting something. Rejecting + /// is allowed in case of concurrent updates if waiting for a lock + /// cannot be done. + async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()>; + + /// Get a list of channels available for the user represented by the URL `user` to write to. + async fn get_channels(&self, user: &'_ str) -> Result<Vec<MicropubChannel>>; + + /// Fetch a feed at `url` and return a an h-feed object containing + /// `limit` posts after a post by url `after`, filtering the content + /// in context of a user specified by `user` (or an anonymous user). 
+ /// + /// Specifically, private posts that don't include the user in the audience + /// will be elided from the feed, and the posts containing location and not + /// specifying post["properties"]["location-visibility"][0] == "public" + /// will have their location data (but not check-in data) stripped. + /// + /// This function is used as an optimization so the client, whatever it is, + /// doesn't have to fetch posts, then realize some of them are private, and + /// fetch more posts. + /// + /// Note for implementors: if you use streams to fetch posts in parallel + /// from the database, preferably make this method use a connection pool + /// to reduce overhead of creating a database connection per post for + /// parallel fetching. + async fn read_feed_with_limit( + &self, + url: &'_ str, + after: &'_ Option<String>, + limit: usize, + user: &'_ Option<String>, + ) -> Result<Option<serde_json::Value>>; + + /// Deletes a post from the database irreversibly. 'nuff said. Must be idempotent. + async fn delete_post(&self, url: &'_ str) -> Result<()>; + + /// Gets a setting from the setting store and passes the result. + async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result<String>; + + /// Commits a setting to the setting store. + async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()>; +} + +#[cfg(test)] +mod tests { + use super::{MicropubChannel, Storage}; + use serde_json::json; + + async fn test_basic_operations<Backend: Storage>(backend: Backend) { + let post: serde_json::Value = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Test content"], + "author": ["https://fireburn.ru/"], + "uid": ["https://fireburn.ru/posts/hello"], + "url": ["https://fireburn.ru/posts/hello", "https://fireburn.ru/posts/test"] + } + }); + let key = post["properties"]["uid"][0].as_str().unwrap().to_string(); + let alt_url = post["properties"]["url"][1].as_str().unwrap().to_string(); + + // Reading and writing + backend + .put_post(&post, "https://fireburn.ru/") + .await + .unwrap(); + if let Some(returned_post) = backend.get_post(&key).await.unwrap() { + assert!(returned_post.is_object()); + assert_eq!( + returned_post["type"].as_array().unwrap().len(), + post["type"].as_array().unwrap().len() + ); + assert_eq!( + returned_post["type"].as_array().unwrap(), + post["type"].as_array().unwrap() + ); + let props: &serde_json::Map<String, serde_json::Value> = + post["properties"].as_object().unwrap(); + for key in props.keys() { + assert_eq!( + returned_post["properties"][key].as_array().unwrap(), + post["properties"][key].as_array().unwrap() + ) + } + } else { + panic!("For some reason the backend did not return the post.") + } + // Check the alternative URL - it should return the same post + if let Ok(Some(returned_post)) = backend.get_post(&alt_url).await { + assert!(returned_post.is_object()); + assert_eq!( + returned_post["type"].as_array().unwrap().len(), + post["type"].as_array().unwrap().len() + ); + assert_eq!( + returned_post["type"].as_array().unwrap(), + post["type"].as_array().unwrap() + ); + let props: &serde_json::Map<String, serde_json::Value> = + post["properties"].as_object().unwrap(); + for key in props.keys() { + assert_eq!( + returned_post["properties"][key].as_array().unwrap(), + post["properties"][key].as_array().unwrap() + ) + } + } else { + panic!("For some reason the backend did not return the post.") + } + } + + /// Note: this is merely a smoke check and is in no way comprehensive. 
+ // TODO updates for feeds must update children using special logic + async fn test_update<Backend: Storage>(backend: Backend) { + let post: serde_json::Value = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Test content"], + "author": ["https://fireburn.ru/"], + "uid": ["https://fireburn.ru/posts/hello"], + "url": ["https://fireburn.ru/posts/hello", "https://fireburn.ru/posts/test"] + } + }); + let key = post["properties"]["uid"][0].as_str().unwrap().to_string(); + + // Reading and writing + backend + .put_post(&post, "https://fireburn.ru/") + .await + .unwrap(); + + backend + .update_post( + &key, + json!({ + "url": &key, + "add": { + "category": ["testing"], + }, + "replace": { + "content": ["Different test content"] + } + }), + ) + .await + .unwrap(); + + match backend.get_post(&key).await { + Ok(Some(returned_post)) => { + assert!(returned_post.is_object()); + assert_eq!( + returned_post["type"].as_array().unwrap().len(), + post["type"].as_array().unwrap().len() + ); + assert_eq!( + returned_post["type"].as_array().unwrap(), + post["type"].as_array().unwrap() + ); + assert_eq!( + returned_post["properties"]["content"][0].as_str().unwrap(), + "Different test content" + ); + assert_eq!( + returned_post["properties"]["category"].as_array().unwrap(), + &vec![json!("testing")] + ); + }, + something_else => { + something_else.expect("Shouldn't error").expect("Should have the post"); + } + } + } + + async fn test_get_channel_list<Backend: Storage>(backend: Backend) { + let feed = json!({ + "type": ["h-feed"], + "properties": { + "name": ["Main Page"], + "author": ["https://fireburn.ru/"], + "uid": ["https://fireburn.ru/feeds/main"] + }, + "children": [] + }); + backend + .put_post(&feed, "https://fireburn.ru/") + .await + .unwrap(); + let chans = backend.get_channels("https://fireburn.ru/").await.unwrap(); + assert_eq!(chans.len(), 1); + assert_eq!( + chans[0], + MicropubChannel { + uid: "https://fireburn.ru/feeds/main".to_string(), + name: "Main Page".to_string() + } + ); + } + + async fn test_settings<Backend: Storage>(backend: Backend) { + backend + .set_setting(crate::database::Settings::SiteName, "https://fireburn.ru/", "Vika's Hideout") + .await + .unwrap(); + assert_eq!( + backend + .get_setting(crate::database::Settings::SiteName, "https://fireburn.ru/") + .await + .unwrap(), + "Vika's Hideout" + ); + } + + fn gen_random_post(domain: &str) -> serde_json::Value { + use faker_rand::lorem::{Paragraphs, Word}; + + let uid = format!( + "https://{domain}/posts/{}-{}-{}", + rand::random::<Word>(), rand::random::<Word>(), rand::random::<Word>() + ); + + let post = json!({ + "type": ["h-entry"], + "properties": { + "content": [rand::random::<Paragraphs>().to_string()], + "uid": [&uid], + "url": [&uid] + } + }); + + post + } + + async fn test_feed_pagination<Backend: Storage>(backend: Backend) { + let posts = std::iter::from_fn(|| Some(gen_random_post("fireburn.ru"))) + .take(20) + .collect::<Vec<serde_json::Value>>(); + + let feed = json!({ + "type": ["h-feed"], + "properties": { + "name": ["Main Page"], + "author": ["https://fireburn.ru/"], + "uid": ["https://fireburn.ru/feeds/main"] + }, + "children": posts.iter() + .filter_map(|json| json["properties"]["uid"][0].as_str()) + .collect::<Vec<&str>>() + }); + let key = feed["properties"]["uid"][0].as_str().unwrap(); + + backend + .put_post(&feed, "https://fireburn.ru/") + .await + .unwrap(); + println!("---"); + for (i, post) in posts.iter().enumerate() { + backend.put_post(post, "https://fireburn.ru/").await.unwrap(); + 
println!("posts[{}] = {}", i, post["properties"]["uid"][0]); + } + println!("---"); + let limit: usize = 10; + let result = backend.read_feed_with_limit(key, &None, limit, &None) + .await + .unwrap() + .unwrap(); + for (i, post) in result["children"].as_array().unwrap().iter().enumerate() { + println!("feed[0][{}] = {}", i, post["properties"]["uid"][0]); + } + println!("---"); + assert_eq!(result["children"].as_array().unwrap()[0..10], posts[0..10]); + + let result2 = backend.read_feed_with_limit( + key, + &result["children"] + .as_array() + .unwrap() + .last() + .unwrap() + ["properties"]["uid"][0] + .as_str() + .map(|i| i.to_owned()), + limit, &None + ).await.unwrap().unwrap(); + for (i, post) in result2["children"].as_array().unwrap().iter().enumerate() { + println!("feed[1][{}] = {}", i, post["properties"]["uid"][0]); + } + println!("---"); + assert_eq!(result2["children"].as_array().unwrap()[0..10], posts[10..20]); + + // Regression test for #4 + let nonsense_after = Some("1010101010".to_owned()); + let result3 = tokio::time::timeout(tokio::time::Duration::from_secs(10), async move { + backend.read_feed_with_limit( + key, &nonsense_after, limit, &None + ).await.unwrap().unwrap() + }).await.expect("Operation should not hang: see https://gitlab.com/kittybox/kittybox/-/issues/4"); + assert!(result3["children"].as_array().unwrap().is_empty()); + } + + /// Automatically generates a test suite for + macro_rules! test_all { + ($func_name:ident, $mod_name:ident) => { + mod $mod_name { + $func_name!(test_basic_operations); + $func_name!(test_get_channel_list); + $func_name!(test_settings); + $func_name!(test_update); + $func_name!(test_feed_pagination); + } + } + } + macro_rules! file_test { + ($func_name:ident) => { + #[tokio::test] + async fn $func_name () { + test_logger::ensure_env_logger_initialized(); + let tempdir = tempdir::TempDir::new("file").expect("Failed to create tempdir"); + let backend = super::super::FileStorage::new(tempdir.into_path()).await.unwrap(); + super::$func_name(backend).await + } + }; + } + + test_all!(file_test, file); + +} diff --git a/kittybox-rs/src/database/redis/edit_post.lua b/kittybox-rs/src/database/redis/edit_post.lua new file mode 100644 index 0000000..a398f8d --- /dev/null +++ b/kittybox-rs/src/database/redis/edit_post.lua @@ -0,0 +1,93 @@ +local posts = KEYS[1] +local update_desc = cjson.decode(ARGV[2]) +local post = cjson.decode(redis.call("HGET", posts, ARGV[1])) + +local delete_keys = {} +local delete_kvs = {} +local add_keys = {} + +if update_desc.replace ~= nil then + for k, v in pairs(update_desc.replace) do + table.insert(delete_keys, k) + add_keys[k] = v + end +end +if update_desc.delete ~= nil then + if update_desc.delete[0] == nil then + -- Table has string keys. Probably! + for k, v in pairs(update_desc.delete) do + delete_kvs[k] = v + end + else + -- Table has numeric keys. Probably! 
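    -- Illustrative Micropub delete payloads (examples, not part of the commit):
    --   {"delete": ["category"]}                -- the array form removes whole properties
    --   {"delete": {"category": ["indieweb"]}}  -- the object form removes individual values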
+ for i, v in ipairs(update_desc.delete) do + table.insert(delete_keys, v) + end + end +end +if update_desc.add ~= nil then + for k, v in pairs(update_desc.add) do + add_keys[k] = v + end +end + +for i, v in ipairs(delete_keys) do + post["properties"][v] = nil + -- TODO delete URL links +end + +for k, v in pairs(delete_kvs) do + local index = -1 + if k == "children" then + for j, w in ipairs(post[k]) do + if w == v then + index = j + break + end + end + if index > -1 then + table.remove(post[k], index) + end + else + for j, w in ipairs(post["properties"][k]) do + if w == v then + index = j + break + end + end + if index > -1 then + table.remove(post["properties"][k], index) + -- TODO delete URL links + end + end +end + +for k, v in pairs(add_keys) do + if k == "children" then + if post["children"] == nil then + post["children"] = {} + end + for i, w in ipairs(v) do + table.insert(post["children"], 1, w) + end + else + if post["properties"][k] == nil then + post["properties"][k] = {} + end + for i, w in ipairs(v) do + table.insert(post["properties"][k], w) + end + if k == "url" then + redis.call("HSET", posts, v, cjson.encode({ see_other = post["properties"]["uid"][1] })) + elseif k == "channel" then + local feed = cjson.decode(redis.call("HGET", posts, v)) + table.insert(feed["children"], 1, post["properties"]["uid"][1]) + redis.call("HSET", posts, v, cjson.encode(feed)) + end + end +end + +local encoded = cjson.encode(post) +redis.call("SET", "debug", encoded) +redis.call("HSET", posts, post["properties"]["uid"][1], encoded) +return \ No newline at end of file diff --git a/kittybox-rs/src/database/redis/mod.rs b/kittybox-rs/src/database/redis/mod.rs new file mode 100644 index 0000000..eeaa3f2 --- /dev/null +++ b/kittybox-rs/src/database/redis/mod.rs @@ -0,0 +1,392 @@ +use async_trait::async_trait; +use futures::stream; +use futures_util::FutureExt; +use futures_util::StreamExt; +use futures_util::TryStream; +use futures_util::TryStreamExt; +use lazy_static::lazy_static; +use log::error; +use mobc::Pool; +use mobc_redis::redis; +use mobc_redis::redis::AsyncCommands; +use mobc_redis::RedisConnectionManager; +use serde_json::json; +use std::time::Duration; + +use crate::database::{ErrorKind, MicropubChannel, Result, Storage, StorageError, filter_post}; +use crate::indieauth::User; + +struct RedisScripts { + edit_post: redis::Script, +} + +impl From<mobc_redis::redis::RedisError> for StorageError { + fn from(err: mobc_redis::redis::RedisError) -> Self { + Self { + msg: format!("{}", err), + source: Some(Box::new(err)), + kind: ErrorKind::Backend, + } + } +} +impl From<mobc::Error<mobc_redis::redis::RedisError>> for StorageError { + fn from(err: mobc::Error<mobc_redis::redis::RedisError>) -> Self { + Self { + msg: format!("{}", err), + source: Some(Box::new(err)), + kind: ErrorKind::Backend, + } + } +} + +lazy_static! 
{ + static ref SCRIPTS: RedisScripts = RedisScripts { + edit_post: redis::Script::new(include_str!("./edit_post.lua")) + }; +} + +#[derive(Clone)] +pub struct RedisStorage { + // note to future Vika: + // mobc::Pool is actually a fancy name for an Arc + // around a shared connection pool with a manager + // which makes it safe to implement [`Clone`] and + // not worry about new pools being suddenly made + // + // stop worrying and start coding, you dum-dum + redis: mobc::Pool<RedisConnectionManager>, +} + +#[async_trait] +impl Storage for RedisStorage { + async fn get_setting<'a>(&self, setting: &'a str, user: &'a str) -> Result<String> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + Ok(conn + .hget::<String, &str, String>(format!("settings_{}", user), setting) + .await?) + } + + async fn set_setting<'a>(&self, setting: &'a str, user: &'a str, value: &'a str) -> Result<()> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + Ok(conn + .hset::<String, &str, &str, ()>(format!("settings_{}", user), setting, value) + .await?) + } + + async fn delete_post<'a>(&self, url: &'a str) -> Result<()> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + Ok(conn.hdel::<&str, &str, ()>("posts", url).await?) + } + + async fn post_exists(&self, url: &str) -> Result<bool> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + Ok(conn.hexists::<&str, &str, bool>("posts", url).await?) + } + + async fn get_post(&self, url: &str) -> Result<Option<serde_json::Value>> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + match conn + .hget::<&str, &str, Option<String>>("posts", url) + .await? + { + Some(val) => { + let parsed = serde_json::from_str::<serde_json::Value>(&val)?; + if let Some(new_url) = parsed["see_other"].as_str() { + match conn + .hget::<&str, &str, Option<String>>("posts", new_url) + .await? + { + Some(val) => Ok(Some(serde_json::from_str::<serde_json::Value>(&val)?)), + None => Ok(None), + } + } else { + Ok(Some(parsed)) + } + } + None => Ok(None), + } + } + + async fn get_channels(&self, user: &User) -> Result<Vec<MicropubChannel>> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + let channels = conn + .smembers::<String, Vec<String>>("channels_".to_string() + user.me.as_str()) + .await?; + // TODO: use streams here instead of this weird thing... how did I even write this?! 
+ Ok(futures_util::future::join_all( + channels + .iter() + .map(|channel| { + self.get_post(channel).map(|result| result.unwrap()).map( + |post: Option<serde_json::Value>| { + post.map(|post| MicropubChannel { + uid: post["properties"]["uid"][0].as_str().unwrap().to_string(), + name: post["properties"]["name"][0].as_str().unwrap().to_string(), + }) + }, + ) + }) + .collect::<Vec<_>>(), + ) + .await + .into_iter() + .flatten() + .collect::<Vec<_>>()) + } + + async fn put_post<'a>(&self, post: &'a serde_json::Value, user: &'a str) -> Result<()> { + let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?; + let key: &str; + match post["properties"]["uid"][0].as_str() { + Some(uid) => key = uid, + None => { + return Err(StorageError::new( + ErrorKind::BadRequest, + "post doesn't have a UID", + )) + } + } + conn.hset::<&str, &str, String, ()>("posts", key, post.to_string()) + .await?; + if post["properties"]["url"].is_array() { + for url in post["properties"]["url"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap().to_string()) + { + if url != key && url.starts_with(user) { + conn.hset::<&str, &str, String, ()>( + "posts", + &url, + json!({ "see_other": key }).to_string(), + ) + .await?; + } + } + } + if post["type"] + .as_array() + .unwrap() + .iter() + .any(|i| i == "h-feed") + { + // This is a feed. Add it to the channels array if it's not already there. + conn.sadd::<String, &str, ()>( + "channels_".to_string() + post["properties"]["author"][0].as_str().unwrap(), + key, + ) + .await? + } + Ok(()) + } + + async fn read_feed_with_limit<'a>( + &self, + url: &'a str, + after: &'a Option<String>, + limit: usize, + user: &'a Option<String>, + ) -> Result<Option<serde_json::Value>> { + let mut conn = self.redis.get().await?; + let mut feed; + match conn + .hget::<&str, &str, Option<String>>("posts", url) + .await + .map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))? + { + Some(post) => feed = serde_json::from_str::<serde_json::Value>(&post)?, + None => return Ok(None), + } + if feed["see_other"].is_string() { + match conn + .hget::<&str, &str, Option<String>>("posts", feed["see_other"].as_str().unwrap()) + .await? + { + Some(post) => feed = serde_json::from_str::<serde_json::Value>(&post)?, + None => return Ok(None), + } + } + if let Some(post) = filter_post(feed, user) { + feed = post + } else { + return Err(StorageError::new( + ErrorKind::PermissionDenied, + "specified user cannot access this post", + )); + } + if feed["children"].is_array() { + let children = feed["children"].as_array().unwrap(); + let mut posts_iter = children.iter().map(|i| i.as_str().unwrap().to_string()); + if after.is_some() { + loop { + let i = posts_iter.next(); + if &i == after { + break; + } + } + } + async fn fetch_post_for_feed(url: String) -> Option<serde_json::Value> { + return Some(serde_json::json!({})); + } + let posts = stream::iter(posts_iter) + .map(|url: String| async move { + return Ok(fetch_post_for_feed(url).await); + /*match self.redis.get().await { + Ok(mut conn) => { + match conn.hget::<&str, &str, Option<String>>("posts", &url).await { + Ok(post) => match post { + Some(post) => { + Ok(Some(serde_json::from_str(&post)?)) + } + // Happens because of a broken link (result of an improper deletion?) 
+                                    None => Ok(None),
+                                },
+                                Err(err) => Err(StorageError::with_source(ErrorKind::Backend, "Error executing a Redis command", Box::new(err)))
+                            }
+                        }
+                        Err(err) => Err(StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(err)))
+                    }*/
+                })
+                // TODO: determine the optimal value for this buffer
+                // It will probably depend on how often you can encounter a private post on the page
+                // It shouldn't be too large, or we'll start fetching too many posts from the database
+                // It MUST NOT be larger than the typical page size
+                // It MUST NOT be a significant amount of the connection pool size
+                //.buffered(std::cmp::min(3, limit))
+                // Hack to unwrap the Option and sieve out broken links
+                // Broken links return None, and Stream::filter_map skips all Nones.
+                // I wonder if one can use try_flatten() here somehow akin to iters
+                .try_filter_map(|post| async move { Ok(post) })
+                .try_filter_map(|post| async move {
+                    Ok(filter_post(post, user))
+                })
+                .take(limit);
+            match posts.try_collect::<Vec<serde_json::Value>>().await {
+                Ok(posts) => feed["children"] = json!(posts),
+                Err(err) => {
+                    let e = StorageError::with_source(
+                        ErrorKind::Other,
+                        "An error was encountered while processing the feed",
+                        Box::new(err)
+                    );
+                    error!("Error while assembling feed: {}", e);
+                    return Err(e);
+                }
+            }
+        }
+        return Ok(Some(feed));
+    }
+
+    async fn update_post<'a>(&self, mut url: &'a str, update: serde_json::Value) -> Result<()> {
+        let mut conn = self.redis.get().await.map_err(|e| StorageError::with_source(ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e)))?;
+        // Propagate backend errors instead of panicking on them.
+        if !conn
+            .hexists::<&str, &str, bool>("posts", url)
+            .await?
+        {
+            return Err(StorageError::new(
+                ErrorKind::NotFound,
+                "can't edit a non-existent post",
+            ));
+        }
+        let post: serde_json::Value =
+            serde_json::from_str(&conn.hget::<&str, &str, String>("posts", url).await?)?;
+        if let Some(new_url) = post["see_other"].as_str() {
+            url = new_url
+        }
+        Ok(SCRIPTS
+            .edit_post
+            .key("posts")
+            .arg(url)
+            .arg(update.to_string())
+            .invoke_async::<_, ()>(&mut conn as &mut redis::aio::Connection)
+            .await?)
+    }
+}
+
+impl RedisStorage {
+    /// Create a new `RedisStorage` that will connect to Redis at `redis_uri` to store data.
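+    ///
+    /// Illustrative usage (the URI is a placeholder, and the block is marked
+    /// `ignore` because it is a sketch rather than a tested doctest):
+    ///
+    /// ```ignore
+    /// let storage = RedisStorage::new("redis://127.0.0.1:6379/".to_string())
+    ///     .await
+    ///     .expect("Failed to set up the Redis storage backend");
+    /// storage.set_setting("site_name", "https://example.com/", "Example Blog").await?;
+    /// ```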
+ pub async fn new(redis_uri: String) -> Result<Self> { + match redis::Client::open(redis_uri) { + Ok(client) => Ok(Self { + redis: Pool::builder() + .max_open(20) + .max_idle(5) + .get_timeout(Some(Duration::from_secs(3))) + .max_lifetime(Some(Duration::from_secs(120))) + .build(RedisConnectionManager::new(client)), + }), + Err(e) => Err(e.into()), + } + } + + pub async fn conn(&self) -> Result<mobc::Connection<mobc_redis::RedisConnectionManager>> { + self.redis.get().await.map_err(|e| StorageError::with_source( + ErrorKind::Backend, "Error getting a connection from the pool", Box::new(e) + )) + } +} + +#[cfg(test)] +pub mod tests { + use mobc_redis::redis; + use std::process; + use std::time::Duration; + + pub struct RedisInstance { + // We just need to hold on to it so it won't get dropped and remove the socket + _tempdir: tempdir::TempDir, + uri: String, + child: std::process::Child, + } + impl Drop for RedisInstance { + fn drop(&mut self) { + self.child.kill().expect("Failed to kill the child!"); + } + } + impl RedisInstance { + pub fn uri(&self) -> &str { + &self.uri + } + } + + pub async fn get_redis_instance() -> RedisInstance { + let tempdir = tempdir::TempDir::new("redis").expect("failed to create tempdir"); + let socket = tempdir.path().join("redis.sock"); + let redis_child = process::Command::new("redis-server") + .current_dir(&tempdir) + .arg("--port") + .arg("0") + .arg("--unixsocket") + .arg(&socket) + .stdout(process::Stdio::null()) + .stderr(process::Stdio::null()) + .spawn() + .expect("Failed to spawn Redis"); + println!("redis+unix:///{}", socket.to_str().unwrap()); + let uri = format!("redis+unix:///{}", socket.to_str().unwrap()); + // There should be a slight delay, we need to wait for Redis to spin up + let client = redis::Client::open(uri.clone()).unwrap(); + let millisecond = Duration::from_millis(1); + let mut retries: usize = 0; + const MAX_RETRIES: usize = 60 * 1000/*ms*/; + while let Err(err) = client.get_connection() { + if err.is_connection_refusal() { + async_std::task::sleep(millisecond).await; + retries += 1; + if retries > MAX_RETRIES { + panic!("Timeout waiting for Redis, last error: {}", err); + } + } else { + panic!("Could not connect: {}", err); + } + } + + RedisInstance { + uri, + child: redis_child, + _tempdir: tempdir, + } + } +} diff --git a/kittybox-rs/src/frontend/login.rs b/kittybox-rs/src/frontend/login.rs new file mode 100644 index 0000000..9665ce7 --- /dev/null +++ b/kittybox-rs/src/frontend/login.rs @@ -0,0 +1,333 @@ +use http_types::Mime; +use log::{debug, error}; +use rand::Rng; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::convert::TryInto; +use std::str::FromStr; + +use crate::frontend::templates::Template; +use crate::frontend::{FrontendError, IndiewebEndpoints}; +use crate::{database::Storage, ApplicationState}; +use kittybox_templates::LoginPage; + +pub async fn form<S: Storage>(req: Request<ApplicationState<S>>) -> Result { + let owner = req.url().origin().ascii_serialization() + "/"; + let storage = &req.state().storage; + let authorization_endpoint = req.state().authorization_endpoint.to_string(); + let token_endpoint = req.state().token_endpoint.to_string(); + let blog_name = storage + .get_setting("site_name", &owner) + .await + .unwrap_or_else(|_| "Kitty Box!".to_string()); + let feeds = storage.get_channels(&owner).await.unwrap_or_default(); + + Ok(Response::builder(200) + .body( + Template { + title: "Sign in with IndieAuth", + blog_name: &blog_name, + endpoints: IndiewebEndpoints { + 
authorization_endpoint, + token_endpoint, + webmention: None, + microsub: None, + }, + feeds, + user: req.session().get("user"), + content: LoginPage {}.to_string(), + } + .to_string(), + ) + .content_type("text/html; charset=utf-8") + .build()) +} + +#[derive(Serialize, Deserialize)] +struct LoginForm { + url: String, +} + +#[derive(Serialize, Deserialize)] +struct IndieAuthClientState { + /// A random value to protect from CSRF attacks. + nonce: String, + /// The user's initial "me" value. + me: String, + /// Authorization endpoint used. + authorization_endpoint: String, +} + +#[derive(Serialize, Deserialize)] +struct IndieAuthRequestParams { + response_type: String, // can only have "code". TODO make an enum + client_id: String, // always a URL. TODO consider making a URL + redirect_uri: surf::Url, // callback URI for IndieAuth + state: String, // CSRF protection, should include randomness and be passed through + code_challenge: String, // base64-encoded PKCE challenge + code_challenge_method: String, // usually "S256". TODO make an enum + scope: Option<String>, // oAuth2 scopes to grant, + me: surf::Url, // User's entered profile URL +} + +/// Handle login requests. Find the IndieAuth authorization endpoint and redirect to it. +pub async fn handler<S: Storage>(mut req: Request<ApplicationState<S>>) -> Result { + let content_type = req.content_type(); + if content_type.is_none() { + return Err(FrontendError::with_code(400, "Use the login form, Luke.").into()); + } + if content_type.unwrap() != Mime::from_str("application/x-www-form-urlencoded").unwrap() { + return Err( + FrontendError::with_code(400, "Login form results must be a urlencoded form").into(), + ); + } + + let form = req.body_form::<LoginForm>().await?; // FIXME check if it returns 400 or 500 on error + let homepage_uri = surf::Url::parse(&form.url)?; + let http = &req.state().http_client; + + let mut fetch_response = http.get(&homepage_uri).send().await?; + if fetch_response.status() != 200 { + return Err(FrontendError::with_code( + 500, + "Error fetching your authorization endpoint. Check if your website's okay.", + ) + .into()); + } + + let mut authorization_endpoint: Option<surf::Url> = None; + if let Some(links) = fetch_response.header("Link") { + // NOTE: this is the same Link header parser used in src/micropub/post.rs:459. + // One should refactor it to a function to use independently and improve later + for link in links.iter().flat_map(|i| i.as_str().split(',')) { + debug!("Trying to match {} as authorization_endpoint", link); + let mut split_link = link.split(';'); + + match split_link.next() { + Some(uri) => { + if let Some(uri) = uri.strip_prefix('<').and_then(|uri| uri.strip_suffix('>')) { + debug!("uri: {}", uri); + for prop in split_link { + debug!("prop: {}", prop); + let lowercased = prop.to_ascii_lowercase(); + let trimmed = lowercased.trim(); + if trimmed == "rel=\"authorization_endpoint\"" + || trimmed == "rel=authorization_endpoint" + { + if let Ok(endpoint) = homepage_uri.join(uri) { + debug!( + "Found authorization endpoint {} for user {}", + endpoint, + homepage_uri.as_str() + ); + authorization_endpoint = Some(endpoint); + break; + } + } + } + } + } + None => continue, + } + } + } + // If the authorization_endpoint is still not found after the Link parsing gauntlet, + // bring out the big guns and parse HTML to find it. 
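+    // For reference, the HTML fallback below searches the fetched page for a
+    // tag of this shape (illustrative values):
+    //
+    //     <link rel="authorization_endpoint" href="https://example.com/auth">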
+    if authorization_endpoint.is_none() {
+        let body = fetch_response.body_string().await?;
+        let pattern =
+            easy_scraper::Pattern::new(r#"<link rel="authorization_endpoint" href="{{url}}">"#)
+                .expect("Cannot parse the pattern for authorization_endpoint");
+        let matches = pattern.matches(&body);
+        debug!("Matches for authorization_endpoint in HTML: {:?}", matches);
+        if !matches.is_empty() {
+            if let Ok(endpoint) = homepage_uri.join(&matches[0]["url"]) {
+                debug!(
+                    "Found authorization endpoint {} for user {}",
+                    endpoint,
+                    homepage_uri.as_str()
+                );
+                authorization_endpoint = Some(endpoint)
+            }
+        }
+    };
+    // If even after this the authorization endpoint is still not found, bail out.
+    if authorization_endpoint.is_none() {
+        error!(
+            "Couldn't find authorization_endpoint for {}",
+            homepage_uri.as_str()
+        );
+        return Err(FrontendError::with_code(
+            400,
+            "Your website doesn't support the IndieAuth protocol.",
+        )
+        .into());
+    }
+    let mut authorization_endpoint: surf::Url = authorization_endpoint.unwrap();
+    let mut rng = rand::thread_rng();
+    let state: String = data_encoding::BASE64URL.encode(
+        serde_urlencoded::to_string(IndieAuthClientState {
+            nonce: (0..8)
+                .map(|_| {
+                    let idx = rng.gen_range(0..INDIEAUTH_PKCE_CHARSET.len());
+                    INDIEAUTH_PKCE_CHARSET[idx] as char
+                })
+                .collect(),
+            me: homepage_uri.to_string(),
+            authorization_endpoint: authorization_endpoint.to_string(),
+        })?
+        .as_bytes(),
+    );
+    // PKCE code generation
+    let code_verifier: String = (0..128)
+        .map(|_| {
+            let idx = rng.gen_range(0..INDIEAUTH_PKCE_CHARSET.len());
+            INDIEAUTH_PKCE_CHARSET[idx] as char
+        })
+        .collect();
+    let mut hasher = Sha256::new();
+    hasher.update(code_verifier.as_bytes());
+    let code_challenge: String = data_encoding::BASE64URL.encode(&hasher.finalize());
+
+    authorization_endpoint.set_query(Some(&serde_urlencoded::to_string(
+        IndieAuthRequestParams {
+            response_type: "code".to_string(),
+            client_id: req.url().origin().ascii_serialization(),
+            redirect_uri: req.url().join("login/callback")?,
+            state: state.clone(),
+            code_challenge,
+            code_challenge_method: "S256".to_string(),
+            scope: Some("profile".to_string()),
+            me: homepage_uri,
+        },
+    )?));
+
+    // Note: cookie attributes use `=`, not `:` (SameSite=None, Max-Age=600).
+    let cookies = vec![
+        format!(
+            r#"indieauth_state="{}"; SameSite=None; Secure; Max-Age=600"#,
+            state
+        ),
+        format!(
+            r#"indieauth_code_verifier="{}"; SameSite=None; Secure; Max-Age=600"#,
+            code_verifier
+        ),
+    ];
+
+    let cookie_header = cookies
+        .iter()
+        .map(|i| -> http_types::headers::HeaderValue { (i as &str).try_into().unwrap() })
+        .collect::<Vec<_>>();
+
+    Ok(Response::builder(302)
+        .header("Location", authorization_endpoint.to_string())
+        .header("Set-Cookie", &*cookie_header)
+        .build())
+}
+
+const INDIEAUTH_PKCE_CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
+                                        abcdefghijklmnopqrstuvwxyz\
+                                        1234567890-._~";
+
+#[derive(Deserialize)]
+struct IndieAuthCallbackResponse {
+    code: Option<String>,
+    error: Option<String>,
+    error_description: Option<String>,
+    #[allow(dead_code)]
+    error_uri: Option<String>,
+    // This is always present; it is decoded further to recover the client state.
+    state: String,
+}
+
+impl IndieAuthCallbackResponse {
+    fn is_successful(&self) -> bool {
+        self.code.is_some()
+    }
+}
+
+#[derive(Serialize, Deserialize)]
+struct IndieAuthCodeRedeem {
+    grant_type: String,
+    code: String,
+    client_id: String,
+    redirect_uri: String,
+    code_verifier: String,
+}
+
+#[derive(Serialize, Deserialize)]
+struct IndieWebProfile {
+    name: Option<String>,
+    url: Option<String>,
+    email: Option<String>,
+    photo: Option<String>,
+}
+
+#[derive(Serialize, Deserialize)]
+struct IndieAuthResponse {
+    me: String,
+    scope: Option<String>,
+    access_token: Option<String>,
+    token_type: Option<String>,
+    profile: Option<IndieWebProfile>,
+}
+
+/// Handle IndieAuth parameters, fetch the final h-card and redirect the user to the homepage.
+pub async fn callback<S: Storage>(mut req: Request<ApplicationState<S>>) -> Result {
+    let params: IndieAuthCallbackResponse = req.query()?;
+    let http: &surf::Client = &req.state().http_client;
+    let origin = req.url().origin().ascii_serialization();
+
+    if req.cookie("indieauth_state").unwrap().value() != params.state {
+        return Err(FrontendError::with_code(400, "The state doesn't match. A possible CSRF attack was prevented. Please try again later.").into());
+    }
+    let state: IndieAuthClientState =
+        serde_urlencoded::from_bytes(&data_encoding::BASE64URL.decode(params.state.as_bytes())?)?;
+
+    if !params.is_successful() {
+        return Err(FrontendError::with_code(
+            400,
+            &format!(
+                "The authorization endpoint indicated the following error: {:?}: {:?}",
+                &params.error, &params.error_description
+            ),
+        )
+        .into());
+    }
+
+    let authorization_endpoint = surf::Url::parse(&state.authorization_endpoint).unwrap();
+    let mut code_response = http
+        .post(authorization_endpoint)
+        .body_string(serde_urlencoded::to_string(IndieAuthCodeRedeem {
+            grant_type: "authorization_code".to_string(),
+            code: params.code.unwrap().to_string(),
+            client_id: origin.to_string(),
+            redirect_uri: origin + "/login/callback",
+            code_verifier: req
+                .cookie("indieauth_code_verifier")
+                .unwrap()
+                .value()
+                .to_string(),
+        })?)
+        .header("Content-Type", "application/x-www-form-urlencoded")
+        .header("Accept", "application/json")
+        .send()
+        .await?;
+
+    if code_response.status() != 200 {
+        return Err(FrontendError::with_code(
+            code_response.status(),
+            &format!(
+                "Authorization endpoint returned an error when redeeming the code: {}",
+                code_response.body_string().await?
+ ), + ) + .into()); + } + + let json: IndieAuthResponse = code_response.body_json().await?; + let session = req.session_mut(); + session.insert("user", &json.me)?; + + // TODO redirect to the page user came from + Ok(Response::builder(302).header("Location", "/").build()) +} diff --git a/kittybox-rs/src/frontend/mod.rs b/kittybox-rs/src/frontend/mod.rs new file mode 100644 index 0000000..b87f9c6 --- /dev/null +++ b/kittybox-rs/src/frontend/mod.rs @@ -0,0 +1,459 @@ +use std::convert::TryInto; +use crate::database::Storage; +use serde::Deserialize; +use futures_util::TryFutureExt; +use warp::{http::StatusCode, Filter, host::Authority, path::FullPath}; + +//pub mod login; + +#[allow(unused_imports)] +use kittybox_templates::{ErrorPage, MainPage, OnboardingPage, Template, POSTS_PER_PAGE}; + +pub use kittybox_util::IndiewebEndpoints; + +#[derive(Deserialize)] +struct QueryParams { + after: Option<String>, +} + +#[derive(Debug)] +struct FrontendError { + msg: String, + source: Option<Box<dyn std::error::Error + Send + Sync + 'static>>, + code: StatusCode, +} + +impl FrontendError { + pub fn with_code<C>(code: C, msg: &str) -> Self + where + C: TryInto<StatusCode>, + { + Self { + msg: msg.to_string(), + source: None, + code: code.try_into().unwrap_or(StatusCode::INTERNAL_SERVER_ERROR), + } + } + pub fn msg(&self) -> &str { + &self.msg + } + pub fn code(&self) -> StatusCode { + self.code + } +} + +impl From<crate::database::StorageError> for FrontendError { + fn from(err: crate::database::StorageError) -> Self { + Self { + msg: "Database error".to_string(), + source: Some(Box::new(err)), + code: StatusCode::INTERNAL_SERVER_ERROR, + } + } +} + +impl std::error::Error for FrontendError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.source + .as_ref() + .map(|e| e.as_ref() as &(dyn std::error::Error + 'static)) + } +} + +impl std::fmt::Display for FrontendError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.msg) + } +} + +impl warp::reject::Reject for FrontendError {} + +async fn get_post_from_database<S: Storage>( + db: &S, + url: &str, + after: Option<String>, + user: &Option<String>, +) -> std::result::Result<serde_json::Value, FrontendError> { + match db + .read_feed_with_limit(url, &after, POSTS_PER_PAGE, user) + .await + { + Ok(result) => match result { + Some(post) => Ok(post), + None => Err(FrontendError::with_code( + StatusCode::NOT_FOUND, + "Post not found in the database", + )), + }, + Err(err) => match err.kind() { + crate::database::ErrorKind::PermissionDenied => { + // TODO: Authentication + if user.is_some() { + Err(FrontendError::with_code( + StatusCode::FORBIDDEN, + "User authenticated AND forbidden to access this resource", + )) + } else { + Err(FrontendError::with_code( + StatusCode::UNAUTHORIZED, + "User needs to authenticate themselves", + )) + } + } + _ => Err(err.into()), + }, + } +} + +#[allow(dead_code)] +#[derive(Deserialize)] +struct OnboardingFeed { + slug: String, + name: String, +} + +#[allow(dead_code)] +#[derive(Deserialize)] +struct OnboardingData { + user: serde_json::Value, + first_post: serde_json::Value, + #[serde(default = "OnboardingData::default_blog_name")] + blog_name: String, + feeds: Vec<OnboardingFeed>, +} + +impl OnboardingData { + fn default_blog_name() -> String { + "Kitty Box!".to_owned() + } +} + +/*pub async fn onboarding_receiver<S: Storage>(mut req: Request<ApplicationState<S>>) -> Result { + use serde_json::json; + + log::debug!("Entering onboarding receiver..."); + + 
// This cannot error out as the URL must be valid. Or there is something horribly wrong + // and we shouldn't serve this request anyway. + <dyn AsMut<tide::http::Request>>::as_mut(&mut req) + .url_mut() + .set_scheme("https") + .unwrap(); + + log::debug!("Parsing the body..."); + let body = req.body_json::<OnboardingData>().await?; + log::debug!("Body parsed!"); + let backend = &req.state().storage; + + #[cfg(any(not(debug_assertions), test))] + let me = req.url(); + #[cfg(all(debug_assertions, not(test)))] + let me = url::Url::parse("https://localhost:8080/").unwrap(); + + log::debug!("me value: {:?}", me); + + if get_post_from_database(backend, me.as_str(), None, &None) + .await + .is_ok() + { + return Err(FrontendError::with_code( + StatusCode::Forbidden, + "Onboarding is over. Are you trying to take over somebody's website?!", + ) + .into()); + } + info!("Onboarding new user: {}", me); + + let user = crate::indieauth::User::new(me.as_str(), "https://kittybox.fireburn.ru/", "create"); + + log::debug!("Setting the site name to {}", &body.blog_name); + backend + .set_setting("site_name", user.me.as_str(), &body.blog_name) + .await?; + + if body.user["type"][0] != "h-card" || body.first_post["type"][0] != "h-entry" { + return Err(FrontendError::with_code( + StatusCode::BadRequest, + "user and first_post should be h-card and h-entry", + ) + .into()); + } + info!("Validated body.user and body.first_post as microformats2"); + + let mut hcard = body.user; + let hentry = body.first_post; + + // Ensure the h-card's UID is set to the main page, so it will be fetchable. + hcard["properties"]["uid"] = json!([me.as_str()]); + // Normalize the h-card - note that it should preserve the UID we set here. + let (_, hcard) = crate::micropub::normalize_mf2(hcard, &user); + // The h-card is written directly - all the stuff in the Micropub's + // post function is just to ensure that the posts will be syndicated + // and inserted into proper feeds. Here, we don't have a need for this, + // since the h-card is DIRECTLY accessible via its own URL. + log::debug!("Saving the h-card..."); + backend.put_post(&hcard, me.as_str()).await?; + + log::debug!("Creating feeds..."); + for feed in body.feeds { + if feed.name.is_empty() || feed.slug.is_empty() { + continue; + }; + log::debug!("Creating feed {} with slug {}", &feed.name, &feed.slug); + let (_, feed) = crate::micropub::normalize_mf2( + json!({ + "type": ["h-feed"], + "properties": {"name": [feed.name], "mp-slug": [feed.slug]} + }), + &user, + ); + + backend.put_post(&feed, me.as_str()).await?; + } + log::debug!("Saving the h-entry..."); + // This basically puts the h-entry post through the normal creation process. + // We need to insert it into feeds and optionally send a notification to everywhere. 
+ req.set_ext(user); + crate::micropub::post::new_post(req, hentry).await?; + + Ok(Response::builder(201).header("Location", "/").build()) +} +*/ + +fn request_uri() -> impl Filter<Extract = (String,), Error = warp::Rejection> + Copy { + crate::util::require_host() + .and(warp::path::full()) + .map(|host: Authority, path: FullPath| "https://".to_owned() + host.as_str() + path.as_str()) +} + +#[forbid(clippy::unwrap_used)] +pub fn homepage<D: Storage>(db: D, endpoints: IndiewebEndpoints) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone { + let inject_db = move || db.clone(); + warp::any() + .map(inject_db.clone()) + .and(crate::util::require_host()) + .and(warp::query()) + .and_then(|db: D, host: Authority, q: QueryParams| async move { + let path = format!("https://{}/", host); + let feed_path = format!("https://{}/feeds/main", host); + + match tokio::try_join!( + get_post_from_database(&db, &path, None, &None), + get_post_from_database(&db, &feed_path, q.after, &None) + ) { + Ok((hcard, hfeed)) => Ok(( + Some(hcard), + Some(hfeed), + StatusCode::OK + )), + Err(err) => { + if err.code == StatusCode::NOT_FOUND { + // signal for onboarding flow + Ok((None, None, err.code)) + } else { + Err(warp::reject::custom(err)) + } + } + } + }) + .and(warp::any().map(move || endpoints.clone())) + .and(crate::util::require_host()) + .and(warp::any().map(inject_db)) + .then(|content: (Option<serde_json::Value>, Option<serde_json::Value>, StatusCode), endpoints: IndiewebEndpoints, host: Authority, db: D| async move { + let owner = format!("https://{}/", host.as_str()); + let blog_name = db.get_setting(crate::database::Settings::SiteName, &owner).await + .unwrap_or_else(|_| "Kitty Box!".to_string()); + let feeds = db.get_channels(&owner).await.unwrap_or_default(); + match content { + (Some(card), Some(feed), StatusCode::OK) => { + Box::new(warp::reply::html(Template { + title: &blog_name, + blog_name: &blog_name, + endpoints: Some(endpoints), + feeds, + user: None, // TODO + content: MainPage { feed: &feed, card: &card }.to_string() + }.to_string())) as Box<dyn warp::Reply> + }, + (None, None, StatusCode::NOT_FOUND) => { + // TODO Onboarding + Box::new(warp::redirect::found( + hyper::Uri::from_static("/onboarding") + )) as Box<dyn warp::Reply> + } + _ => unreachable!() + } + }) +} + +pub fn onboarding<D: 'static + Storage>( + db: D, + endpoints: IndiewebEndpoints, + http: reqwest::Client +) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone { + let inject_db = move || db.clone(); + warp::get() + .map(move || warp::reply::html(Template { + title: "Kittybox - Onboarding", + blog_name: "Kittybox", + endpoints: Some(endpoints.clone()), + feeds: vec![], + user: None, + content: OnboardingPage {}.to_string() + }.to_string())) + .or(warp::post() + .and(crate::util::require_host()) + .and(warp::any().map(inject_db)) + .and(warp::body::json::<OnboardingData>()) + .and(warp::any().map(move || http.clone())) + .and_then(|host: warp::host::Authority, db: D, body: OnboardingData, http: reqwest::Client| async move { + let user_uid = format!("https://{}/", host.as_str()); + if db.post_exists(&user_uid).await.map_err(FrontendError::from)? 
{ + + return Ok(warp::redirect(hyper::Uri::from_static("/"))); + } + let user = crate::indieauth::User::new(&user_uid, "https://kittybox.fireburn.ru/", "create"); + if body.user["type"][0] != "h-card" || body.first_post["type"][0] != "h-entry" { + return Err(FrontendError::with_code(StatusCode::BAD_REQUEST, "user and first_post should be an h-card and an h-entry").into()); + } + db.set_setting(crate::database::Settings::SiteName, user.me.as_str(), &body.blog_name) + .await + .map_err(FrontendError::from)?; + + let (_, hcard) = { + let mut hcard = body.user; + hcard["properties"]["uid"] = serde_json::json!([&user_uid]); + crate::micropub::normalize_mf2(hcard, &user) + }; + db.put_post(&hcard, &user_uid).await.map_err(FrontendError::from)?; + let (uid, post) = crate::micropub::normalize_mf2(body.first_post, &user); + crate::micropub::_post(user, uid, post, db, http).await.map_err(|e| { + FrontendError { + msg: "Error while posting the first post".to_string(), + source: Some(Box::new(e)), + code: StatusCode::INTERNAL_SERVER_ERROR + } + })?; + Ok::<_, warp::Rejection>(warp::redirect(hyper::Uri::from_static("/"))) + })) + +} + +#[forbid(clippy::unwrap_used)] +pub fn catchall<D: Storage>(db: D, endpoints: IndiewebEndpoints) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone { + let inject_db = move || db.clone(); + warp::any() + .map(inject_db.clone()) + .and(request_uri()) + .and(warp::query()) + .and_then(|db: D, path: String, query: QueryParams| async move { + get_post_from_database(&db, &path, query.after, &None).map_err(warp::reject::custom).await + }) + // Rendering pipeline + .and_then(|post: serde_json::Value| async move { + let post_name = &post["properties"]["name"][0].as_str().to_owned(); + match post["type"][0] + .as_str() + { + Some("h-entry") => Ok(( + post_name.unwrap_or("Note").to_string(), + kittybox_templates::Entry { post: &post }.to_string(), + StatusCode::OK + )), + Some("h-card") => Ok(( + post_name.unwrap_or("Contact card").to_string(), + kittybox_templates::VCard { card: &post }.to_string(), + StatusCode::OK + )), + Some("h-feed") => Ok(( + post_name.unwrap_or("Feed").to_string(), + kittybox_templates::Feed { feed: &post }.to_string(), + StatusCode::OK + )), + _ => Err(warp::reject::custom(FrontendError::with_code( + StatusCode::INTERNAL_SERVER_ERROR, + &format!("Couldn't render an unknown type: {}", post["type"][0]), + ))) + } + }) + .recover(|err: warp::Rejection| { + use warp::Rejection; + use futures_util::future; + if let Some(err) = err.find::<FrontendError>() { + return future::ok::<(String, String, StatusCode), Rejection>(( + format!("Error: HTTP {}", err.code().as_u16()), + ErrorPage { code: err.code(), msg: Some(err.msg().to_string()) }.to_string(), + err.code() + )); + } + future::err::<(String, String, StatusCode), Rejection>(err) + }) + .unify() + .and(warp::any().map(move || endpoints.clone())) + .and(crate::util::require_host()) + .and(warp::any().map(inject_db)) + .then(|content: (String, String, StatusCode), endpoints: IndiewebEndpoints, host: Authority, db: D| async move { + let owner = format!("https://{}/", host.as_str()); + let blog_name = db.get_setting(crate::database::Settings::SiteName, &owner).await + .unwrap_or_else(|_| "Kitty Box!".to_string()); + let feeds = db.get_channels(&owner).await.unwrap_or_default(); + let (title, content, code) = content; + warp::reply::with_status(warp::reply::html(Template { + title: &title, + blog_name: &blog_name, + endpoints: Some(endpoints), + feeds, + user: None, // TODO + content, + 
}.to_string()), code) + }) + +} + +static STYLE_CSS: &[u8] = include_bytes!("./style.css"); +static ONBOARDING_JS: &[u8] = include_bytes!("./onboarding.js"); +static ONBOARDING_CSS: &[u8] = include_bytes!("./onboarding.css"); + +static MIME_JS: &str = "application/javascript"; +static MIME_CSS: &str = "text/css"; + +fn _dispatch_static(name: &str) -> Option<(&'static [u8], &'static str)> { + match name { + "style.css" => Some((STYLE_CSS, MIME_CSS)), + "onboarding.js" => Some((ONBOARDING_JS, MIME_JS)), + "onboarding.css" => Some((ONBOARDING_CSS, MIME_CSS)), + _ => None + } +} + +pub fn static_files() -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Copy { + use futures_util::future; + + warp::get() + .and(warp::path::param() + .and_then(|filename: String| { + match _dispatch_static(&filename) { + Some((buf, content_type)) => future::ok( + warp::reply::with_header( + buf, "Content-Type", content_type + ) + ), + None => future::err(warp::reject()) + } + })) + .or(warp::head() + .and(warp::path::param() + .and_then(|filename: String| { + match _dispatch_static(&filename) { + Some((buf, content_type)) => future::ok( + warp::reply::with_header( + warp::reply::with_header( + warp::reply(), "Content-Type", content_type + ), + "Content-Length", buf.len() + ) + ), + None => future::err(warp::reject()) + } + }))) +} diff --git a/kittybox-rs/src/frontend/onboarding.css b/kittybox-rs/src/frontend/onboarding.css new file mode 100644 index 0000000..6f191b9 --- /dev/null +++ b/kittybox-rs/src/frontend/onboarding.css @@ -0,0 +1,33 @@ +form.onboarding > ul#progressbar > li.active { + font-weight: bold; +} +form.onboarding > ul#progressbar { + display: flex; list-style: none; justify-content: space-around; +} + +form.onboarding > fieldset > div.switch_card_buttons { + display: flex; + justify-content: space-between; + width: 100%; +} +form.onboarding > fieldset > div.switch_card_buttons button:last-child { + margin-left: auto; +} +.form_group, .multi_input { + display: flex; + flex-direction: column; +} +.multi_input { + align-items: start; +} +.multi_input > input { + width: 100%; + align-self: stretch; +} +form.onboarding > fieldset > .form_group + * { + margin-top: .75rem; +} +form.onboarding textarea { + width: 100%; + resize: vertical; +} diff --git a/kittybox-rs/src/frontend/onboarding.js b/kittybox-rs/src/frontend/onboarding.js new file mode 100644 index 0000000..7f9aa32 --- /dev/null +++ b/kittybox-rs/src/frontend/onboarding.js @@ -0,0 +1,87 @@ +const firstOnboardingCard = "intro"; + +function switchOnboardingCard(card) { + Array.from(document.querySelectorAll("form.onboarding > fieldset")).map(node => { + if (node.id == card) { + node.style.display = "block"; + } else { + node.style.display = "none"; + } + }); + + Array.from(document.querySelectorAll("form.onboarding > ul#progressbar > li")).map(node => { + if (node.id == card) { + node.classList.add("active") + } else { + node.classList.remove("active") + } + }) +}; + +window.kittybox_onboarding = { + switchOnboardingCard +}; + +document.querySelector("form.onboarding > ul#progressbar").style.display = ""; +switchOnboardingCard(firstOnboardingCard); + +function switchCardOnClick(event) { + switchOnboardingCard(event.target.dataset.card) +} + +function multiInputAddMore(event) { + let parent = event.target.parentElement; + let template = event.target.parentElement.querySelector("template").content.cloneNode(true); + parent.prepend(template); +} + +Array.from(document.querySelectorAll("form.onboarding > fieldset 
button.switch_card")).map(button => { + button.addEventListener("click", switchCardOnClick) +}) + +Array.from(document.querySelectorAll("form.onboarding > fieldset div.multi_input > button.add_more")).map(button => { + button.addEventListener("click", multiInputAddMore) + multiInputAddMore({ target: button }); +}) + +const form = document.querySelector("form.onboarding"); +console.log(form); +form.onsubmit = async (event) => { + console.log(event); + event.preventDefault(); + const form = event.target; + const json = { + user: { + type: ["h-card"], + properties: { + name: [form.querySelector("#hcard_name").value], + pronoun: Array.from(form.querySelectorAll("#hcard_pronouns")).map(input => input.value).filter(i => i != ""), + url: Array.from(form.querySelectorAll("#hcard_url")).map(input => input.value).filter(i => i != ""), + note: [form.querySelector("#hcard_note").value] + } + }, + first_post: { + type: ["h-entry"], + properties: { + content: [form.querySelector("#first_post_content").value] + } + }, + blog_name: form.querySelector("#blog_name").value, + feeds: Array.from(form.querySelectorAll(".multi_input#custom_feeds > fieldset.feed")).map(form => { + return { + name: form.querySelector("#feed_name").value, + slug: form.querySelector("#feed_slug").value + } + }).filter(feed => feed.name == "" || feed.slug == "") + }; + + await fetch("/", { + method: "POST", + body: JSON.stringify(json), + headers: { "Content-Type": "application/json" } + }).then(response => { + if (response.status == 201) { + window.location.href = window.location.href; + } + }) +} \ No newline at end of file diff --git a/kittybox-rs/src/frontend/style.css b/kittybox-rs/src/frontend/style.css new file mode 100644 index 0000000..109bba0 --- /dev/null +++ b/kittybox-rs/src/frontend/style.css @@ -0,0 +1,194 @@ +@import url('https://fonts.googleapis.com/css2?family=Caveat:wght@500&family=Lato&display=swap'); + +:root { + font-family: var(--font-normal); + --font-normal: 'Lato', sans-serif; + --font-accent: 'Caveat', cursive; + --type-scale: 1.250; + + --primary-accent: purple; + --secondary-accent: gold; +} +* { + box-sizing: border-box; +} +body { + margin: 0; +} +h1, h2, h3, h4, h5, h6 { + font-family: var(--font-accent); +} +.titanic { + font-size: 3.815rem +} +h1, .xxxlarge { + margin-top: 0; + margin-bottom: 0; + font-size: 3.052rem; +} +h2, .xxlarge {font-size: 2.441rem;} +h3, .xlarge {font-size: 1.953rem;} +h4, .larger {font-size: 1.563rem;} +h5, .large {font-size: 1.25rem;} +h6, .normal {font-size: 1rem;} +small, .small { font-size: 0.8em; } + +nav#headerbar { + background: var(--primary-accent); + color: whitesmoke; + border-bottom: .75rem solid var(--secondary-accent); + padding: .3rem; + vertical-align: center; + position: sticky; + top: 0; +} +nav#headerbar a#homepage { + font-weight: bolder; + font-family: var(--font-accent); + font-size: 2rem; +} +nav#headerbar > ul { + display: flex; + padding: inherit; + margin: inherit; + gap: .75em; +} +nav#headerbar > ul > li { + display: inline-flex; + flex-direction: column; + marker: none; + padding: inherit; + margin: inherit; + justify-content: center; +} +nav#headerbar > ul > li.shiftright { + margin-left: auto; +} +nav#headerbar a { + color: white; +} +body > main { + max-width: 60rem; + margin: auto; + padding: .75rem; +} +body > footer { + text-align: center; +} +.sidebyside { + display: flex; + flex-wrap: wrap; + gap: .75rem; + margin-top: .75rem; + margin-bottom: .75rem; +} +.sidebyside > * { + width: 100%; + margin-top: 0; + margin-bottom: 0; + border: 
.125rem solid black; + border-radius: .75rem; + padding: .75rem; + margin-top: 0 !important; + margin-bottom: 0 !important; + flex-basis: 28rem; + flex-grow: 1; +} +article > * + * { + margin-top: .75rem; +} +article > header { + padding-bottom: .75rem; + border-bottom: 1px solid gray; +} +article > footer { + border-top: 1px solid gray; +} +article.h-entry, article.h-feed, article.h-card, article.h-event { + border: 2px solid black; + border-radius: .75rem; + padding: .75rem; + margin-top: .75rem; + margin-bottom: .75rem; +} +.webinteractions > ul.counters { + display: inline-flex; + padding: inherit; + margin: inherit; + gap: .75em; + flex-wrap: wrap; +} +.webinteractions > ul.counters > li > .icon { + font-size: 1.5em; +} +.webinteractions > ul.counters > li { + display: inline-flex; + align-items: center; + gap: .5em; +} +article.h-entry > header.metadata ul { + padding-inline-start: unset; + margin: unset; +} +article.h-entry > header.metadata ul.categories { + flex-wrap: wrap; + display: inline-flex; + list-style-type: none; +} +article.h-entry > header.metadata ul.categories li { + display: inline; + margin-inline-start: unset; +} +article.h-entry > header.metadata ul li { + margin-inline-start: 2.5em; +} +article.h-entry .e-content pre { + border: 1px solid gray; + border-radius: 0.5em; + overflow-y: auto; + padding: 0.5em; +} +article.h-entry img.u-photo { + max-width: 80%; + max-height: 90vh; + display: block; + margin: auto; +} +article.h-entry img.u-photo + * { + margin-top: .75rem; +} +article.h-entry > header.metadata span + span::before { + content: " | " +} +li.p-category::before { + content: " #"; +} + +article.h-entry ul.categories { + gap: .2em; +} +article.h-card img.u-photo { + border-radius: 100%; + float: left; + height: 8rem; + border: 1px solid gray; + margin-right: .75em; + object-fit: cover; + aspect-ratio: 1; +} + +.mini-h-card img { + height: 2em; + display: inline-block; + border: 2px solid gray; + border-radius: 100%; + margin-right: 0.5rem; +} + +.mini-h-card * { + vertical-align: middle; +} + +.mini-h-card a { + text-decoration: none; +} diff --git a/kittybox-rs/src/index.html b/kittybox-rs/src/index.html new file mode 100644 index 0000000..1fc2a96 --- /dev/null +++ b/kittybox-rs/src/index.html @@ -0,0 +1,182 @@ +<html> + <head> + <meta charset="utf-8"> + <title>Kittybox-Micropub debug client</title> + <style type="text/css"> + * { + box-sizing: border-box; + } + :root { + font-family: sans-serif; + } + body { + margin: 0; + } + body > main { + margin: auto; + max-width: 1024px; + } + h1.header { + margin-top: 0.75em; + text-align: center; + } + fieldset + fieldset, fieldset + input, section + section, section + fieldset { + margin-top: 0.75em; + } + input[type="submit"] { + margin-left: auto; + display: block; + } + form > fieldset > section > label { + width: 100%; + display: block; + } + form > fieldset > section > input, form > fieldset > section > textarea { + width: 100%; + } + textarea { + min-height: 10em; + } + </style> + <script type="module"> + const form = document.getElementById("micropub"); + const channel_select_radio = document.getElementById("select_channels"); + channel_select_radio.onclick = async () => { + const channels = await query_channels() + if (channels !== undefined) { + populate_channel_list(channels) + } + } + const no_channel_radio = document.getElementById("no_channel"); + no_channel_radio.onclick = () => { + document.getElementById("channels").style.display = "none"; + const channel_list = 
document.getElementById("channels_target") + channel_list.innerHTML = ""; + } + function construct_body(form) { + return { + type: ["h-entry"], + properties: { + content: [form.elements.content.value], + name: form.elements.name.value ? [form.elements.name.value] : undefined, + category: form.elements.category.value ? form.elements.category.value.split(",").map(val => val.trim()) : undefined, + channel: form.elements.channel_select.value ? Array.from(form.elements.channel).map(i => i.checked ? i.value : false).filter(i => i) : undefined + } + } + } + + async function query_channels() { + const response = await fetch(form.action + "?q=config", { + headers: { + "Authorization": `Bearer ${form.elements.access_token.value}` + } + }) + + const config = await response.json(); + + return config["channels"] + } + + function populate_channel_list(channels) { + document.getElementById("channels").style.display = "block"; + const channel_list = document.getElementById("channels_target") + channel_list.innerHTML = ""; + channels.forEach((channel) => { + const template = document.getElementById("channel_selector").content.cloneNode(true) + const input = template.querySelector("input") + const label = template.querySelector("label") + input.id = `channel_selector_option_${channel.uid}` + input.value = channel.uid + label.for = input.id + label.innerHTML = `<a href="${channel.uid}">${channel.name}</a>` + + channel_list.appendChild(template) + }) + } + + form.onsubmit = async (event) => { + event.preventDefault() + console.log(JSON.stringify(construct_body(form))) + try { + const response = await fetch(form.action, { + method: form.method, + headers: { + "Authorization": `Bearer ${form.elements.access_token.value}`, + "Content-Type": "application/json" + }, + body: JSON.stringify(construct_body(form)) + }) + if (response.status != 201 || response.status != 202) { + console.error(await response.json()); + } + if (response.headers.get("Location")) { + window.location.href = response.headers.get("Location"); + } + } catch (e) { + console.error(e) + } + } + </script> + </head> + <body> + <h1 class="header">Kittybox-Micropub debug client</h1> + + <main> + <p> + In a pinch? Lost your Micropub client, but need to make a quick announcement? + Worry not, the debug client has your back. 
<i>I just hope you have a spare Micropub token stored somewhere like I do...</i> + </p> + + <form action="/micropub" method="POST" id="micropub"> + <fieldset> + <legend>Authorization details</legend> + <section> + <label for="access_token">Access token:</label> + <input id="access_token" name="access_token" type="password"> + + <p><a href="https://gimme-a-token.5eb.nl/" target="_blank">Get an access token (will open in a new tab)</a></p> + </section> + </fieldset> + <fieldset> + <legend>Post details:</legend> + <section> + <label for="name">Name (leave blank for an unnamed post):</label> + <input id="name" type="text"> + </section> + <section> + <label for="content">Content:</label> + <textarea id="content" placeholder="Your post's text goes here"></textarea> + </section> + <section> + <label for="category">Categories (separeted by commas):</label> + <input id="category" type="text"> + </section> + <fieldset> + <legend>Channels</legend> + <section> + <input type="radio" id="no_channel" name="channel_select" checked value=""> + <label for="no_channel">Default channel only</label> + </section> + + <section> + <input type="radio" id="select_channels" name="channel_select" value="on"> + <label for="select_channels">Select channels manually</label> + </section> + + <fieldset id="channels" style="display: none"> + <legend>Available channels:</legend> + <template id="channel_selector"> + <section> + <input type="checkbox" name="channel" id="" value=""> + <label for=""></label> + </section> + </template> + <div id="channels_target"></div> + </fieldset> + </fieldset> + </fieldset> + <input type="submit"> + </form> + </main> + </body> +</html> \ No newline at end of file diff --git a/kittybox-rs/src/indieauth.rs b/kittybox-rs/src/indieauth.rs new file mode 100644 index 0000000..57c0301 --- /dev/null +++ b/kittybox-rs/src/indieauth.rs @@ -0,0 +1,291 @@ +use url::Url; +use serde::{Serialize, Deserialize}; +use warp::{Filter, Rejection, reject::MissingHeader}; + +#[derive(Deserialize, Serialize, Debug, PartialEq, Clone)] +pub struct User { + pub me: Url, + pub client_id: Url, + scope: String, +} + +#[derive(Debug, Clone, PartialEq, Copy)] +pub enum ErrorKind { + PermissionDenied, + NotAuthorized, + TokenEndpointError, + JsonParsing, + Other +} + +#[derive(Deserialize, Serialize, Debug, Clone)] +pub struct TokenEndpointError { + error: String, + error_description: String +} + +#[derive(Debug)] +pub struct IndieAuthError { + source: Option<Box<dyn std::error::Error + Send + Sync>>, + kind: ErrorKind, + msg: String +} + +impl std::error::Error for IndieAuthError { + fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { + self.source.as_ref().map(|e| e.as_ref() as &dyn std::error::Error) + } +} + +impl std::fmt::Display for IndieAuthError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match match self.kind { + ErrorKind::TokenEndpointError => write!(f, "token endpoint returned an error: "), + ErrorKind::JsonParsing => write!(f, "error while parsing token endpoint response: "), + ErrorKind::NotAuthorized => write!(f, "token endpoint did not recognize the token: "), + ErrorKind::PermissionDenied => write!(f, "token endpoint rejected the token: "), + ErrorKind::Other => write!(f, "token endpoint communication error: "), + } { + Ok(_) => write!(f, "{}", self.msg), + Err(err) => Err(err) + } + } +} + +impl From<serde_json::Error> for IndieAuthError { + fn from(err: serde_json::Error) -> Self { + Self { + msg: format!("{}", err), + source: Some(Box::new(err)), + kind: 
ErrorKind::JsonParsing, + } + } +} + +impl From<reqwest::Error> for IndieAuthError { + fn from(err: reqwest::Error) -> Self { + Self { + msg: format!("{}", err), + source: Some(Box::new(err)), + kind: ErrorKind::Other, + } + } +} + +impl warp::reject::Reject for IndieAuthError {} + +impl User { + pub fn check_scope(&self, scope: &str) -> bool { + self.scopes().any(|i| i == scope) + } + pub fn scopes(&self) -> std::str::SplitAsciiWhitespace<'_> { + self.scope.split_ascii_whitespace() + } + pub fn new(me: &str, client_id: &str, scope: &str) -> Self { + Self { + me: Url::parse(me).unwrap(), + client_id: Url::parse(client_id).unwrap(), + scope: scope.to_string(), + } + } +} + +pub fn require_token(token_endpoint: String, http: reqwest::Client) -> impl Filter<Extract = (User,), Error = Rejection> + Clone { + // It might be OK to panic here, because we're still inside the initialisation sequence for now. + // Proper error handling on the top of this should be used though. + let token_endpoint_uri = url::Url::parse(&token_endpoint) + .expect("Couldn't parse the token endpoint URI!"); + warp::any() + .map(move || token_endpoint_uri.clone()) + .and(warp::any().map(move || http.clone())) + .and(warp::header::<String>("Authorization").recover(|err: Rejection| async move { + if err.find::<MissingHeader>().is_some() { + Err(IndieAuthError { + source: None, + msg: "No Authorization header provided.".to_string(), + kind: ErrorKind::NotAuthorized + }.into()) + } else { + Err(err) + } + }).unify()) + .and_then(|token_endpoint, http: reqwest::Client, token| async move { + use hyper::StatusCode; + + match http + .get(token_endpoint) + .header("Authorization", token) + .header("Accept", "application/json") + .send() + .await + { + Ok(res) => match res.status() { + StatusCode::OK => match res.json::<serde_json::Value>().await { + Ok(json) => match serde_json::from_value::<User>(json.clone()) { + Ok(user) => Ok(user), + Err(err) => { + if let Some(false) = json["active"].as_bool() { + Err(IndieAuthError { + source: None, + kind: ErrorKind::NotAuthorized, + msg: "The token is not active for this user.".to_owned() + }.into()) + } else { + Err(IndieAuthError::from(err).into()) + } + } + } + Err(err) => Err(IndieAuthError::from(err).into()) + }, + StatusCode::BAD_REQUEST => { + match res.json::<TokenEndpointError>().await { + Ok(err) => { + if err.error == "unauthorized" { + Err(IndieAuthError { + source: None, + kind: ErrorKind::NotAuthorized, + msg: err.error_description + }.into()) + } else { + Err(IndieAuthError { + source: None, + kind: ErrorKind::TokenEndpointError, + msg: err.error_description + }.into()) + } + }, + Err(err) => Err(IndieAuthError::from(err).into()) + } + }, + _ => Err(IndieAuthError { + source: None, + msg: format!("Token endpoint returned {}", res.status()), + kind: ErrorKind::TokenEndpointError + }.into()) + }, + Err(err) => Err(warp::reject::custom(IndieAuthError::from(err))) + } + }) +} + +#[cfg(test)] +mod tests { + use super::{User, IndieAuthError, require_token}; + use httpmock::prelude::*; + + #[test] + fn user_scopes_are_checkable() { + let user = User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ); + + assert!(user.check_scope("create")); + assert!(!user.check_scope("delete")); + } + + #[inline] + fn get_http_client() -> reqwest::Client { + reqwest::Client::new() + } + + #[tokio::test] + async fn test_require_token_with_token() { + let server = MockServer::start_async().await; + server.mock_async(|when, then| { + when.path("/token") + 
.header("Authorization", "Bearer token"); + + then.status(200) + .header("Content-Type", "application/json") + .json_body(serde_json::to_value(User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + )).unwrap()); + }).await; + + let filter = require_token(server.url("/token"), get_http_client()); + + let res: User = warp::test::request() + .path("/") + .header("Authorization", "Bearer token") + .filter(&filter) + .await + .unwrap(); + + assert_eq!(res.me.as_str(), "https://fireburn.ru/") + } + + #[tokio::test] + async fn test_require_token_fake_token() { + let server = MockServer::start_async().await; + server.mock_async(|when, then| { + when.path("/refuse_token"); + + then.status(200) + .json_body(serde_json::json!({"active": false})); + }).await; + + let filter = require_token(server.url("/refuse_token"), get_http_client()); + + let res = warp::test::request() + .path("/") + .header("Authorization", "Bearer token") + .filter(&filter) + .await + .unwrap_err(); + + let err: &IndieAuthError = res.find().unwrap(); + assert_eq!(err.kind, super::ErrorKind::NotAuthorized); + } + + #[tokio::test] + async fn test_require_token_no_token() { + let server = MockServer::start_async().await; + let mock = server.mock_async(|when, then| { + when.path("/should_never_be_called"); + + then.status(500); + }).await; + let filter = require_token(server.url("/should_never_be_called"), get_http_client()); + + let res = warp::test::request() + .path("/") + .filter(&filter) + .await + .unwrap_err(); + + let err: &IndieAuthError = res.find().unwrap(); + assert_eq!(err.kind, super::ErrorKind::NotAuthorized); + + mock.assert_hits_async(0).await; + } + + #[tokio::test] + async fn test_require_token_400_error_unauthorized() { + let server = MockServer::start_async().await; + server.mock_async(|when, then| { + when.path("/refuse_token_with_400"); + + then.status(400) + .json_body(serde_json::json!({ + "error": "unauthorized", + "error_description": "The token provided was malformed" + })); + }).await; + + let filter = require_token(server.url("/refuse_token_with_400"), get_http_client()); + + let res = warp::test::request() + .path("/") + .header("Authorization", "Bearer token") + .filter(&filter) + .await + .unwrap_err(); + + let err: &IndieAuthError = res.find().unwrap(); + assert_eq!(err.kind, super::ErrorKind::NotAuthorized); + } +} diff --git a/kittybox-rs/src/lib.rs b/kittybox-rs/src/lib.rs new file mode 100644 index 0000000..1800b5b --- /dev/null +++ b/kittybox-rs/src/lib.rs @@ -0,0 +1,103 @@ +#![forbid(unsafe_code)] +#![warn(clippy::todo)] + +pub mod metrics; +/// Database abstraction layer for Kittybox, allowing the CMS to work with any kind of database. 
+pub mod database;
+pub mod micropub;
+pub mod media;
+pub mod indieauth;
+pub mod frontend;
+
+pub(crate) mod rejections {
+    #[derive(Debug)]
+    pub(crate) struct UnacceptableContentType;
+    impl warp::reject::Reject for UnacceptableContentType {}
+
+    #[derive(Debug)]
+    pub(crate) struct HostHeaderUnset;
+    impl warp::reject::Reject for HostHeaderUnset {}
+}
+
+pub static MICROPUB_CLIENT: &[u8] = include_bytes!("./index.html");
+
+pub mod util {
+    use warp::{Filter, host::Authority};
+    use super::rejections;
+
+    pub fn require_host() -> impl Filter<Extract = (Authority,), Error = warp::Rejection> + Copy {
+        warp::host::optional()
+            .and_then(|authority: Option<Authority>| async move {
+                authority.ok_or_else(|| warp::reject::custom(rejections::HostHeaderUnset))
+            })
+    }
+
+    pub fn parse_accept() -> impl Filter<Extract = (http_types::Mime,), Error = warp::Rejection> + Copy {
+        warp::header::value("Accept").and_then(|accept: warp::http::HeaderValue| async move {
+            let mut accept: http_types::content::Accept = {
+                // This is unnecessarily complicated because I want to reuse some http-types parsing,
+                // and http-types keeps the constructor for Headers private, so I need to construct
+                // a mock Request to reason about headers... this is so dumb wtf
+                // so much for zero-cost abstractions, huh
+                let bytes: &[u8] = accept.as_bytes();
+                let value = http_types::headers::HeaderValue::from_bytes(bytes.to_vec()).unwrap();
+                let values: http_types::headers::HeaderValues = vec![value].into();
+                let mut request = http_types::Request::new(http_types::Method::Get, "http://example.com/");
+                request.append_header("Accept".parse::<http_types::headers::HeaderName>().unwrap(), &values);
+                http_types::content::Accept::from_headers(&request).unwrap().unwrap()
+            };
+
+            // This code is INCREDIBLY dumb, honestly...
+            // why did I even try to use it?
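+            // Illustrative example of the negotiation below: for a request with
+            //
+            //     Accept: application/json, text/html;q=0.9
+            //
+            // the negotiated type should come out as "application/json".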
+ // TODO vendor this stuff in so I can customize it + match accept.negotiate(&[ + "text/html; encoding=\"utf-8\"".into(), + "application/json; encoding=\"utf-8\"".into(), + "text/html".into(), + "application/json".into(), + + ]) { + Ok(mime) => { + Ok(http_types::Mime::from(mime.value().as_str())) + }, + Err(err) => { + log::error!("Content-Type negotiation error: {:?}, accepting: {:?}", err, accept); + Err(warp::reject::custom(rejections::UnacceptableContentType)) + } + } + }) + } + + mod tests { + #[tokio::test] + async fn test_require_host_with_host() { + use super::require_host; + + let filter = require_host(); + + let res = warp::test::request() + .path("/") + .header("Host", "localhost:8080") + .filter(&filter) + .await + .unwrap(); + + assert_eq!(res, "localhost:8080"); + + } + + #[tokio::test] + async fn test_require_host_no_host() { + use super::require_host; + + let filter = require_host(); + + let res = warp::test::request() + .path("/") + .filter(&filter) + .await; + + assert!(res.is_err()); + } + } +} diff --git a/kittybox-rs/src/main.rs b/kittybox-rs/src/main.rs new file mode 100644 index 0000000..eb70885 --- /dev/null +++ b/kittybox-rs/src/main.rs @@ -0,0 +1,256 @@ +use log::{debug, error, info}; +use std::{convert::Infallible, env, time::Duration}; +use url::Url; +use warp::{Filter, host::Authority}; + +#[tokio::main] +async fn main() { + // TODO turn into a feature so I can enable and disable it + #[cfg(debug_assertions)] + console_subscriber::init(); + + // TODO use tracing instead of log + let logger_env = env_logger::Env::new().filter_or("RUST_LOG", "info"); + env_logger::init_from_env(logger_env); + + info!("Starting the kittybox server..."); + + let backend_uri: String = match env::var("BACKEND_URI") { + Ok(val) => { + debug!("Backend URI: {}", val); + val + } + Err(_) => { + error!("BACKEND_URI is not set, cannot find a database"); + std::process::exit(1); + } + }; + let token_endpoint: Url = match env::var("TOKEN_ENDPOINT") { + Ok(val) => { + debug!("Token endpoint: {}", val); + match Url::parse(&val) { + Ok(val) => val, + _ => { + error!("Token endpoint URL cannot be parsed, aborting."); + std::process::exit(1) + } + } + } + Err(_) => { + error!("TOKEN_ENDPOINT is not set, will not be able to authorize users!"); + std::process::exit(1) + } + }; + let authorization_endpoint: Url = match env::var("AUTHORIZATION_ENDPOINT") { + Ok(val) => { + debug!("Auth endpoint: {}", val); + match Url::parse(&val) { + Ok(val) => val, + _ => { + error!("Authorization endpoint URL cannot be parsed, aborting."); + std::process::exit(1) + } + } + } + Err(_) => { + error!("AUTHORIZATION_ENDPOINT is not set, will not be able to confirm token and ID requests using IndieAuth!"); + std::process::exit(1) + } + }; + + //let internal_token: Option<String> = env::var("KITTYBOX_INTERNAL_TOKEN").ok(); + + /*let cookie_secret: String = match env::var("COOKIE_SECRET").ok() { + Some(value) => value, + None => { + if let Ok(filename) = env::var("COOKIE_SECRET_FILE") { + use tokio::io::AsyncReadExt; + + let mut file = tokio::fs::File::open(filename).await.map_err(|e| { + error!("Couldn't open the cookie secret file: {}", e); + std::process::exit(1); + }).unwrap(); + let mut temp_string = String::new(); + file.read_to_string(&mut temp_string).await.map_err(|e| { + error!("Couldn't read the cookie secret from file: {}", e); + std::process::exit(1); + }).unwrap(); + + temp_string + } else { + error!("COOKIE_SECRET or COOKIE_SECRET_FILE is not set, will not be able to log in users securely!"); + 
std::process::exit(1); + } + } + };*/ + + let listen_at = match env::var("SERVE_AT") + .ok() + .unwrap_or_else(|| "[::]:8080".to_string()) + .parse::<std::net::SocketAddr>() { + Ok(addr) => addr, + Err(e) => { + error!("Cannot parse SERVE_AT: {}", e); + std::process::exit(1); + } + }; + + // This thing handles redirects automatically but is type-incompatible with hyper::Client + // Bonus: less generics to be aware of, this thing hides its complexity + let http: reqwest::Client = { + #[allow(unused_mut)] + let mut builder = reqwest::Client::builder() + .user_agent(concat!( + env!("CARGO_PKG_NAME"), + "/", + env!("CARGO_PKG_VERSION") + )); + // TODO: add a root certificate if there's an environment variable pointing at it + //builder = builder.add_root_certificate(reqwest::Certificate::from_pem(todo!())); + + builder.build().unwrap() + }; + + if backend_uri.starts_with("redis") { + println!("The Redis backend is deprecated."); + std::process::exit(1); + } else if backend_uri.starts_with("file") { + + let database = { + let folder = backend_uri.strip_prefix("file://").unwrap(); + let path = std::path::PathBuf::from(folder); + match kittybox::database::FileStorage::new(path).await { + Ok(db) => db, + Err(err) => { + error!("Error creating database: {:?}", err); + std::process::exit(1); + } + } + }; + + let endpoints = kittybox::frontend::IndiewebEndpoints { + authorization_endpoint: authorization_endpoint.to_string(), + token_endpoint: token_endpoint.to_string(), + webmention: None, + microsub: None, + }; + + let homepage = warp::get() + .and(warp::path::end()) + .and(kittybox::frontend::homepage(database.clone(), endpoints.clone())); + + let onboarding = warp::path("onboarding") + .and(warp::path::end()) + .and(kittybox::frontend::onboarding( + database.clone(), + endpoints.clone(), + http.clone() + )); + + let micropub = warp::path("micropub") + .and(warp::path::end() + .and(kittybox::micropub::micropub( + database.clone(), + token_endpoint.to_string(), + http.clone() + )) + .or(warp::get() + .and(warp::path("client")) + .and(warp::path::end()) + .map(|| warp::reply::html(kittybox::MICROPUB_CLIENT)))); + + let media = warp::path("media") + .and(warp::path::end() + .and(kittybox::media::media()) + .or(kittybox::util::require_host() + .and(warp::path::param()) + .map(|_host: Authority, path: String| format!("media file {}", path)))); + + // TODO remember how login logic works because I forgor + let login = warp::path("login") + .and(warp::path("callback") + .map(|| "callback!") + // TODO form on GET and handler on POST + .or(warp::path::end().map(|| "login page!"))); + + // TODO prettier error response + let coffee = warp::path("coffee") + .map(|| warp::reply::with_status("I'm a teapot!", warp::http::StatusCode::IM_A_TEAPOT)); + + let static_files = warp::path("static") + .and(kittybox::frontend::static_files()); + + let catchall = kittybox::frontend::catchall( + database.clone(), + endpoints.clone() + ); + + let health = warp::path("health").and(warp::path::end()).map(|| "OK"); + let metrics = warp::path("metrics").and(warp::path::end()).map(kittybox::metrics::gather); + + let app = homepage + .or(onboarding) + .or(metrics + .or(health)) + .or(static_files) + .or(login) + .or(coffee) + .or(micropub) + .or(media) + .or(catchall) + .with(warp::log("kittybox")) + .with(kittybox::metrics::metrics(vec![ + "health".to_string(), + "micropub".to_string(), + "static".to_string(), + "media".to_string(), + "metrics".to_string() + ])) + ; + + let svc = warp::service(app); + + let mut listenfd = 
+        let mut listenfd = listenfd::ListenFd::from_env();
+        let tcp_listener: std::net::TcpListener = if let Ok(Some(listener)) = listenfd.take_tcp_listener(0) {
+            listener
+        } else {
+            std::net::TcpListener::bind(listen_at).unwrap()
+        };
+        tcp_listener.set_nonblocking(true).unwrap();
+
+        info!("Listening on {}", tcp_listener.local_addr().unwrap());
+        let server = hyper::server::Server::from_tcp(tcp_listener)
+            .unwrap()
+            .tcp_keepalive(Some(Duration::from_secs(30 * 60)))
+            .serve(hyper::service::make_service_fn(move |_| {
+                let service = svc.clone();
+                async move {
+                    Ok::<_, Infallible>(service)
+                }
+            }))
+            .with_graceful_shutdown(async move {
+                // Defer to the Ctrl-C handler whenever we're not on Unix
+                // TODO consider using a diverging future here
+                #[cfg(not(unix))]
+                return tokio::signal::ctrl_c().await.unwrap();
+                #[cfg(unix)]
+                {
+                    use tokio::signal::unix::{signal, SignalKind};
+
+                    signal(SignalKind::terminate())
+                        .unwrap()
+                        .recv()
+                        .await
+                        .unwrap()
+                }
+            });
+
+        if let Err(err) = server.await {
+            error!("Error serving requests: {}", err);
+            std::process::exit(1);
+        }
+    } else {
+        error!("Unknown backend, not starting.");
+        std::process::exit(1);
+    }
+}
diff --git a/kittybox-rs/src/media/mod.rs b/kittybox-rs/src/media/mod.rs
new file mode 100644
index 0000000..0d46e0c
--- /dev/null
+++ b/kittybox-rs/src/media/mod.rs
@@ -0,0 +1,46 @@
+use futures_util::StreamExt;
+use bytes::buf::Buf;
+use warp::{Filter, Rejection, Reply, multipart::{FormData, Part}};
+
+pub fn query() -> impl Filter<Extract = (impl Reply,), Error = Rejection> + Clone {
+    warp::get()
+        .and(crate::util::require_host())
+        .map(|_host| "media endpoint query...")
+}
+
+pub fn options() -> impl Filter<Extract = (impl Reply,), Error = Rejection> + Clone {
+    warp::options()
+        .map(|| warp::reply::json::<Option<()>>(&None))
+        .with(warp::reply::with::header("Allow", "GET, POST"))
+}
+
+pub fn upload() -> impl Filter<Extract = (impl Reply,), Error = Rejection> + Clone {
+    warp::post()
+        .and(crate::util::require_host())
+        .and(warp::multipart::form().max_length(1024 * 1024 * 150 /* 150 MiB */))
+        .and_then(|_host, mut form: FormData| async move {
+            // Reject malformed multipart bodies instead of panicking on them
+            let file: Part = match form.next().await {
+                Some(Ok(part)) => part,
+                _ => return Err(warp::reject::reject()),
+            };
+            log::debug!("Uploaded: {:?}, type: {:?}", file.filename(), file.content_type());
+
+            let mut data = file.stream();
+            while let Some(buf) = data.next().await {
+                // TODO save it into a file
+                log::debug!("buffer length: {:?}", buf.map(|b| b.remaining()));
+            }
+            Ok::<_, warp::Rejection>(warp::reply::with_header(
+                warp::reply::with_status(
+                    "",
+                    warp::http::StatusCode::CREATED
+                ),
+                "Location",
+                "./awoo.png"
+            ))
+        })
+}
+
+pub fn media() -> impl Filter<Extract = (impl Reply,), Error = Rejection> + Clone {
+    upload()
+        .or(query())
+        .or(options())
+}
diff --git a/kittybox-rs/src/metrics.rs b/kittybox-rs/src/metrics.rs
new file mode 100644
index 0000000..48f5d9b
--- /dev/null
+++ b/kittybox-rs/src/metrics.rs
@@ -0,0 +1,21 @@
+#![allow(unused_imports, dead_code)]
+use async_trait::async_trait;
+use lazy_static::lazy_static;
+use std::time::{Duration, Instant};
+use prometheus::Encoder;
+
+// TODO: Vendor in the Metrics struct from warp_prometheus and rework the path matching algorithm
+
+pub fn metrics(path_includes: Vec<String>) -> warp::log::Log<impl Fn(warp::log::Info) + Clone> {
+    let metrics = warp_prometheus::Metrics::new(prometheus::default_registry(), &path_includes);
+    warp::log::custom(move |info| metrics.http_metrics(info))
+}
+
+pub fn gather() -> Vec<u8> {
+    let mut buffer: Vec<u8> = vec![];
+    let 
encoder = prometheus::TextEncoder::new(); + let metric_families = prometheus::gather(); + encoder.encode(&metric_families, &mut buffer).unwrap(); + + buffer +} diff --git a/kittybox-rs/src/micropub/get.rs b/kittybox-rs/src/micropub/get.rs new file mode 100644 index 0000000..718714a --- /dev/null +++ b/kittybox-rs/src/micropub/get.rs @@ -0,0 +1,82 @@ +use crate::database::{MicropubChannel, Storage}; +use crate::indieauth::User; +use crate::ApplicationState; +use tide::prelude::{json, Deserialize}; +use tide::{Request, Response, Result}; + +#[derive(Deserialize)] +struct QueryOptions { + q: String, + url: Option<String>, +} + +pub async fn get_handler<Backend>(req: Request<ApplicationState<Backend>>) -> Result +where + Backend: Storage + Send + Sync, +{ + let user = req.ext::<User>().unwrap(); + let backend = &req.state().storage; + let media_endpoint = &req.state().media_endpoint; + let query = req.query::<QueryOptions>().unwrap_or(QueryOptions { + q: "".to_string(), + url: None, + }); + match &*query.q { + "config" => { + let channels: Vec<MicropubChannel>; + match backend.get_channels(user.me.as_str()).await { + Ok(chans) => channels = chans, + Err(err) => return Ok(err.into()) + } + Ok(Response::builder(200).body(json!({ + "q": ["source", "config", "channel"], + "channels": channels, + "media-endpoint": media_endpoint + })).build()) + }, + "channel" => { + let channels: Vec<MicropubChannel>; + match backend.get_channels(user.me.as_str()).await { + Ok(chans) => channels = chans, + Err(err) => return Ok(err.into()) + } + Ok(Response::builder(200).body(json!(channels)).build()) + } + "source" => { + if user.check_scope("create") || user.check_scope("update") || user.check_scope("delete") || user.check_scope("undelete") { + if let Some(url) = query.url { + match backend.get_post(&url).await { + Ok(post) => if let Some(post) = post { + Ok(Response::builder(200).body(post).build()) + } else { + Ok(Response::builder(404).build()) + }, + Err(err) => Ok(err.into()) + } + } else { + Ok(Response::builder(400).body(json!({ + "error": "invalid_request", + "error_description": "Please provide `url`." + })).build()) + } + } else { + Ok(Response::builder(401).body(json!({ + "error": "insufficient_scope", + "error_description": "You don't have the required scopes to proceed.", + "scope": "update" + })).build()) + } + }, + // TODO: ?q=food, ?q=geo, ?q=contacts + // Depends on indexing posts + // Errors + "" => Ok(Response::builder(400).body(json!({ + "error": "invalid_request", + "error_description": "No ?q= parameter specified. Try ?q=config maybe?" + })).build()), + _ => Ok(Response::builder(400).body(json!({ + "error": "invalid_request", + "error_description": "Unsupported ?q= query. Try ?q=config and see the q array for supported values." 
+ })).build()) + } +} diff --git a/kittybox-rs/src/micropub/mod.rs b/kittybox-rs/src/micropub/mod.rs new file mode 100644 index 0000000..f426c77 --- /dev/null +++ b/kittybox-rs/src/micropub/mod.rs @@ -0,0 +1,964 @@ +use std::convert::Infallible; +use std::fmt::Display; +use either::Either; +use log::{info, warn, error}; +use warp::http::StatusCode; +use warp::{Filter, Rejection, reject::InvalidQuery}; +use serde_json::json; +use serde::{Serialize, Deserialize}; +use crate::database::{MicropubChannel, Storage, StorageError}; +use crate::indieauth::User; +use crate::micropub::util::form_to_mf2_json; + +#[derive(Serialize, Deserialize, Debug, PartialEq)] +#[serde(rename_all = "kebab-case")] +enum QueryType { + Source, + Config, + Channel, + SyndicateTo +} + +#[derive(Serialize, Deserialize, Debug)] +struct MicropubQuery { + q: QueryType, + url: Option<String> +} + +#[derive(Serialize, Deserialize, PartialEq, Debug)] +#[serde(rename_all = "snake_case")] +enum ErrorType { + AlreadyExists, + Forbidden, + InternalServerError, + InvalidRequest, + InvalidScope, + NotAuthorized, + NotFound, + UnsupportedMediaType +} + +#[derive(Serialize, Deserialize, Debug)] +pub(crate) struct MicropubError { + error: ErrorType, + error_description: String +} + +impl From<StorageError> for MicropubError { + fn from(err: StorageError) -> Self { + Self { + error: match err.kind() { + crate::database::ErrorKind::NotFound => ErrorType::NotFound, + _ => ErrorType::InternalServerError + }, + error_description: format!("Backend error: {}", err) + } + } +} + +impl std::error::Error for MicropubError {} + +impl Display for MicropubError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("Micropub error: ")?; + f.write_str(&self.error_description) + } +} + +impl From<&MicropubError> for StatusCode { + fn from(err: &MicropubError) -> Self { + use ErrorType::*; + match err.error { + AlreadyExists => StatusCode::CONFLICT, + Forbidden => StatusCode::FORBIDDEN, + InternalServerError => StatusCode::INTERNAL_SERVER_ERROR, + InvalidRequest => StatusCode::BAD_REQUEST, + InvalidScope => StatusCode::UNAUTHORIZED, + NotAuthorized => StatusCode::UNAUTHORIZED, + NotFound => StatusCode::NOT_FOUND, + UnsupportedMediaType => StatusCode::UNSUPPORTED_MEDIA_TYPE, + } + } +} +impl From<MicropubError> for StatusCode { + fn from(err: MicropubError) -> Self { + (&err).into() + } +} + +impl From<serde_json::Error> for MicropubError { + fn from(err: serde_json::Error) -> Self { + use ErrorType::*; + Self { + error: InvalidRequest, + error_description: err.to_string() + } + } +} + +impl MicropubError { + fn new(error: ErrorType, error_description: &str) -> Self { + Self { + error, + error_description: error_description.to_owned() + } + } +} + +impl warp::reject::Reject for MicropubError {} + +mod post; +pub(crate) use post::normalize_mf2; + +mod util { + use serde_json::json; + + pub(crate) fn form_to_mf2_json(form: Vec<(String, String)>) -> serde_json::Value { + let mut mf2 = json!({"type": [], "properties": {}}); + for (k, v) in form { + if k == "h" { + mf2["type"] + .as_array_mut() + .unwrap() + .push(json!("h-".to_string() + &v)); + } else if k != "access_token" { + let key = k.strip_suffix("[]").unwrap_or(&k); + match mf2["properties"][key].as_array_mut() { + Some(prop) => prop.push(json!(v)), + None => mf2["properties"][key] = json!([v]), + } + } + } + if mf2["type"].as_array().unwrap().is_empty() { + mf2["type"].as_array_mut().unwrap().push(json!("h-entry")); + } + mf2 + } + + #[cfg(test)] + mod tests { + use 
serde_json::json; + #[test] + fn test_form_to_mf2() { + assert_eq!( + super::form_to_mf2_json( + serde_urlencoded::from_str( + "h=entry&content=something%20interesting" + ).unwrap() + ), + json!({ + "type": ["h-entry"], + "properties": { + "content": ["something interesting"] + } + }) + ) + } + } +} + +#[derive(Debug)] +struct FetchedPostContext { + url: url::Url, + mf2: serde_json::Value, + webmention: Option<url::Url> +} + +fn populate_reply_context(mf2: &serde_json::Value, prop: &str, ctxs: &[FetchedPostContext]) -> Option<serde_json::Value> { + if mf2["properties"][prop].is_array() { + Some(json!( + mf2["properties"][prop] + .as_array() + // Safe to unwrap because we checked its existence and type + // And it's not like we can make it disappear without unsafe code + .unwrap() + .iter() + // This seems to be O(n^2) and I don't like it. + // Nevertheless, I lack required knowledge to optimize it. Also, it works, so... + .map(|i| ctxs.iter() + .find(|ctx| Some(ctx.url.as_str()) == i.as_str()) + .and_then(|ctx| ctx.mf2["items"].get(0)) + .or(Some(i)) + .unwrap()) + .collect::<Vec<&serde_json::Value>>() + )) + } else { + None + } +} + +// TODO actually save the post to the database and schedule post-processing +pub(crate) async fn _post<D: 'static + Storage>( + user: crate::indieauth::User, + uid: String, + mf2: serde_json::Value, + db: D, + http: reqwest::Client +) -> Result<impl warp::Reply, MicropubError> { + // Here, we have the following guarantees: + // - The user is the same user for this host (guaranteed by ensure_same_user) + // - The MF2-JSON document is normalized (guaranteed by normalize_mf2)\ + // - The MF2-JSON document contains a UID + // - The MF2-JSON document's URL list contains its UID + // - The MF2-JSON document's "content" field contains an HTML blob, if present + // - The MF2-JSON document's publishing datetime is present + // - The MF2-JSON document's target channels are set + // - The MF2-JSON document's author is set + + // Security check! Do we have an oAuth2 scope to proceed? + if !user.check_scope("create") { + return Err(MicropubError { + error: ErrorType::InvalidScope, + error_description: "Not enough privileges - try acquiring the \"create\" scope.".to_owned() + }); + } + + // Security check #2! Are we posting to our own website? + if !uid.starts_with(user.me.as_str()) || mf2["properties"]["channel"] + .as_array() + .unwrap_or(&vec![]) + .iter() + .any(|url| !url.as_str().unwrap().starts_with(user.me.as_str())) + { + return Err(MicropubError { + error: ErrorType::Forbidden, + error_description: "You're posting to a website that's not yours.".to_owned() + }); + } + + // Security check #3! Are we overwriting an existing document? + if db.post_exists(&uid).await? { + return Err(MicropubError { + error: ErrorType::AlreadyExists, + error_description: "UID clash was detected, operation aborted.".to_owned() + }); + } + + // Save the post + db.put_post(&mf2, user.me.as_str()).await?; + + let mut channels = mf2["properties"]["channel"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap_or("")) + .filter(|i| !i.is_empty()); + + let default_channel = user.me.join(post::DEFAULT_CHANNEL_PATH).unwrap().to_string(); + let vcards_channel = user.me.join(post::CONTACTS_CHANNEL_PATH).unwrap().to_string(); + let food_channel = user.me.join(post::FOOD_CHANNEL_PATH).unwrap().to_string(); + let default_channels = vec![default_channel, vcards_channel, food_channel]; + + for chan in &mut channels { + if db.post_exists(chan).await? 
{
+            db.update_post(chan, json!({"add": {"children": [uid]}})).await?;
+        } else if default_channels.iter().any(|i| chan == i) {
+            post::create_feed(&db, &uid, chan, &user).await?;
+        } else {
+            warn!("Ignoring non-existent channel: {}", chan);
+        }
+    }
+
+    let reply = warp::reply::with_status(
+        warp::reply::with_header(
+            warp::reply::json(&json!({"location": &uid})),
+            "Location", &uid
+        ),
+        StatusCode::ACCEPTED
+    );
+
+    // TODO: Post-processing the post (aka second write pass)
+    // - [x] Download rich reply contexts
+    // - [ ] Syndicate the post if requested, add links to the syndicated copies
+    // - [ ] Send WebSub notifications to the hub (if we happen to have one)
+    // - [x] Send webmentions
+    tokio::task::spawn(async move {
+        use futures_util::StreamExt;
+
+        let uid: &str = mf2["properties"]["uid"][0].as_str().unwrap();
+
+        let context_props = ["in-reply-to", "like-of", "repost-of", "bookmark-of"];
+        let mut context_urls: Vec<url::Url> = vec![];
+        for prop in &context_props {
+            if let Some(array) = mf2["properties"][prop].as_array() {
+                context_urls.extend(
+                    array
+                        .iter()
+                        .filter_map(|v| v.as_str())
+                        .filter_map(|v| v.parse::<url::Url>().ok()),
+                );
+            }
+        }
+        // TODO parse HTML in e-content and add links found here
+        context_urls.sort_unstable_by_key(|u| u.to_string());
+        context_urls.dedup();
+
+        // TODO: Make a stream to fetch all these posts and convert them to MF2
+        let post_contexts = {
+            let http = &http;
+            tokio_stream::iter(context_urls.into_iter())
+                .then(move |url: url::Url| http.get(url).send())
+                .filter_map(|response| futures::future::ready(response.ok()))
+                .filter(|response| futures::future::ready(response.status() == 200))
+                .filter_map(|response: reqwest::Response| async move {
+                    // 1. We need to preserve the URL
+                    // 2. We need to get the HTML for MF2 processing
+                    // 3. We need to get the webmention endpoint address
+                    // All of that can be done in one go.
+                    let url = response.url().clone();
+                    // TODO parse link headers
+                    let _links = response
+                        .headers()
+                        .get_all(hyper::http::header::LINK)
+                        .iter()
+                        .cloned()
+                        .collect::<Vec<hyper::http::HeaderValue>>();
+                    let html = match response.text().await {
+                        Ok(html) => html,
+                        Err(_) => return None,
+                    };
+                    let mf2 = match microformats::from_html(&html, url.clone()) {
+                        Ok(mf2) => mf2,
+                        Err(_) => return None,
+                    };
+                    // TODO use first Link: header if available
+                    let webmention: Option<url::Url> = mf2.rels.by_rels().get("webmention")
+                        .and_then(|i| i.first().cloned());
+
+                    Some(FetchedPostContext {
+                        url, mf2: serde_json::to_value(mf2).unwrap(), webmention
+                    })
+                })
+                .collect::<Vec<FetchedPostContext>>()
+                .await
+        };
+
+        let mut update = json!({ "replace": {} });
+        for prop in &context_props {
+            if let Some(json) = populate_reply_context(&mf2, prop, &post_contexts) {
+                update["replace"][prop] = json;
+            }
+        }
+        if !update["replace"].as_object().unwrap().is_empty() {
+            if let Err(err) = db.update_post(uid, update).await {
+                error!("Failed to update post with rich reply contexts: {}", err);
+            }
+        }
+
+        // At this point we can start syndicating the post.
+        // Currently we don't really support any syndication endpoints, but still!
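+        // To illustrate what a syndication pass could do once real targets
+        // exist (a sketch with hypothetical values, nothing here is wired up):
+        // each mp-syndicate-to URL would be POSTed to, and each returned
+        // permalink appended to the post through one more update, shaped like
+        //
+        //     json!({ "add": { "syndication": ["https://example.com/copy-of-post"] } })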
+ /*if let Some(syndicate_to) = mf2["properties"]["mp-syndicate-to"].as_array() { + let http = &http; + tokio_stream::iter(syndicate_to) + .filter_map(|i| futures::future::ready(i.as_str())) + .for_each_concurrent(3, |s: &str| async move { + #[allow(clippy::match_single_binding)] + match s { + _ => { + todo!("Syndicate to generic webmention-aware service {}", s); + } + // TODO special handling for non-webmention-aware services like the birdsite + } + }) + .await; + }*/ + + { + let http = &http; + tokio_stream::iter( + post_contexts.into_iter() + .filter(|ctx| ctx.webmention.is_some())) + .for_each_concurrent(2, |ctx| async move { + let mut map = std::collections::HashMap::new(); + map.insert("source", uid); + map.insert("target", ctx.url.as_str()); + + match http.post(ctx.webmention.unwrap().clone()) + .form(&map) + .send() + .await + { + Ok(res) => { + if !res.status().is_success() { + warn!( + "Failed to send a webmention for {}: got HTTP {}", + ctx.url, res.status() + ); + } else { + info!("Sent a webmention to {}, got HTTP {}", ctx.url, res.status()) + } + }, + Err(err) => warn!("Failed to send a webmention for {}: {}", ctx.url, err) + } + }) + .await; + } + }); + + Ok(reply) +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +enum ActionType { + Delete, + Update +} + +#[derive(Serialize, Deserialize)] +struct MicropubFormAction { + action: ActionType, + url: String +} + +#[derive(Serialize, Deserialize)] +struct MicropubAction { + action: ActionType, + url: String, + #[serde(skip_serializing_if = "Option::is_none")] + replace: Option<serde_json::Value>, + #[serde(skip_serializing_if = "Option::is_none")] + add: Option<serde_json::Value>, + #[serde(skip_serializing_if = "Option::is_none")] + delete: Option<serde_json::Value> +} + +impl From<MicropubFormAction> for MicropubAction { + fn from(a: MicropubFormAction) -> Self { + Self { + action: a.action, + url: a.url, + replace: None, add: None, delete: None + } + } +} + +// TODO perform the requested actions synchronously +async fn post_action<D: Storage>( + action: MicropubAction, + db: D, + user: User +) -> Result<impl warp::Reply, MicropubError> { + + let uri = if let Ok(uri) = action.url.parse::<hyper::Uri>() { + uri + } else { + return Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Your URL doesn't parse properly.".to_owned() + }); + }; + + if uri.authority().unwrap() != user.me.as_str().parse::<hyper::Uri>().unwrap().authority().unwrap() { + return Err(MicropubError { + error: ErrorType::Forbidden, + error_description: "Don't tamper with others' posts!".to_owned() + }); + } + + match action.action { + ActionType::Delete => { + if !user.check_scope("delete") { + return Err(MicropubError { + error: ErrorType::InvalidScope, + error_description: "You need a \"delete\" scope for this.".to_owned() + }); + } + + db.delete_post(&action.url).await? + }, + ActionType::Update => { + if !user.check_scope("update") { + return Err(MicropubError { + error: ErrorType::InvalidScope, + error_description: "You need an \"update\" scope for this.".to_owned() + }); + } + + db.update_post( + &action.url, + // Here, unwrapping is safe, because this value + // was recently deserialized from JSON already. + serde_json::to_value(&action).unwrap() + ).await? 
+ }, + } + + Ok(warp::reply::reply()) +} + +async fn check_auth(host: warp::host::Authority, user: User) -> Result<User, warp::Rejection> { + let user_authority = warp::http::Uri::try_from(user.me.as_str()) + .unwrap() + .authority() + .unwrap() + .clone(); + // TODO compare with potential list of allowed websites + // to allow one user to edit several websites with one token + if host != user_authority { + Err(warp::reject::custom(MicropubError::new( + ErrorType::NotAuthorized, + "This user is not authorized to use Micropub on this website." + ))) + } else { + Ok(user) + } +} + +#[cfg(any(not(debug_assertions), test))] +fn ensure_same_user_as_host( + token_endpoint: String, + http: reqwest::Client +) -> impl Filter<Extract = (User,), Error = warp::Rejection> + Clone { + crate::util::require_host() + .and(crate::indieauth::require_token(token_endpoint, http)) + .and_then(check_auth) +} + +async fn dispatch_post_body( + mut body: impl bytes::Buf, + mimetype: http_types::Mime +) -> Result<Either<MicropubAction, serde_json::Value>, warp::Rejection> { + // Since hyper::common::buf::BufList doesn't implement Clone, we can't use Clone in here + // We have to copy the body. Ugh!!! + // so much for zero-copy buffers + let body = { + let mut _body: Vec<u8> = Vec::default(); + while body.has_remaining() { + _body.extend(body.chunk()); + body.advance(body.chunk().len()); + } + _body + }; + match mimetype.essence() { + "application/json" => { + if let Ok(body) = serde_json::from_slice::<MicropubAction>(&body) { + Ok(Either::Left(body)) + } else if let Ok(body) = serde_json::from_slice::<serde_json::Value>(&body) { + // quick sanity check + if !body.is_object() || !body["type"].is_array() { + return Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Invalid MF2-JSON detected: `.` should be an object, `.type` should be an array of MF2 types".to_owned() + }.into()) + } + Ok(Either::Right(body)) + } else { + Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Invalid JSON object passed.".to_owned() + }.into()) + } + }, + "application/x-www-form-urlencoded" => { + if let Ok(body) = serde_urlencoded::from_bytes::<MicropubFormAction>(&body) { + Ok(Either::Left(body.into())) + } else if let Ok(body) = serde_urlencoded::from_bytes::<Vec<(String, String)>>(&body) { + Ok(Either::Right(form_to_mf2_json(body))) + } else { + Err(MicropubError { + error: ErrorType::InvalidRequest, + error_description: "Invalid form-encoded data. Try h=entry&content=Hello!".to_owned() + }.into()) + } + }, + other => Err(MicropubError { + error: ErrorType::UnsupportedMediaType, + error_description: format!("Unsupported media type: {}. 
Try application/json?", other) + }.into()) + } +} + +#[cfg_attr(all(debug_assertions, not(test)), allow(unused_variables))] +pub fn post<D: 'static + Storage>( + db: D, + token_endpoint: String, + http: reqwest::Client +) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone { + let inject_db = warp::any().map(move || db.clone()); + #[cfg(all(debug_assertions, not(test)))] + let ensure_same_user = warp::any().map(|| crate::indieauth::User::new( + "http://localhost:8080/", + "https://quill.p3k.io/", + "create update delete media" + )); + #[cfg(any(not(debug_assertions), test))] + let ensure_same_user = ensure_same_user_as_host(token_endpoint, http.clone()); + + warp::post() + .and(warp::body::content_length_limit(1024 * 512) + .and(warp::body::aggregate()) + .and(warp::header::<http_types::Mime>("Content-Type")) + .and_then(dispatch_post_body)) + .and(inject_db) + .and(warp::any().map(move || http.clone())) + .and(ensure_same_user) + .and_then(|body: Either<MicropubAction, serde_json::Value>, db: D, http: reqwest::Client, user: crate::indieauth::User| async move { + (match body { + Either::Left(action) => { + post_action(action, db, user).await.map(|p| Box::new(p) as Box<dyn warp::Reply>) + }, + Either::Right(post) => { + let (uid, mf2) = post::normalize_mf2(post, &user); + _post(user, uid, mf2, db, http).await.map(|p| Box::new(p) as Box<dyn warp::Reply>) + } + }).map_err(warp::reject::custom) + }) +} + +pub fn options() -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone { + warp::options() + // TODO make it reply with a basic description of Micropub spec + .map(|| warp::reply::json::<Option<()>>(&None)) + .with(warp::reply::with::header("Allow", "GET, POST")) +} + +async fn _query<D: Storage>( + db: D, + query: MicropubQuery, + user: crate::indieauth::User +) -> Box<dyn warp::Reply> { + let user_authority = warp::http::Uri::try_from(user.me.as_str()) + .unwrap() + .authority() + .unwrap() + .clone(); + + match query.q { + QueryType::Config => { + let channels: Vec<MicropubChannel> = match db.get_channels(user_authority.as_str()).await { + Ok(chans) => chans, + Err(err) => return Box::new(warp::reply::with_status( + warp::reply::json(&MicropubError::new( + ErrorType::InternalServerError, + &format!("Error fetching channels: {}", err) + )), + StatusCode::INTERNAL_SERVER_ERROR + )) + }; + + Box::new(warp::reply::json(json!({ + "q": [ + QueryType::Source, + QueryType::Config, + QueryType::Channel, + QueryType::SyndicateTo + ], + "channels": channels, + "_kittybox_authority": user_authority.as_str(), + "syndicate-to": [] + }).as_object().unwrap())) + }, + QueryType::Source => { + match query.url { + Some(url) => { + if warp::http::Uri::try_from(&url).unwrap().authority().unwrap() != &user_authority { + return Box::new(warp::reply::with_status( + warp::reply::json(&MicropubError::new( + ErrorType::NotAuthorized, + "You are requesting a post from a website that doesn't belong to you." + )), + StatusCode::UNAUTHORIZED + )) + } + match db.get_post(&url).await { + Ok(some) => match some { + Some(post) => Box::new(warp::reply::json(&post)), + None => Box::new(warp::reply::with_status( + warp::reply::json(&MicropubError::new( + ErrorType::NotFound, + "The specified MF2 object was not found in database." 
+ )), + StatusCode::NOT_FOUND + )) + }, + Err(err) => { + Box::new(warp::reply::json(&MicropubError::new( + ErrorType::InternalServerError, + &format!("Backend error: {}", err) + ))) + } + } + }, + None => { + // Here, one should probably attempt to query at least the main feed and collect posts + // Using a pre-made query function can't be done because it does unneeded filtering + // Don't implement for now, this is optional + Box::new(warp::reply::with_status( + warp::reply::json(&MicropubError::new( + ErrorType::InvalidRequest, + "Querying for post list is not implemented yet." + )), + StatusCode::BAD_REQUEST + )) + } + } + }, + QueryType::Channel => { + let channels: Vec<MicropubChannel> = match db.get_channels(user_authority.as_str()).await { + Ok(chans) => chans, + Err(err) => return Box::new(warp::reply::with_status( + warp::reply::json(&MicropubError::new( + ErrorType::InternalServerError, + &format!("Error fetching channels: {}", err) + )), + StatusCode::INTERNAL_SERVER_ERROR + )) + }; + + Box::new(warp::reply::json(&json!({ "channels": channels }))) + }, + QueryType::SyndicateTo => { + Box::new(warp::reply::json(&json!({ "syndicate-to": [] }))) + } + } +} + +pub fn query<D: Storage>( + db: D, + token_endpoint: String, + http: reqwest::Client +) -> impl Filter<Extract = (impl warp::Reply,), Error = warp::Rejection> + Clone { + warp::get() + .map(move || db.clone()) + .and(warp::query::<MicropubQuery>()) + .and(crate::util::require_host() + .and(crate::indieauth::require_token(token_endpoint, http)) + .and_then(check_auth)) + .then(_query) + .recover(|e: warp::Rejection| async move { + if let Some(err) = e.find::<MicropubError>() { + Ok(warp::reply::json(err)) + } else { + Err(e) + } + }) +} + +pub async fn recover(err: Rejection) -> Result<impl warp::Reply, Infallible> { + if let Some(error) = err.find::<MicropubError>() { + return Ok(warp::reply::with_status(warp::reply::json(&error), error.into())) + } + let error = if err.find::<InvalidQuery>().is_some() { + MicropubError::new( + ErrorType::InvalidRequest, + "Invalid query parameters sent. Try ?q=config to see what you can do." 
+ ) + } else { + log::error!("Unhandled rejection: {:?}", err); + MicropubError::new( + ErrorType::InternalServerError, + &format!("Unknown error: {:?}", err) + ) + }; + + Ok(warp::reply::with_status(warp::reply::json(&error), error.into())) +} + +pub fn micropub<D: 'static + Storage>( + db: D, + token_endpoint: String, + http: reqwest::Client +) -> impl Filter<Extract = (impl warp::Reply,), Error = Infallible> + Clone { + query(db.clone(), token_endpoint.clone(), http.clone()) + .or(post(db, token_endpoint, http)) + .or(options()) + .recover(recover) +} +#[cfg(test)] +#[allow(dead_code)] +impl MicropubQuery { + fn config() -> Self { + Self { + q: QueryType::Config, + url: None + } + } + + fn source(url: &str) -> Self { + Self { + q: QueryType::Source, + url: Some(url.to_owned()) + } + } +} + +#[cfg(test)] +mod tests { + use hyper::body::HttpBody; + use crate::{database::Storage, micropub::MicropubError}; + use warp::{Filter, Reply}; + use serde_json::json; + + use super::FetchedPostContext; + + #[test] + fn test_populate_reply_context() { + let already_expanded_reply_ctx = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"] + } + }); + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "like-of": [ + "https://fireburn.ru/posts/example", + already_expanded_reply_ctx, + "https://fireburn.ru/posts/non-existent" + ] + } + }); + let test_ctx = json!({ + "type": ["h-entry"], + "properties": { + "content": ["This is a post which was reacted to."] + } + }); + let reply_contexts = vec![ + FetchedPostContext { + url: "https://fireburn.ru/posts/example".parse().unwrap(), + mf2: json!({ + "items": [ + test_ctx + ] + }), + webmention: None + } + ]; + + let like_of = super::populate_reply_context(&mf2, "like-of", &reply_contexts).unwrap(); + + assert_eq!(like_of[0], test_ctx); + assert_eq!(like_of[1], already_expanded_reply_ctx); + assert_eq!(like_of[2], "https://fireburn.ru/posts/non-existent"); + } + + #[tokio::test] + async fn check_post_reject_scope() { + let inject_db = { + let db = crate::database::MemoryStorage::new(); + + move || db.clone() + }; + let db = inject_db(); + + let res = warp::test::request() + .filter(&warp::any() + .map(inject_db) + .and_then(move |db| async move { + let post = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"] + } + }); + let user = crate::indieauth::User::new( + "https://localhost:8080/", + "https://kittybox.fireburn.ru/", + "profile" + ); + let (uid, mf2) = super::post::normalize_mf2(post, &user); + + super::_post( + user, uid, mf2, db, reqwest::Client::new() + ).await.map_err(warp::reject::custom) + }) + ) + .await + .map(|_| panic!("Tried to do something with a reply!")) + .unwrap_err(); + + if let Some(err) = res.find::<MicropubError>() { + assert_eq!(err.error, super::ErrorType::InvalidScope); + } else { + panic!("Did not return MicropubError"); + } + + let hashmap = db.mapping.read().await; + assert!(hashmap.is_empty()); + } + + #[tokio::test] + async fn check_post_mf2() { + let inject_db = { + let db = crate::database::MemoryStorage::new(); + + move || db.clone() + }; + let db = inject_db(); + + let res = warp::test::request() + .filter(&warp::any() + .map(inject_db) + .and_then(move |db| async move { + let post = json!({ + "type": ["h-entry"], + "properties": { + "content": ["Hello world!"] + } + }); + let user = crate::indieauth::User::new( + "https://localhost:8080/", + "https://kittybox.fireburn.ru/", + "create" + ); + let (uid, mf2) = super::post::normalize_mf2(post, &user); + + 
super::_post( + user, uid, mf2, db, reqwest::Client::new() + ).await.map_err(warp::reject::custom) + }) + ) + .await + .unwrap() + .into_response(); + + assert!(res.headers().contains_key("Location")); + let location = res.headers().get("Location").unwrap(); + assert!(db.post_exists(location.to_str().unwrap()).await.unwrap()); + assert!(db.post_exists("https://localhost:8080/feeds/main").await.unwrap()); + } + + #[tokio::test] + async fn test_check_auth() { + let err = warp::test::request() + .filter(&warp::any() + .map(|| ( + warp::host::Authority::from_static("aaronparecki.com"), + crate::indieauth::User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media" + ))) + .untuple_one() + .and_then(super::check_auth)) + .await + .unwrap_err(); + + let json: &MicropubError = err.find::<MicropubError>().unwrap(); + assert_eq!(json.error, super::ErrorType::NotAuthorized); + } + + #[tokio::test] + async fn test_query_foreign_url() { + let mut res = warp::test::request() + .filter(&warp::any().then(|| super::_query( + crate::database::MemoryStorage::new(), + super::MicropubQuery::source("https://aaronparecki.com/feeds/main"), + crate::indieauth::User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media" + ) + ))) + .await + .unwrap() + .into_response(); + + assert_eq!(res.status(), 401); + let body = res.body_mut().data().await.unwrap().unwrap(); + let json: MicropubError = serde_json::from_slice(&body as &[u8]).unwrap(); + assert_eq!(json.error, super::ErrorType::NotAuthorized); + } +} + diff --git a/kittybox-rs/src/micropub/post.rs b/kittybox-rs/src/micropub/post.rs new file mode 100644 index 0000000..cf9f3d9 --- /dev/null +++ b/kittybox-rs/src/micropub/post.rs @@ -0,0 +1,944 @@ +use crate::database::Storage; +use crate::indieauth::User; +use chrono::prelude::*; +use core::iter::Iterator; +use newbase60::num_to_sxg; +use std::convert::TryInto; +use serde_json::json; + +pub(crate) static DEFAULT_CHANNEL_PATH: &str = "/feeds/main"; +static DEFAULT_CHANNEL_NAME: &str = "Main feed"; +pub(crate) static CONTACTS_CHANNEL_PATH: &str = "/feeds/vcards"; +static CONTACTS_CHANNEL_NAME: &str = "My address book"; +pub(crate) static FOOD_CHANNEL_PATH: &str = "/feeds/food"; +static FOOD_CHANNEL_NAME: &str = "My recipe book"; + +fn get_folder_from_type(post_type: &str) -> String { + (match post_type { + "h-feed" => "feeds/", + "h-card" => "vcards/", + "h-event" => "events/", + "h-food" => "food/", + _ => "posts/", + }) + .to_string() +} + +/// Reset the datetime to a proper datetime. +/// Do not attempt to recover the information. +/// Do not pass GO. Do not collect $200. +fn reset_dt(post: &mut serde_json::Value) -> DateTime<FixedOffset> { + let curtime: DateTime<Local> = Local::now(); + post["properties"]["published"] = json!([curtime.to_rfc3339()]); + chrono::DateTime::from(curtime) +} + +pub fn normalize_mf2(mut body: serde_json::Value, user: &User) -> (String, serde_json::Value) { + // Normalize the MF2 object here. + let me = &user.me; + let folder = get_folder_from_type(body["type"][0].as_str().unwrap()); + let published: DateTime<FixedOffset> = if let Some(dt) = body["properties"]["published"][0].as_str() { + // Check if the datetime is parsable. + match DateTime::parse_from_rfc3339(dt) { + Ok(dt) => dt, + Err(_) => reset_dt(&mut body) + } + } else { + // Set the datetime. + // Note: this code block duplicates functionality with the above failsafe. + // Consider refactoring it to a helper function? 
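+        // Such a helper might look like this (a sketch only, not applied here):
+        //
+        //     fn published_or_now(post: &mut serde_json::Value) -> DateTime<FixedOffset> {
+        //         post["properties"]["published"][0]
+        //             .as_str()
+        //             .and_then(|dt| DateTime::parse_from_rfc3339(dt).ok())
+        //             .unwrap_or_else(|| reset_dt(post))
+        //     }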
+ reset_dt(&mut body) + }; + match body["properties"]["uid"][0].as_str() { + None => { + let uid = serde_json::Value::String( + me.join( + &(folder.clone() + + &num_to_sxg(published.timestamp_millis().try_into().unwrap())), + ) + .unwrap() + .to_string(), + ); + body["properties"]["uid"] = serde_json::Value::Array(vec![uid.clone()]); + match body["properties"]["url"].as_array_mut() { + Some(array) => array.push(uid), + None => body["properties"]["url"] = body["properties"]["uid"].clone(), + } + } + Some(uid_str) => { + let uid = uid_str.to_string(); + match body["properties"]["url"].as_array_mut() { + Some(array) => { + if !array.iter().any(|i| i.as_str().unwrap_or("") == uid) { + array.push(serde_json::Value::String(uid)) + } + } + None => body["properties"]["url"] = body["properties"]["uid"].clone(), + } + } + } + if let Some(slugs) = body["properties"]["mp-slug"].as_array() { + let new_urls = slugs + .iter() + .map(|i| i.as_str().unwrap_or("")) + .filter(|i| i != &"") + .map(|i| me.join(&((&folder).clone() + i)).unwrap().to_string()) + .collect::<Vec<String>>(); + let urls = body["properties"]["url"].as_array_mut().unwrap(); + new_urls.iter().for_each(|i| urls.push(json!(i))); + } + let props = body["properties"].as_object_mut().unwrap(); + props.remove("mp-slug"); + + if body["properties"]["content"][0].is_string() { + // Convert the content to HTML using the `markdown` crate + body["properties"]["content"] = json!([{ + "html": markdown::to_html(body["properties"]["content"][0].as_str().unwrap()), + "value": body["properties"]["content"][0] + }]) + } + // TODO: apply this normalization to editing too + if body["properties"]["mp-channel"].is_array() { + let mut additional_channels = body["properties"]["mp-channel"].as_array().unwrap().clone(); + if let Some(array) = body["properties"]["channel"].as_array_mut() { + array.append(&mut additional_channels); + } else { + body["properties"]["channel"] = json!(additional_channels) + } + body["properties"].as_object_mut().unwrap().remove("mp-channel"); + } else if body["properties"]["mp-channel"].is_string() { + let chan = body["properties"]["mp-channel"].as_str().unwrap().to_owned(); + if let Some(array) = body["properties"]["channel"].as_array_mut() { + array.push(json!(chan)) + } else { + body["properties"]["channel"] = json!([chan]); + } + body["properties"].as_object_mut().unwrap().remove("mp-channel"); + } + if body["properties"]["channel"][0].as_str().is_none() { + match body["type"][0].as_str() { + Some("h-entry") => { + // Set the channel to the main channel... + let default_channel = me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); + + body["properties"]["channel"] = json!([default_channel]); + } + Some("h-card") => { + let default_channel = me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); + + body["properties"]["channel"] = json!([default_channel]); + } + Some("h-food") => { + let default_channel = me.join(FOOD_CHANNEL_PATH).unwrap().to_string(); + + body["properties"]["channel"] = json!([default_channel]); + } + // TODO h-event + /*"h-event" => { + let default_channel + },*/ + _ => { + body["properties"]["channel"] = json!([]); + } + } + } + body["properties"]["posted-with"] = json!([user.client_id]); + if body["properties"]["author"][0].as_str().is_none() { + body["properties"]["author"] = json!([me.as_str()]) + } + // TODO: maybe highlight #hashtags? 
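+    // To illustrate the mp-channel normalization above (URLs hypothetical):
+    // a property set of {"mp-channel": "https://example.com/feeds/test"}
+    // comes out as {"channel": ["https://example.com/feeds/test"]}, with
+    // mp-channel itself dropped, mirroring the mp-slug cleanup earlier.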
+ // Find other processing to do and insert it here + return ( + body["properties"]["uid"][0].as_str().unwrap().to_string(), + body, + ); +} + +/*pub async fn new_post<S: Storage>( + req: Request<ApplicationState<S>>, + body: serde_json::Value, +) -> Result { + // First, check for rights. + let user = req.ext::<User>().unwrap(); + let storage = &req.state().storage; + if !user.check_scope("create") { + return error_json!( + 401, + "invalid_scope", + "Not enough privileges to post. Try a token with a \"create\" scope instead." + ); + } + let (uid, post) = normalize_mf2(body, user); + + // Security check! + // This software might also be used in a multi-user setting + // where several users or identities share one Micropub server + // (maybe a family website or a shitpost sideblog?) + if !post["properties"]["uid"][0] + .as_str() + .unwrap() + .starts_with(user.me.as_str()) + || post["properties"]["channel"] + .as_array() + .unwrap() + .iter() + .any(|url| !url.as_str().unwrap().starts_with(user.me.as_str())) + { + return error_json!( + 403, + "forbidden", + "You're trying to post to someone else's website..." + ); + } + + match storage.post_exists(&uid).await { + Ok(exists) => { + if exists { + return error_json!( + 409, + "already_exists", + format!( + "A post with the exact same UID already exists in the database: {}", + uid + ) + ); + } + } + Err(err) => return Ok(err.into()), + } + + if let Err(err) = storage.put_post(&post, user.me.as_str()).await { + return error_json!(500, "database_error", format!("{}", err)); + } + + // It makes sense to use a loop here, because you wouldn't post to a hundred channels at once + // Mostly one or two, and even those ones will be the ones picked for you by software + for channel in post["properties"]["channel"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap_or("").to_string()) + .filter(|i| !i.is_empty()) + .collect::<Vec<_>>() + { + let default_channel = user.me.join(DEFAULT_CHANNEL_PATH).unwrap().to_string(); + let vcards_channel = user.me.join(CONTACTS_CHANNEL_PATH).unwrap().to_string(); + let food_channel = user.me.join(FOOD_CHANNEL_PATH).unwrap().to_string(); + match storage.post_exists(&channel).await { + Ok(exists) => { + if exists { + if let Err(err) = storage + .update_post(&channel, json!({"add": {"children": [uid]}})) + .await + { + return error_json!( + 500, + "database_error", + format!( + "Couldn't insert post into the channel due to a database error: {}", + err + ) + ); + } + } else if channel == default_channel + || channel == vcards_channel + || channel == food_channel + { + if let Err(err) = create_feed(storage, &uid, &channel, user).await { + return error_json!( + 500, + "database_error", + format!("Couldn't save feed: {}", err) + ); + } + } else { + warn!( + "Ignoring request to post to a non-existent feed: {}", + channel + ); + } + } + Err(err) => return error_json!(500, "database_error", err), + } + } + // END WRITE BOUNDARY + + // do background processing on the post + async_std::task::spawn(post_process_new_post(req, post)); + + Ok(Response::builder(202) + .header("Location", &uid) + .body(json!({"status": "accepted", "location": &uid})) + .build()) +}*/ + +pub(crate) async fn create_feed( + storage: &impl Storage, + uid: &str, + channel: &str, + user: &User, +) -> crate::database::Result<()> { + let path = url::Url::parse(channel).unwrap().path().to_string(); + + // Note to Future Vika: DO NOT CONVERT THIS TO A MATCH BLOCK + // It will get treated as a binding instead of a const + // See `rustc --explain 
E0530` for more info + let name = if path == DEFAULT_CHANNEL_PATH { + DEFAULT_CHANNEL_NAME + } else if path == CONTACTS_CHANNEL_PATH { + CONTACTS_CHANNEL_NAME + } else if path == FOOD_CHANNEL_PATH { + FOOD_CHANNEL_NAME + } else { + panic!("Tried to create an unknown default feed!") + }; + + let (_, feed) = normalize_mf2( + json!({ + "type": ["h-feed"], + "properties": { + "name": [name], + "uid": [channel] + }, + "children": [uid] + }), + user, + ); + storage.put_post(&feed, user.me.as_str()).await +} + +/*async fn post_process_new_post<S: Storage>( + req: Request<ApplicationState<S>>, + post: serde_json::Value, +) { + // TODO: Post-processing the post (aka second write pass) + // - [-] Download rich reply contexts + // - [-] Syndicate the post if requested, add links to the syndicated copies + // - [ ] Send WebSub notifications to the hub (if we happen to have one) + // - [x] Send webmentions + let http = &req.state().http_client; + let uid = post["properties"]["uid"][0].as_str().unwrap().to_string(); + // 1. Download rich reply contexts + // This needs to be done first, because at this step we can also determine webmention endpoints + // and save them for later use. Additionally, the richer our content is, the better. + // This needs to be done asynchronously, so the posting experience for the author will be as fast + // as possible without making them wait for potentially slow downstream websites to load + // 1.1. Collect the list of contextually-significant post to load context from. + // This will include reply-tos, liked, reposted and bookmarked content + // + // TODO: Fetch links mentioned in a post, since we need to send webmentions to those as mentions + let mut contextually_significant_posts: Vec<surf::Url> = vec![]; + for prop in &["in-reply-to", "like-of", "repost-of", "bookmark-of"] { + if let Some(array) = post["properties"][prop].as_array() { + contextually_significant_posts.extend( + array + .iter() + .filter_map(|v| v.as_str().and_then(|v| surf::Url::parse(v).ok())), + ); + } + } + // 1.2. Deduplicate the list + contextually_significant_posts.sort_unstable(); + contextually_significant_posts.dedup(); + + // 1.3. Fetch the posts with their bodies and save them in a new Vec<(surf::Url, String)> + let posts_with_bodies: Vec<(surf::Url, surf::Response, String)> = + stream::iter(contextually_significant_posts.into_iter()) + .filter_map(|v: surf::Url| async move { + if let Ok(res) = http.get(&v).send().await { + if res.status() != 200 { + None + } else { + Some((v, res)) + } + } else { + None + } + }) + .filter_map(|(v, mut res): (surf::Url, surf::Response)| async move { + if let Ok(body) = res.body_string().await { + Some((v, res, body)) + } else { + None + } + }) + .collect() + .await; + // 1.4. Parse the bodies and include them in relevant places on the MF2 struct + // This requires an MF2 parser, and there are none for Rust at the moment. + // + // TODO: integrate https://gitlab.com/vikanezrimaya/mf2-parser when it's ready + + // 2. 
Syndicate the post + let syndicated_copies: Vec<serde_json::Value>; + if let Some(syndication_targets) = post["properties"]["syndicate-to"].as_array() { + syndicated_copies = stream::iter( + syndication_targets + .iter() + .filter_map(|v| v.as_str()) + .filter_map(|t| surf::Url::parse(t).ok()) + .collect::<Vec<_>>() + .into_iter() + .map(|_t: surf::Url| async move { + // TODO: Define supported syndication methods + // and syndicate the endpoint there + // Possible ideas: + // - indieweb.xyz (might need a lot of space for the buttons though, investigate proposing grouping syndication targets) + // - news.indieweb.org (IndieNews - needs a category linking to #indienews) + // - Twitter via brid.gy (do I really need Twitter syndication tho?) + if false { + Some("") + } else { + None + } + }), + ) + .buffer_unordered(3) + .filter_map(|v| async move { v }) + .map(|v| serde_json::Value::String(v.to_string())) + .collect::<Vec<_>>() + .await; + } else { + syndicated_copies = vec![] + } + // Save the post a second time here after syndication + // We use update_post here to prevent race conditions since its required to be atomic + let mut update = json!({ + "action": "update", + "url": &uid + }); + if !syndicated_copies.is_empty() { + update["add"] = json!({}); + update["add"]["syndication"] = serde_json::Value::Array(syndicated_copies); + } + if !posts_with_bodies.is_empty() { + error!("Replacing context links with parsed MF2-JSON data is not yet implemented (but it's ok! it'll just be less pretty)") + /* TODO: Replace context links with parsed MF2-JSON data * / + update["replace"] = {} + update["replace"]["like-of"] = [] + update["replace"]["in-reply-to"] = [] + update["replace"]["bookmark-of"] = [] + update["replace"]["repost-of"] = [] + // */ + } + // We don't need the original copy of the post anymore... I hope! + // This will act as a safeguard so I can't read stale data by accident anymore... + drop(post); + if let Err(err) = req.state().storage.update_post(&uid, update).await { + error!("Encountered error while post-processing a post: {}", err) + // At this point, we can still continue, we just won't have rich data for the post + // I wonder why could it even happen except in case of a database disconnection? + } + // 3. Send WebSub notifications + // TODO WebSub support + + // 4. Send webmentions + // We'll need the bodies here to get their endpoints + let source = &uid; + stream::iter(posts_with_bodies.into_iter()) + .filter_map( + |(url, response, body): (surf::Url, surf::Response, String)| async move { + // Check Link headers first + // the first webmention endpoint will be returned + if let Some(values) = response.header("Link") { + let iter = values.iter().flat_map(|i| i.as_str().split(',')); + + // Honestly I don't like this parser. It's very crude. + // But it should do the job. But I don't like it. 
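+                    // For example (values illustrative), a header such as
+                    //     Link: <https://example.com/wm>; rel="webmention"
+                    // splits on ';' into the bracketed target and its parameters;
+                    // once rel="webmention" is confirmed, the endpoint resolves
+                    // to https://example.com/wm after the relative join below.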
+ for link in iter { + let mut split = link.split(';'); + + match split.next() { + Some(uri) => { + if let Some(uri) = uri.strip_prefix('<') { + if let Some(uri) = uri.strip_suffix('>') { + for prop in split { + let lowercased = prop.to_ascii_lowercase(); + if &lowercased == "rel=\"webmention\"" + || &lowercased == "rel=webmention" + { + if let Ok(endpoint) = url.join(uri) { + return Some((url, endpoint)); + } + } + } + } + } + } + None => continue, + } + } + } + // TODO: Replace this function once the MF2 parser is ready + // A compliant parser's output format includes rels, + // we could just find a Webmention one in there + let pattern = + easy_scraper::Pattern::new(r#"<link href="{{url}}" rel="webmention">"#) + .expect("Pattern for webmentions couldn't be parsed"); + let matches = pattern.matches(&body); + if matches.is_empty() { + return None; + } + let endpoint = &matches[0]["url"]; + if let Ok(endpoint) = url.join(endpoint) { + Some((url, endpoint)) + } else { + None + } + }, + ) + .map(|(target, endpoint)| async move { + info!( + "Sending webmention to {} about {}", + source, + &target.to_string() + ); + let response = http + .post(&endpoint) + .content_type("application/x-www-form-urlencoded") + .body( + serde_urlencoded::to_string(vec![ + ("source", source), + ("target", &target.to_string()), + ]) + .expect("Couldn't construct webmention form"), + ) + .send() + .await; + match response { + Ok(response) => { + if response.status() == 200 + || response.status() == 201 + || response.status() == 202 + { + info!("Sent webmention for {} to {}", target, endpoint); + Ok(()) + } else { + error!( + "Sending webmention for {} to {} failed: Endpoint replied with HTTP {}", + target, + endpoint, + response.status() + ); + Err(()) + } + } + Err(err) => { + error!( + "Sending webmention for {} to {} failed: {}", + target, endpoint, err + ); + Err(()) + } + } + }) + .buffer_unordered(3) + .collect::<Vec<_>>() + .await; +}*/ + +/*async fn process_json<S: Storage>( + req: Request<ApplicationState<S>>, + body: serde_json::Value, +) -> Result { + let is_action = body["action"].is_string() && body["url"].is_string(); + if is_action { + // This could be an update, a deletion or an undeletion request. + // Process it separately. + let action = body["action"].as_str().unwrap(); + let url = body["url"].as_str().unwrap(); + let user = req.ext::<User>().unwrap(); + match action { + "delete" => { + if !user.check_scope("delete") { + return error_json!( + 401, + "insufficient_scope", + "You need a `delete` scope to delete posts." + ); + } + // This special scope is not available through a token endpoint, since the + // authorization endpoint is supposed to reject any auth request trying to get this + // scope. It is intended for TRUSTED external services that need to modify the + // database while ignoring any access controls + if (url::Url::parse(url)?.origin().ascii_serialization() + "/") != user.me.as_str() + && !user.check_scope("kittybox_internal:do_what_thou_wilt") + { + return error_json!( + 403, + "forbidden", + "You're not allowed to delete someone else's posts." + ); + } + if let Err(error) = req.state().storage.delete_post(url).await { + return Ok(error.into()); + } + Ok(Response::builder(200).build()) + } + "update" => { + if !user.check_scope("update") { + return error_json!( + 401, + "insufficient_scope", + "You need an `update` scope to update posts." 
+ ); + } + if (url::Url::parse(url)?.origin().ascii_serialization() + "/") != user.me.as_str() + && !user.check_scope("kittybox_internal:do_what_thou_wilt") + { + return error_json!( + 403, + "forbidden", + "You're not allowed to delete someone else's posts." + ); + } + if let Err(error) = req.state().storage.update_post(url, body.clone()).await { + Ok(error.into()) + } else { + Ok(Response::builder(204).build()) + } + } + _ => return error_json!(400, "invalid_request", "This action is not supported."), + } + } else if body["type"][0].is_string() { + // This is definitely an h-entry or something similar. Check if it has properties? + if body["properties"].is_object() { + // Ok, this is definitely a new h-entry. Let's save it. + return new_post(req, body).await; + } else { + return error_json!( + 400, + "invalid_request", + "This MF2-JSON object has a type, but not properties. This makes no sense to post." + ); + } + } else { + return error_json!( + 400, + "invalid_request", + "Try sending MF2-structured data or an object with an \"action\" and \"url\" keys." + ); + } +}*/ + +/*async fn process_form<S: Storage>( + req: Request<ApplicationState<S>>, + form: Vec<(String, String)>, +) -> Result { + if let Some((_, v)) = form.iter().find(|(k, _)| k == "action") { + if v == "delete" { + let user = req.ext::<User>().unwrap(); + if !user.check_scope("delete") { + return error_json!( + 401, + "insufficient_scope", + "You cannot delete posts without a `delete` scope." + ); + } + match form.iter().find(|(k, _)| k == "url") { + Some((_, url)) => { + if (url::Url::parse(url)?.origin().ascii_serialization() + "/") + != user.me.as_str() + && !user.check_scope("kittybox_internal:do_what_thou_wilt") + { + return error_json!( + 403, + "forbidden", + "You're not allowed to delete someone else's posts." + ); + } + if let Err(error) = req.state().storage.delete_post(url).await { + return error_json!(500, "database_error", error); + } + return Ok(Response::builder(200).build()); + } + None => { + return error_json!( + 400, + "invalid_request", + "Please provide an `url` to delete." + ) + } + } + } else { + return error_json!(400, "invalid_request", "This action is not supported in form-encoded mode. (JSON requests support more actions, use JSON!)"); + } + } + + let mf2 = convert_form_to_mf2_json(form); + + if mf2["properties"].as_object().unwrap().keys().len() > 0 { + return new_post(req, mf2).await; + } + return error_json!( + 400, + "invalid_request", + "Try sending h=entry&content=something%20interesting" + ); +}*/ + +/*pub async fn post_handler<S: Storage>(mut req: Request<ApplicationState<S>>) -> Result { + match req.content_type() { + Some(value) => { + if value == Mime::from_str("application/json").unwrap() { + match req.body_json::<serde_json::Value>().await { + Ok(parsed) => return process_json(req, parsed).await, + Err(err) => { + return error_json!( + 400, + "invalid_request", + format!("Parsing JSON failed: {:?}", err) + ) + } + } + } else if value == Mime::from_str("application/x-www-form-urlencoded").unwrap() { + match req.body_form::<Vec<(String, String)>>().await { + Ok(parsed) => return process_form(req, parsed).await, + Err(err) => { + return error_json!( + 400, + "invalid_request", + format!("Parsing form failed: {:?}", err) + ) + } + } + } else { + return error_json!( + 415, "unsupported_media_type", + "What's this? Try sending JSON instead. 
(urlencoded form also works but is less cute)" + ); + } + } + _ => { + return error_json!( + 415, "unsupported_media_type", + "You didn't send a Content-Type header, so we don't know how to parse your request." + ); + } + } +}*/ + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_no_replace_uid() { + let mf2 = json!({ + "type": ["h-card"], + "properties": { + "uid": ["https://fireburn.ru/"], + "name": ["Vika Nezrimaya"], + "note": ["A crazy programmer girl who wants some hugs"] + } + }); + + let (uid, normalized) = normalize_mf2( + mf2.clone(), + &User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ), + ); + assert_eq!( + normalized["properties"]["uid"][0], mf2["properties"]["uid"][0], + "UID was replaced" + ); + assert_eq!( + normalized["properties"]["uid"][0], uid, + "Returned post location doesn't match UID" + ); + } + + #[test] + fn test_mp_channel() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "uid": ["https://fireburn.ru/posts/test"], + "content": [{"html": "<p>Hello world!</p>"}], + "mp-channel": ["https://fireburn.ru/feeds/test"] + } + }); + + let (_, normalized) = normalize_mf2( + mf2.clone(), + &User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ) + ); + + assert_eq!( + normalized["properties"]["channel"], + mf2["properties"]["mp-channel"] + ); + } + + #[test] + fn test_mp_channel_as_string() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "uid": ["https://fireburn.ru/posts/test"], + "content": [{"html": "<p>Hello world!</p>"}], + "mp-channel": "https://fireburn.ru/feeds/test" + } + }); + + let (_, normalized) = normalize_mf2( + mf2.clone(), + &User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ) + ); + + assert_eq!( + normalized["properties"]["channel"][0], + mf2["properties"]["mp-channel"] + ); + } + + #[test] + fn test_normalize_mf2() { + let mf2 = json!({ + "type": ["h-entry"], + "properties": { + "content": ["This is content!"] + } + }); + + let (uid, post) = normalize_mf2( + mf2, + &User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ), + ); + assert_eq!( + post["properties"]["published"] + .as_array() + .expect("post['published'] is undefined") + .len(), + 1, + "Post doesn't have a published time" + ); + DateTime::parse_from_rfc3339(post["properties"]["published"][0].as_str().unwrap()) + .expect("Couldn't parse date from rfc3339"); + assert!( + !post["properties"]["url"] + .as_array() + .expect("post['url'] is undefined") + .is_empty(), + "Post doesn't have any URLs" + ); + assert_eq!( + post["properties"]["uid"] + .as_array() + .expect("post['uid'] is undefined") + .len(), + 1, + "Post doesn't have a single UID" + ); + assert_eq!( + post["properties"]["uid"][0], uid, + "UID of a post and its supposed location don't match" + ); + assert!( + uid.starts_with("https://fireburn.ru/posts/"), + "The post namespace is incorrect" + ); + assert_eq!( + post["properties"]["content"][0]["html"] + .as_str() + .expect("Post doesn't have a rich content object") + .trim(), + "<p>This is content!</p>", + "Parsed Markdown content doesn't match expected HTML" + ); + assert_eq!( + post["properties"]["channel"][0], "https://fireburn.ru/feeds/main", + "Post isn't posted to the main channel" + ); + assert_eq!( + post["properties"]["author"][0], "https://fireburn.ru/", + "Post author is unknown" + ); + } + + #[test] + fn test_mp_slug() { + let mf2 = json!({ + "type": ["h-entry"], + 
"properties": { + "content": ["This is content!"], + "mp-slug": ["hello-post"] + }, + }); + + let (_, post) = normalize_mf2( + mf2, + &User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ), + ); + assert!( + post["properties"]["url"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap()) + .any(|i| i == "https://fireburn.ru/posts/hello-post"), + "Didn't found an URL pointing to the location expected by the mp-slug semantics" + ); + assert!( + post["properties"]["mp-slug"].as_array().is_none(), + "mp-slug wasn't deleted from the array!" + ) + } + + #[test] + fn test_normalize_feed() { + let mf2 = json!({ + "type": ["h-feed"], + "properties": { + "name": "Main feed", + "mp-slug": ["main"] + } + }); + + let (uid, post) = normalize_mf2( + mf2, + &User::new( + "https://fireburn.ru/", + "https://quill.p3k.io/", + "create update media", + ), + ); + assert_eq!( + post["properties"]["uid"][0], uid, + "UID of a post and its supposed location don't match" + ); + assert_eq!(post["properties"]["author"][0], "https://fireburn.ru/"); + assert!( + post["properties"]["url"] + .as_array() + .unwrap() + .iter() + .map(|i| i.as_str().unwrap()) + .any(|i| i == "https://fireburn.ru/feeds/main"), + "Didn't found an URL pointing to the location expected by the mp-slug semantics" + ); + assert!( + post["properties"]["mp-slug"].as_array().is_none(), + "mp-slug wasn't deleted from the array!" + ) + } +} |