From 5610a5f0bf1a9df02bd3d5b55e2cdebef2440360 Mon Sep 17 00:00:00 2001 From: Vika Date: Tue, 24 May 2022 17:18:30 +0300 Subject: flake.nix: reorganize - Kittybox's source code is moved to a subfolder - This improves build caching by Nix since it doesn't take changes to other files into account - Package and test definitions were spun into separate files - This makes my flake.nix much easier to navigate - This also makes it somewhat possible to use without flakes (but it is still not easy, so use flakes!) - Some attributes were moved in compliance with Nix 2.8's changes to flake schema --- kittybox-rs/src/database/file/mod.rs | 619 +++++++++++++++++++++++++++++++++++ 1 file changed, 619 insertions(+) create mode 100644 kittybox-rs/src/database/file/mod.rs (limited to 'kittybox-rs/src/database/file') diff --git a/kittybox-rs/src/database/file/mod.rs b/kittybox-rs/src/database/file/mod.rs new file mode 100644 index 0000000..1e7aa96 --- /dev/null +++ b/kittybox-rs/src/database/file/mod.rs @@ -0,0 +1,619 @@ +//#![warn(clippy::unwrap_used)] +use crate::database::{filter_post, ErrorKind, Result, Storage, StorageError, Settings}; +use std::io::ErrorKind as IOErrorKind; +use tokio::fs::{File, OpenOptions}; +use tokio::io::{AsyncReadExt, AsyncWriteExt}; +use tokio::task::spawn_blocking; +use async_trait::async_trait; +use futures::{stream, StreamExt, TryStreamExt}; +use log::debug; +use serde_json::json; +use std::collections::HashMap; +use std::path::{Path, PathBuf}; + +impl From for StorageError { + fn from(source: std::io::Error) -> Self { + Self::with_source( + match source.kind() { + IOErrorKind::NotFound => ErrorKind::NotFound, + IOErrorKind::AlreadyExists => ErrorKind::Conflict, + _ => ErrorKind::Backend, + }, + "file I/O error", + Box::new(source), + ) + } +} + +impl From for StorageError { + fn from(source: tokio::time::error::Elapsed) -> Self { + Self::with_source( + ErrorKind::Backend, + "timeout on I/O operation", + Box::new(source) + ) + } +} + +// Copied from https://stackoverflow.com/questions/39340924 +// This routine is adapted from the *old* Path's `path_relative_from` +// function, which works differently from the new `relative_from` function. +// In particular, this handles the case on unix where both paths are +// absolute but with only the root as the common directory. +fn path_relative_from(path: &Path, base: &Path) -> Option { + use std::path::Component; + + if path.is_absolute() != base.is_absolute() { + if path.is_absolute() { + Some(PathBuf::from(path)) + } else { + None + } + } else { + let mut ita = path.components(); + let mut itb = base.components(); + let mut comps: Vec = vec![]; + loop { + match (ita.next(), itb.next()) { + (None, None) => break, + (Some(a), None) => { + comps.push(a); + comps.extend(ita.by_ref()); + break; + } + (None, _) => comps.push(Component::ParentDir), + (Some(a), Some(b)) if comps.is_empty() && a == b => (), + (Some(a), Some(b)) if b == Component::CurDir => comps.push(a), + (Some(_), Some(b)) if b == Component::ParentDir => return None, + (Some(a), Some(_)) => { + comps.push(Component::ParentDir); + for _ in itb { + comps.push(Component::ParentDir); + } + comps.push(a); + comps.extend(ita.by_ref()); + break; + } + } + } + Some(comps.iter().map(|c| c.as_os_str()).collect()) + } +} + +#[allow(clippy::unwrap_used, clippy::expect_used)] +#[cfg(test)] +mod tests { + #[test] + fn test_relative_path_resolving() { + let path1 = std::path::Path::new("/home/vika/Projects/kittybox"); + let path2 = std::path::Path::new("/home/vika/Projects/nixpkgs"); + let relative_path = super::path_relative_from(path2, path1).unwrap(); + + assert_eq!(relative_path, std::path::Path::new("../nixpkgs")) + } +} + +// TODO: Check that the path ACTUALLY IS INSIDE THE ROOT FOLDER +// This could be checked by completely resolving the path +// and checking if it has a common prefix +fn url_to_path(root: &Path, url: &str) -> PathBuf { + let path = url_to_relative_path(url).to_logical_path(root); + if !path.starts_with(root) { + // TODO: handle more gracefully + panic!("Security error: {:?} is not a prefix of {:?}", path, root) + } else { + path + } +} + +fn url_to_relative_path(url: &str) -> relative_path::RelativePathBuf { + let url = warp::http::Uri::try_from(url).expect("Couldn't parse a URL"); + let mut path = relative_path::RelativePathBuf::new(); + path.push(url.authority().unwrap().to_string() + url.path() + ".json"); + + path +} + +fn modify_post(post: &serde_json::Value, update: &serde_json::Value) -> Result { + let mut add_keys: HashMap> = HashMap::new(); + let mut remove_keys: Vec = vec![]; + let mut remove_values: HashMap> = HashMap::new(); + let mut post = post.clone(); + + if let Some(delete) = update["delete"].as_array() { + remove_keys.extend( + delete + .iter() + .filter_map(|v| v.as_str()) + .map(|v| v.to_string()), + ); + } else if let Some(delete) = update["delete"].as_object() { + for (k, v) in delete { + if let Some(v) = v.as_array() { + remove_values + .entry(k.to_string()) + .or_default() + .extend(v.clone()); + } else { + return Err(StorageError::new( + ErrorKind::BadRequest, + "Malformed update object", + )); + } + } + } + if let Some(add) = update["add"].as_object() { + for (k, v) in add { + if let Some(v) = v.as_array() { + add_keys.insert(k.to_string(), v.clone()); + } else { + return Err(StorageError::new( + ErrorKind::BadRequest, + "Malformed update object", + )); + } + } + } + if let Some(replace) = update["replace"].as_object() { + for (k, v) in replace { + remove_keys.push(k.to_string()); + if let Some(v) = v.as_array() { + add_keys.insert(k.to_string(), v.clone()); + } else { + return Err(StorageError::new(ErrorKind::BadRequest, "Malformed update object")); + } + } + } + + if let Some(props) = post["properties"].as_object_mut() { + for k in remove_keys { + props.remove(&k); + } + } + for (k, v) in remove_values { + let k = &k; + let props = if k == "children" { + &mut post + } else { + &mut post["properties"] + }; + v.iter().for_each(|v| { + if let Some(vec) = props[k].as_array_mut() { + if let Some(index) = vec.iter().position(|w| w == v) { + vec.remove(index); + } + } + }); + } + for (k, v) in add_keys { + let props = if k == "children" { + &mut post + } else { + &mut post["properties"] + }; + let k = &k; + if let Some(prop) = props[k].as_array_mut() { + if k == "children" { + v.into_iter() + .rev() + .for_each(|v| prop.insert(0, v)); + } else { + prop.extend(v.into_iter()); + } + } else { + post["properties"][k] = serde_json::Value::Array(v) + } + } + Ok(post) +} + +#[derive(Clone, Debug)] +/// A backend using a folder with JSON files as a backing store. +/// Uses symbolic links to represent a many-to-one mapping of URLs to a post. +pub struct FileStorage { + root_dir: PathBuf, +} + +impl FileStorage { + /// Create a new storage wrapping a folder specified by root_dir. + pub async fn new(root_dir: PathBuf) -> Result { + // TODO check if the dir is writable + Ok(Self { root_dir }) + } +} + +async fn hydrate_author( + feed: &mut serde_json::Value, + user: &'_ Option, + storage: &S, +) { + let url = feed["properties"]["uid"][0] + .as_str() + .expect("MF2 value should have a UID set! Check if you used normalize_mf2 before recording the post!"); + if let Some(author) = feed["properties"]["author"].as_array().cloned() { + if !feed["type"] + .as_array() + .expect("MF2 value should have a type set!") + .iter() + .any(|i| i == "h-card") + { + let author_list: Vec = stream::iter(author.iter()) + .then(|i| async move { + if let Some(i) = i.as_str() { + match storage.get_post(i).await { + Ok(post) => match post { + Some(post) => match filter_post(post, user) { + Some(author) => author, + None => json!(i), + }, + None => json!(i), + }, + Err(e) => { + log::error!("Error while hydrating post {}: {}", url, e); + json!(i) + } + } + } else { + i.clone() + } + }) + .collect::>() + .await; + if let Some(props) = feed["properties"].as_object_mut() { + props["author"] = json!(author_list); + } else { + feed["properties"] = json!({"author": author_list}); + } + } + } +} + +#[async_trait] +impl Storage for FileStorage { + async fn post_exists(&self, url: &str) -> Result { + let path = url_to_path(&self.root_dir, url); + debug!("Checking if {:?} exists...", path); + /*let result = match tokio::fs::metadata(path).await { + Ok(metadata) => { + Ok(true) + }, + Err(err) => { + if err.kind() == IOErrorKind::NotFound { + Ok(false) + } else { + Err(err.into()) + } + } + };*/ + #[allow(clippy::unwrap_used)] // JoinHandle captures panics, this closure shouldn't panic + Ok(spawn_blocking(move || path.is_file()).await.unwrap()) + } + + async fn get_post(&self, url: &str) -> Result> { + let path = url_to_path(&self.root_dir, url); + // TODO: check that the path actually belongs to the dir of user who requested it + // it's not like you CAN access someone else's private posts with it + // so it's not exactly a security issue, but it's still not good + debug!("Opening {:?}", path); + + match File::open(&path).await { + Ok(mut file) => { + let mut content = String::new(); + // Typechecks because OS magic acts on references + // to FDs as if they were behind a mutex + AsyncReadExt::read_to_string(&mut file, &mut content).await?; + debug!("Read {} bytes successfully from {:?}", content.as_bytes().len(), &path); + Ok(Some(serde_json::from_str(&content)?)) + }, + Err(err) => { + if err.kind() == IOErrorKind::NotFound { + Ok(None) + } else { + Err(err.into()) + } + } + } + } + + async fn put_post(&self, post: &'_ serde_json::Value, user: &'_ str) -> Result<()> { + let key = post["properties"]["uid"][0] + .as_str() + .expect("Tried to save a post without UID"); + let path = url_to_path(&self.root_dir, key); + let tempfile = (&path).with_extension("tmp"); + debug!("Creating {:?}", path); + + let parent = path.parent().expect("Parent for this directory should always exist").to_owned(); + if !parent.is_dir() { + tokio::fs::create_dir_all(parent).await?; + } + + let mut file = tokio::fs::OpenOptions::new() + .write(true) + .create_new(true) + .open(&tempfile).await?; + + file.write_all(post.to_string().as_bytes()).await?; + file.flush().await?; + drop(file); + tokio::fs::rename(&tempfile, &path).await?; + + if let Some(urls) = post["properties"]["url"].as_array() { + for url in urls + .iter() + .map(|i| i.as_str().unwrap()) + { + if url != key && url.starts_with(user) { + let link = url_to_path(&self.root_dir, url); + debug!("Creating a symlink at {:?}", link); + let orig = path.clone(); + // We're supposed to have a parent here. + let basedir = link.parent().ok_or_else(|| { + StorageError::new( + ErrorKind::Backend, + "Failed to calculate parent directory when creating a symlink", + ) + })?; + let relative = path_relative_from(&orig, basedir).unwrap(); + println!("{:?} - {:?} = {:?}", &orig, &basedir, &relative); + tokio::fs::symlink(relative, link).await?; + } + } + } + + if post["type"] + .as_array() + .unwrap() + .iter() + .any(|s| s.as_str() == Some("h-feed")) + { + println!("Adding to channel list..."); + // Add the h-feed to the channel list + let mut path = relative_path::RelativePathBuf::new(); + path.push(warp::http::Uri::try_from(user.to_string()).unwrap().authority().unwrap().to_string()); + path.push("channels"); + + let path = path.to_path(&self.root_dir); + let tempfilename = (&path).with_extension("tmp"); + let channel_name = post["properties"]["name"][0] + .as_str() + .map(|s| s.to_string()) + .unwrap_or_else(String::default); + let key = key.to_string(); + + let mut tempfile = OpenOptions::new() + .write(true) + .create_new(true) + .open(&tempfilename).await?; + let mut file = OpenOptions::new() + .read(true) + .write(true) + .truncate(false) + .create(true) + .open(&path).await?; + + let mut content = String::new(); + file.read_to_string(&mut content).await?; + drop(file); + let mut channels: Vec = if !content.is_empty() { + serde_json::from_str(&content)? + } else { + Vec::default() + }; + + channels.push(super::MicropubChannel { + uid: key.to_string(), + name: channel_name, + }); + + tempfile.write_all(serde_json::to_string(&channels)?.as_bytes()).await?; + tempfile.flush().await?; + drop(tempfile); + tokio::fs::rename(tempfilename, path).await?; + } + Ok(()) + } + + async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()> { + let path = url_to_path(&self.root_dir, url); + let tempfilename = path.with_extension("tmp"); + #[allow(unused_variables)] + let (old_json, new_json) = { + let mut temp = OpenOptions::new() + .write(true) + .create_new(true) + .open(&tempfilename) + .await?; + let mut file = OpenOptions::new() + .read(true) + .open(&path) + .await?; + + let mut content = String::new(); + file.read_to_string(&mut content).await?; + let json: serde_json::Value = serde_json::from_str(&content)?; + drop(file); + // Apply the editing algorithms + let new_json = modify_post(&json, &update)?; + + temp.write_all(new_json.to_string().as_bytes()).await?; + temp.flush().await?; + drop(temp); + tokio::fs::rename(tempfilename, path).await?; + + (json, new_json) + }; + // TODO check if URLs changed between old and new JSON + Ok(()) + } + + async fn get_channels(&self, user: &'_ str) -> Result> { + let mut path = relative_path::RelativePathBuf::new(); + path.push(warp::http::Uri::try_from(user.to_string()).unwrap().authority().unwrap().to_string()); + path.push("channels"); + + let path = path.to_path(&self.root_dir); + match File::open(&path).await { + Ok(mut f) => { + let mut content = String::new(); + f.read_to_string(&mut content).await?; + // This should not happen, but if it does, handle it gracefully + if content.is_empty() { + return Ok(vec![]); + } + let channels: Vec = serde_json::from_str(&content)?; + Ok(channels) + } + Err(e) => { + if e.kind() == IOErrorKind::NotFound { + Ok(vec![]) + } else { + Err(e.into()) + } + } + } + } + + async fn read_feed_with_limit( + &self, + url: &'_ str, + after: &'_ Option, + limit: usize, + user: &'_ Option, + ) -> Result> { + if let Some(feed) = self.get_post(url).await? { + if let Some(mut feed) = filter_post(feed, user) { + if feed["children"].is_array() { + // This code contains several clones. It looks + // like the borrow checker thinks it is preventing + // me from doing something incredibly stupid. The + // borrow checker may or may not be right. + let children = feed["children"].as_array().unwrap().clone(); + let mut posts_iter = children + .into_iter() + .map(|s: serde_json::Value| s.as_str().unwrap().to_string()); + // Note: we can't actually use skip_while here because we end up emitting `after`. + // This imperative snippet consumes after instead of emitting it, allowing the + // stream of posts to return only those items that truly come *after* + if let Some(after) = after { + for s in posts_iter.by_ref() { + if &s == after { + break + } + } + }; + let posts = stream::iter(posts_iter) + .map(|url: String| async move { self.get_post(&url).await }) + .buffered(std::cmp::min(3, limit)) + // Hack to unwrap the Option and sieve out broken links + // Broken links return None, and Stream::filter_map skips Nones. + .try_filter_map(|post: Option| async move { Ok(post) }) + .try_filter_map(|post| async move { Ok(filter_post(post, user)) }) + .and_then(|mut post| async move { + hydrate_author(&mut post, user, self).await; + Ok(post) + }) + .take(limit); + + match posts.try_collect::>().await { + Ok(posts) => feed["children"] = serde_json::json!(posts), + Err(err) => { + return Err(StorageError::with_source( + ErrorKind::Other, + "Feed assembly error", + Box::new(err), + )); + } + } + } + hydrate_author(&mut feed, user, self).await; + Ok(Some(feed)) + } else { + Err(StorageError::new( + ErrorKind::PermissionDenied, + "specified user cannot access this post", + )) + } + } else { + Ok(None) + } + } + + async fn delete_post(&self, url: &'_ str) -> Result<()> { + let path = url_to_path(&self.root_dir, url); + if let Err(e) = tokio::fs::remove_file(path).await { + Err(e.into()) + } else { + // TODO check for dangling references in the channel list + Ok(()) + } + } + + async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result { + log::debug!("User for getting settings: {}", user); + let url = warp::http::Uri::try_from(user).expect("Couldn't parse a URL"); + let mut path = relative_path::RelativePathBuf::new(); + path.push(url.authority().unwrap().to_string()); + path.push("settings"); + + let path = path.to_path(&self.root_dir); + log::debug!("Getting settings from {:?}", &path); + let setting = setting.to_string(); + let mut file = File::open(path).await?; + let mut content = String::new(); + file.read_to_string(&mut content).await?; + + let settings: HashMap = serde_json::from_str(&content)?; + // XXX consider returning string slices instead of cloning a string every time + // it might come with a performance hit and/or memory usage inflation + settings + .get(&setting) + .cloned() + .ok_or_else(|| StorageError::new(ErrorKind::Backend, "Setting not set")) + } + + async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()> { + let url = warp::http::Uri::try_from(user).expect("Couldn't parse a URL"); + let mut path = relative_path::RelativePathBuf::new(); + path.push(url.authority().unwrap().to_string()); + path.push("settings"); + + let path = path.to_path(&self.root_dir); + let temppath = path.with_extension("tmp"); + + let parent = path.parent().unwrap().to_owned(); + if !spawn_blocking(move || parent.is_dir()).await.unwrap() { + tokio::fs::create_dir_all(path.parent().unwrap()).await?; + } + + let (setting, value) = (setting.to_string(), value.to_string()); + + let mut tempfile = OpenOptions::new() + .write(true) + .create_new(true) + .open(&temppath) + .await?; + + let mut settings: HashMap = match File::open(&path).await { + Ok(mut f) => { + let mut content = String::new(); + f.read_to_string(&mut content).await?; + if content.is_empty() { + HashMap::default() + } else { + serde_json::from_str(&content)? + } + } + Err(err) => if err.kind() == IOErrorKind::NotFound { + HashMap::default() + } else { + return Err(err.into()) + } + }; + settings.insert(setting, value); + tempfile.write_all(serde_json::to_string(&settings)?.as_bytes()).await?; + drop(tempfile); + tokio::fs::rename(temppath, path).await?; + Ok(()) + } +} -- cgit 1.4.1