//#![warn(clippy::unwrap_used)] use crate::database::{filter_post, ErrorKind, Result, Settings, Storage, StorageError}; use async_trait::async_trait; use futures::{stream, StreamExt, TryStreamExt}; use serde_json::json; use std::collections::HashMap; use std::io::ErrorKind as IOErrorKind; use std::path::{Path, PathBuf}; use tokio::fs::{File, OpenOptions}; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::task::spawn_blocking; use tracing::debug; impl From for StorageError { fn from(source: std::io::Error) -> Self { Self::with_source( match source.kind() { IOErrorKind::NotFound => ErrorKind::NotFound, IOErrorKind::AlreadyExists => ErrorKind::Conflict, _ => ErrorKind::Backend, }, "file I/O error", Box::new(source), ) } } impl From for StorageError { fn from(source: tokio::time::error::Elapsed) -> Self { Self::with_source( ErrorKind::Backend, "timeout on I/O operation", Box::new(source), ) } } // Copied from https://stackoverflow.com/questions/39340924 // This routine is adapted from the *old* Path's `path_relative_from` // function, which works differently from the new `relative_from` function. // In particular, this handles the case on unix where both paths are // absolute but with only the root as the common directory. fn path_relative_from(path: &Path, base: &Path) -> Option { use std::path::Component; if path.is_absolute() != base.is_absolute() { if path.is_absolute() { Some(PathBuf::from(path)) } else { None } } else { let mut ita = path.components(); let mut itb = base.components(); let mut comps: Vec = vec![]; loop { match (ita.next(), itb.next()) { (None, None) => break, (Some(a), None) => { comps.push(a); comps.extend(ita.by_ref()); break; } (None, _) => comps.push(Component::ParentDir), (Some(a), Some(b)) if comps.is_empty() && a == b => (), (Some(a), Some(b)) if b == Component::CurDir => comps.push(a), (Some(_), Some(b)) if b == Component::ParentDir => return None, (Some(a), Some(_)) => { comps.push(Component::ParentDir); for _ in itb { comps.push(Component::ParentDir); } comps.push(a); comps.extend(ita.by_ref()); break; } } } Some(comps.iter().map(|c| c.as_os_str()).collect()) } } #[allow(clippy::unwrap_used, clippy::expect_used)] #[cfg(test)] mod tests { #[test] fn test_relative_path_resolving() { let path1 = std::path::Path::new("/home/vika/Projects/kittybox"); let path2 = std::path::Path::new("/home/vika/Projects/nixpkgs"); let relative_path = super::path_relative_from(path2, path1).unwrap(); assert_eq!(relative_path, std::path::Path::new("../nixpkgs")) } } // TODO: Check that the path ACTUALLY IS INSIDE THE ROOT FOLDER // This could be checked by completely resolving the path // and checking if it has a common prefix fn url_to_path(root: &Path, url: &str) -> PathBuf { let path = url_to_relative_path(url).to_logical_path(root); if !path.starts_with(root) { // TODO: handle more gracefully panic!("Security error: {:?} is not a prefix of {:?}", path, root) } else { path } } fn url_to_relative_path(url: &str) -> relative_path::RelativePathBuf { let url = axum::http::Uri::try_from(url).expect("Couldn't parse a URL"); let mut path = relative_path::RelativePathBuf::new(); path.push(url.authority().unwrap().to_string() + url.path() + ".json"); path } fn modify_post(post: &serde_json::Value, update: &serde_json::Value) -> Result { let mut add_keys: HashMap> = HashMap::new(); let mut remove_keys: Vec = vec![]; let mut remove_values: HashMap> = HashMap::new(); let mut post = post.clone(); if let Some(delete) = update["delete"].as_array() { remove_keys.extend( delete .iter() .filter_map(|v| v.as_str()) .map(|v| v.to_string()), ); } else if let Some(delete) = update["delete"].as_object() { for (k, v) in delete { if let Some(v) = v.as_array() { remove_values .entry(k.to_string()) .or_default() .extend(v.clone()); } else { return Err(StorageError::new( ErrorKind::BadRequest, "Malformed update object", )); } } } if let Some(add) = update["add"].as_object() { for (k, v) in add { if let Some(v) = v.as_array() { add_keys.insert(k.to_string(), v.clone()); } else { return Err(StorageError::new( ErrorKind::BadRequest, "Malformed update object", )); } } } if let Some(replace) = update["replace"].as_object() { for (k, v) in replace { remove_keys.push(k.to_string()); if let Some(v) = v.as_array() { add_keys.insert(k.to_string(), v.clone()); } else { return Err(StorageError::new( ErrorKind::BadRequest, "Malformed update object", )); } } } if let Some(props) = post["properties"].as_object_mut() { for k in remove_keys { props.remove(&k); } } for (k, v) in remove_values { let k = &k; let props = if k == "children" { &mut post } else { &mut post["properties"] }; v.iter().for_each(|v| { if let Some(vec) = props[k].as_array_mut() { if let Some(index) = vec.iter().position(|w| w == v) { vec.remove(index); } } }); } for (k, v) in add_keys { let props = if k == "children" { &mut post } else { &mut post["properties"] }; let k = &k; if let Some(prop) = props[k].as_array_mut() { if k == "children" { v.into_iter().rev().for_each(|v| prop.insert(0, v)); } else { prop.extend(v.into_iter()); } } else { post["properties"][k] = serde_json::Value::Array(v) } } Ok(post) } #[derive(Clone, Debug)] /// A backend using a folder with JSON files as a backing store. /// Uses symbolic links to represent a many-to-one mapping of URLs to a post. pub struct FileStorage { root_dir: PathBuf, } impl FileStorage { /// Create a new storage wrapping a folder specified by root_dir. pub async fn new(root_dir: PathBuf) -> Result { // TODO check if the dir is writable Ok(Self { root_dir }) } } async fn hydrate_author( feed: &mut serde_json::Value, user: &'_ Option, storage: &S, ) { let url = feed["properties"]["uid"][0] .as_str() .expect("MF2 value should have a UID set! Check if you used normalize_mf2 before recording the post!"); if let Some(author) = feed["properties"]["author"].as_array().cloned() { if !feed["type"] .as_array() .expect("MF2 value should have a type set!") .iter() .any(|i| i == "h-card") { let author_list: Vec = stream::iter(author.iter()) .then(|i| async move { if let Some(i) = i.as_str() { match storage.get_post(i).await { Ok(post) => match post { Some(post) => match filter_post(post, user) { Some(author) => author, None => json!(i), }, None => json!(i), }, Err(e) => { log::error!("Error while hydrating post {}: {}", url, e); json!(i) } } } else { i.clone() } }) .collect::>() .await; if let Some(props) = feed["properties"].as_object_mut() { props["author"] = json!(author_list); } else { feed["properties"] = json!({ "author": author_list }); } } } } #[async_trait] impl Storage for FileStorage { #[tracing::instrument(skip(self))] async fn post_exists(&self, url: &str) -> Result { let path = url_to_path(&self.root_dir, url); debug!("Checking if {:?} exists...", path); /*let result = match tokio::fs::metadata(path).await { Ok(metadata) => { Ok(true) }, Err(err) => { if err.kind() == IOErrorKind::NotFound { Ok(false) } else { Err(err.into()) } } };*/ #[allow(clippy::unwrap_used)] // JoinHandle captures panics, this closure shouldn't panic Ok(spawn_blocking(move || path.is_file()).await.unwrap()) } #[tracing::instrument(skip(self))] async fn get_post(&self, url: &str) -> Result> { let path = url_to_path(&self.root_dir, url); // TODO: check that the path actually belongs to the dir of user who requested it // it's not like you CAN access someone else's private posts with it // so it's not exactly a security issue, but it's still not good debug!("Opening {:?}", path); match File::open(&path).await { Ok(mut file) => { let mut content = String::new(); // Typechecks because OS magic acts on references // to FDs as if they were behind a mutex AsyncReadExt::read_to_string(&mut file, &mut content).await?; debug!( "Read {} bytes successfully from {:?}", content.as_bytes().len(), &path ); Ok(Some(serde_json::from_str(&content)?)) } Err(err) => { if err.kind() == IOErrorKind::NotFound { Ok(None) } else { Err(err.into()) } } } } #[tracing::instrument(skip(self))] async fn put_post(&self, post: &'_ serde_json::Value, user: &'_ str) -> Result<()> { let key = post["properties"]["uid"][0] .as_str() .expect("Tried to save a post without UID"); let path = url_to_path(&self.root_dir, key); let tempfile = (&path).with_extension("tmp"); debug!("Creating {:?}", path); let parent = path .parent() .expect("Parent for this directory should always exist") .to_owned(); if !parent.is_dir() { tokio::fs::create_dir_all(parent).await?; } let mut file = tokio::fs::OpenOptions::new() .write(true) .create_new(true) .open(&tempfile) .await?; file.write_all(post.to_string().as_bytes()).await?; file.flush().await?; drop(file); tokio::fs::rename(&tempfile, &path).await?; if let Some(urls) = post["properties"]["url"].as_array() { for url in urls.iter().map(|i| i.as_str().unwrap()) { if url != key && url.starts_with(user) { let link = url_to_path(&self.root_dir, url); debug!("Creating a symlink at {:?}", link); let orig = path.clone(); // We're supposed to have a parent here. let basedir = link.parent().ok_or_else(|| { StorageError::new( ErrorKind::Backend, "Failed to calculate parent directory when creating a symlink", ) })?; let relative = path_relative_from(&orig, basedir).unwrap(); println!("{:?} - {:?} = {:?}", &orig, &basedir, &relative); tokio::fs::symlink(relative, link).await?; } } } if post["type"] .as_array() .unwrap() .iter() .any(|s| s.as_str() == Some("h-feed")) { println!("Adding to channel list..."); // Add the h-feed to the channel list let mut path = relative_path::RelativePathBuf::new(); path.push( axum::http::Uri::try_from(user.to_string()) .unwrap() .authority() .unwrap() .to_string(), ); path.push("channels"); let path = path.to_path(&self.root_dir); let tempfilename = (&path).with_extension("tmp"); let channel_name = post["properties"]["name"][0] .as_str() .map(|s| s.to_string()) .unwrap_or_else(String::default); let key = key.to_string(); let mut tempfile = OpenOptions::new() .write(true) .create_new(true) .open(&tempfilename) .await?; let mut file = OpenOptions::new() .read(true) .write(true) .truncate(false) .create(true) .open(&path) .await?; let mut content = String::new(); file.read_to_string(&mut content).await?; drop(file); let mut channels: Vec = if !content.is_empty() { serde_json::from_str(&content)? } else { Vec::default() }; channels.push(super::MicropubChannel { uid: key.to_string(), name: channel_name, }); tempfile .write_all(serde_json::to_string(&channels)?.as_bytes()) .await?; tempfile.flush().await?; drop(tempfile); tokio::fs::rename(tempfilename, path).await?; } Ok(()) } #[tracing::instrument(skip(self))] async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()> { let path = url_to_path(&self.root_dir, url); let tempfilename = path.with_extension("tmp"); #[allow(unused_variables)] let (old_json, new_json) = { let mut temp = OpenOptions::new() .write(true) .create_new(true) .open(&tempfilename) .await?; let mut file = OpenOptions::new().read(true).open(&path).await?; let mut content = String::new(); file.read_to_string(&mut content).await?; let json: serde_json::Value = serde_json::from_str(&content)?; drop(file); // Apply the editing algorithms let new_json = modify_post(&json, &update)?; temp.write_all(new_json.to_string().as_bytes()).await?; temp.flush().await?; drop(temp); tokio::fs::rename(tempfilename, path).await?; (json, new_json) }; // TODO check if URLs changed between old and new JSON Ok(()) } #[tracing::instrument(skip(self))] async fn get_channels(&self, user: &'_ str) -> Result> { let mut path = relative_path::RelativePathBuf::new(); path.push( axum::http::Uri::try_from(user.to_string()) .unwrap() .authority() .unwrap() .to_string(), ); path.push("channels"); let path = path.to_path(&self.root_dir); match File::open(&path).await { Ok(mut f) => { let mut content = String::new(); f.read_to_string(&mut content).await?; // This should not happen, but if it does, handle it gracefully if content.is_empty() { return Ok(vec![]); } let channels: Vec = serde_json::from_str(&content)?; Ok(channels) } Err(e) => { if e.kind() == IOErrorKind::NotFound { Ok(vec![]) } else { Err(e.into()) } } } } #[tracing::instrument(skip(self))] async fn read_feed_with_limit( &self, url: &'_ str, after: &'_ Option, limit: usize, user: &'_ Option, ) -> Result> { if let Some(feed) = self.get_post(url).await? { if let Some(mut feed) = filter_post(feed, user) { if feed["children"].is_array() { // Take this out of the MF2-JSON document to save memory // // This uses a clever match with enum destructuring // to extract the underlying Vec without cloning it let children: Vec = match feed["children"].take() { serde_json::Value::Array(children) => children, // We've already checked it's an array _ => unreachable!() }; let mut posts_iter = children .into_iter() .map(|s: serde_json::Value| s.as_str().unwrap().to_string()); // Note: we can't actually use `skip_while` here because we end up emitting `after`. // This imperative snippet consumes after instead of emitting it, allowing the // stream of posts to return only those items that truly come *after* that one. // If I would implement an Iter combinator like this, I would call it `skip_until` if let Some(after) = after { for s in posts_iter.by_ref() { if &s == after { break; } } }; let posts = stream::iter(posts_iter) .map(|url: String| async move { self.get_post(&url).await }) .buffered(std::cmp::min(3, limit)) // Hack to unwrap the Option and sieve out broken links // Broken links return None, and Stream::filter_map skips Nones. .try_filter_map(|post: Option| async move { Ok(post) }) .try_filter_map(|post| async move { Ok(filter_post(post, user)) }) .and_then(|mut post| async move { hydrate_author(&mut post, user, self).await; Ok(post) }) .take(limit); match posts.try_collect::>().await { Ok(posts) => feed["children"] = serde_json::json!(posts), Err(err) => { return Err(StorageError::with_source( ErrorKind::Other, "Feed assembly error", Box::new(err), )); } } } hydrate_author(&mut feed, user, self).await; Ok(Some(feed)) } else { Err(StorageError::new( ErrorKind::PermissionDenied, "specified user cannot access this post", )) } } else { Ok(None) } } #[tracing::instrument(skip(self))] async fn delete_post(&self, url: &'_ str) -> Result<()> { let path = url_to_path(&self.root_dir, url); if let Err(e) = tokio::fs::remove_file(path).await { Err(e.into()) } else { // TODO check for dangling references in the channel list Ok(()) } } #[tracing::instrument(skip(self))] async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result { log::debug!("User for getting settings: {}", user); let url = axum::http::Uri::try_from(user).expect("Couldn't parse a URL"); let mut path = relative_path::RelativePathBuf::new(); path.push(url.authority().unwrap().to_string()); path.push("settings"); let path = path.to_path(&self.root_dir); log::debug!("Getting settings from {:?}", &path); let setting = setting.to_string(); let mut file = File::open(path).await?; let mut content = String::new(); file.read_to_string(&mut content).await?; let settings: HashMap = serde_json::from_str(&content)?; // XXX consider returning string slices instead of cloning a string every time // it might come with a performance hit and/or memory usage inflation settings .get(&setting) .cloned() .ok_or_else(|| StorageError::new(ErrorKind::Backend, "Setting not set")) } #[tracing::instrument(skip(self))] async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()> { let url = axum::http::Uri::try_from(user).expect("Couldn't parse a URL"); let mut path = relative_path::RelativePathBuf::new(); path.push(url.authority().unwrap().to_string()); path.push("settings"); let path = path.to_path(&self.root_dir); let temppath = path.with_extension("tmp"); let parent = path.parent().unwrap().to_owned(); if !spawn_blocking(move || parent.is_dir()).await.unwrap() { tokio::fs::create_dir_all(path.parent().unwrap()).await?; } let (setting, value) = (setting.to_string(), value.to_string()); let mut tempfile = OpenOptions::new() .write(true) .create_new(true) .open(&temppath) .await?; let mut settings: HashMap = match File::open(&path).await { Ok(mut f) => { let mut content = String::new(); f.read_to_string(&mut content).await?; if content.is_empty() { HashMap::default() } else { serde_json::from_str(&content)? } } Err(err) => { if err.kind() == IOErrorKind::NotFound { HashMap::default() } else { return Err(err.into()); } } }; settings.insert(setting, value); tempfile .write_all(serde_json::to_string(&settings)?.as_bytes()) .await?; drop(tempfile); tokio::fs::rename(temppath, path).await?; Ok(()) } }