diff options
Diffstat (limited to 'src/database/file/mod.rs')
-rw-r--r-- | src/database/file/mod.rs | 619 |
1 files changed, 0 insertions, 619 deletions
diff --git a/src/database/file/mod.rs b/src/database/file/mod.rs deleted file mode 100644 index 1e7aa96..0000000 --- a/src/database/file/mod.rs +++ /dev/null @@ -1,619 +0,0 @@ -//#![warn(clippy::unwrap_used)] -use crate::database::{filter_post, ErrorKind, Result, Storage, StorageError, Settings}; -use std::io::ErrorKind as IOErrorKind; -use tokio::fs::{File, OpenOptions}; -use tokio::io::{AsyncReadExt, AsyncWriteExt}; -use tokio::task::spawn_blocking; -use async_trait::async_trait; -use futures::{stream, StreamExt, TryStreamExt}; -use log::debug; -use serde_json::json; -use std::collections::HashMap; -use std::path::{Path, PathBuf}; - -impl From<std::io::Error> for StorageError { - fn from(source: std::io::Error) -> Self { - Self::with_source( - match source.kind() { - IOErrorKind::NotFound => ErrorKind::NotFound, - IOErrorKind::AlreadyExists => ErrorKind::Conflict, - _ => ErrorKind::Backend, - }, - "file I/O error", - Box::new(source), - ) - } -} - -impl From<tokio::time::error::Elapsed> for StorageError { - fn from(source: tokio::time::error::Elapsed) -> Self { - Self::with_source( - ErrorKind::Backend, - "timeout on I/O operation", - Box::new(source) - ) - } -} - -// Copied from https://stackoverflow.com/questions/39340924 -// This routine is adapted from the *old* Path's `path_relative_from` -// function, which works differently from the new `relative_from` function. -// In particular, this handles the case on unix where both paths are -// absolute but with only the root as the common directory. -fn path_relative_from(path: &Path, base: &Path) -> Option<PathBuf> { - use std::path::Component; - - if path.is_absolute() != base.is_absolute() { - if path.is_absolute() { - Some(PathBuf::from(path)) - } else { - None - } - } else { - let mut ita = path.components(); - let mut itb = base.components(); - let mut comps: Vec<Component> = vec![]; - loop { - match (ita.next(), itb.next()) { - (None, None) => break, - (Some(a), None) => { - comps.push(a); - comps.extend(ita.by_ref()); - break; - } - (None, _) => comps.push(Component::ParentDir), - (Some(a), Some(b)) if comps.is_empty() && a == b => (), - (Some(a), Some(b)) if b == Component::CurDir => comps.push(a), - (Some(_), Some(b)) if b == Component::ParentDir => return None, - (Some(a), Some(_)) => { - comps.push(Component::ParentDir); - for _ in itb { - comps.push(Component::ParentDir); - } - comps.push(a); - comps.extend(ita.by_ref()); - break; - } - } - } - Some(comps.iter().map(|c| c.as_os_str()).collect()) - } -} - -#[allow(clippy::unwrap_used, clippy::expect_used)] -#[cfg(test)] -mod tests { - #[test] - fn test_relative_path_resolving() { - let path1 = std::path::Path::new("/home/vika/Projects/kittybox"); - let path2 = std::path::Path::new("/home/vika/Projects/nixpkgs"); - let relative_path = super::path_relative_from(path2, path1).unwrap(); - - assert_eq!(relative_path, std::path::Path::new("../nixpkgs")) - } -} - -// TODO: Check that the path ACTUALLY IS INSIDE THE ROOT FOLDER -// This could be checked by completely resolving the path -// and checking if it has a common prefix -fn url_to_path(root: &Path, url: &str) -> PathBuf { - let path = url_to_relative_path(url).to_logical_path(root); - if !path.starts_with(root) { - // TODO: handle more gracefully - panic!("Security error: {:?} is not a prefix of {:?}", path, root) - } else { - path - } -} - -fn url_to_relative_path(url: &str) -> relative_path::RelativePathBuf { - let url = warp::http::Uri::try_from(url).expect("Couldn't parse a URL"); - let mut path = relative_path::RelativePathBuf::new(); - path.push(url.authority().unwrap().to_string() + url.path() + ".json"); - - path -} - -fn modify_post(post: &serde_json::Value, update: &serde_json::Value) -> Result<serde_json::Value> { - let mut add_keys: HashMap<String, Vec<serde_json::Value>> = HashMap::new(); - let mut remove_keys: Vec<String> = vec![]; - let mut remove_values: HashMap<String, Vec<serde_json::Value>> = HashMap::new(); - let mut post = post.clone(); - - if let Some(delete) = update["delete"].as_array() { - remove_keys.extend( - delete - .iter() - .filter_map(|v| v.as_str()) - .map(|v| v.to_string()), - ); - } else if let Some(delete) = update["delete"].as_object() { - for (k, v) in delete { - if let Some(v) = v.as_array() { - remove_values - .entry(k.to_string()) - .or_default() - .extend(v.clone()); - } else { - return Err(StorageError::new( - ErrorKind::BadRequest, - "Malformed update object", - )); - } - } - } - if let Some(add) = update["add"].as_object() { - for (k, v) in add { - if let Some(v) = v.as_array() { - add_keys.insert(k.to_string(), v.clone()); - } else { - return Err(StorageError::new( - ErrorKind::BadRequest, - "Malformed update object", - )); - } - } - } - if let Some(replace) = update["replace"].as_object() { - for (k, v) in replace { - remove_keys.push(k.to_string()); - if let Some(v) = v.as_array() { - add_keys.insert(k.to_string(), v.clone()); - } else { - return Err(StorageError::new(ErrorKind::BadRequest, "Malformed update object")); - } - } - } - - if let Some(props) = post["properties"].as_object_mut() { - for k in remove_keys { - props.remove(&k); - } - } - for (k, v) in remove_values { - let k = &k; - let props = if k == "children" { - &mut post - } else { - &mut post["properties"] - }; - v.iter().for_each(|v| { - if let Some(vec) = props[k].as_array_mut() { - if let Some(index) = vec.iter().position(|w| w == v) { - vec.remove(index); - } - } - }); - } - for (k, v) in add_keys { - let props = if k == "children" { - &mut post - } else { - &mut post["properties"] - }; - let k = &k; - if let Some(prop) = props[k].as_array_mut() { - if k == "children" { - v.into_iter() - .rev() - .for_each(|v| prop.insert(0, v)); - } else { - prop.extend(v.into_iter()); - } - } else { - post["properties"][k] = serde_json::Value::Array(v) - } - } - Ok(post) -} - -#[derive(Clone, Debug)] -/// A backend using a folder with JSON files as a backing store. -/// Uses symbolic links to represent a many-to-one mapping of URLs to a post. -pub struct FileStorage { - root_dir: PathBuf, -} - -impl FileStorage { - /// Create a new storage wrapping a folder specified by root_dir. - pub async fn new(root_dir: PathBuf) -> Result<Self> { - // TODO check if the dir is writable - Ok(Self { root_dir }) - } -} - -async fn hydrate_author<S: Storage>( - feed: &mut serde_json::Value, - user: &'_ Option<String>, - storage: &S, -) { - let url = feed["properties"]["uid"][0] - .as_str() - .expect("MF2 value should have a UID set! Check if you used normalize_mf2 before recording the post!"); - if let Some(author) = feed["properties"]["author"].as_array().cloned() { - if !feed["type"] - .as_array() - .expect("MF2 value should have a type set!") - .iter() - .any(|i| i == "h-card") - { - let author_list: Vec<serde_json::Value> = stream::iter(author.iter()) - .then(|i| async move { - if let Some(i) = i.as_str() { - match storage.get_post(i).await { - Ok(post) => match post { - Some(post) => match filter_post(post, user) { - Some(author) => author, - None => json!(i), - }, - None => json!(i), - }, - Err(e) => { - log::error!("Error while hydrating post {}: {}", url, e); - json!(i) - } - } - } else { - i.clone() - } - }) - .collect::<Vec<_>>() - .await; - if let Some(props) = feed["properties"].as_object_mut() { - props["author"] = json!(author_list); - } else { - feed["properties"] = json!({"author": author_list}); - } - } - } -} - -#[async_trait] -impl Storage for FileStorage { - async fn post_exists(&self, url: &str) -> Result<bool> { - let path = url_to_path(&self.root_dir, url); - debug!("Checking if {:?} exists...", path); - /*let result = match tokio::fs::metadata(path).await { - Ok(metadata) => { - Ok(true) - }, - Err(err) => { - if err.kind() == IOErrorKind::NotFound { - Ok(false) - } else { - Err(err.into()) - } - } - };*/ - #[allow(clippy::unwrap_used)] // JoinHandle captures panics, this closure shouldn't panic - Ok(spawn_blocking(move || path.is_file()).await.unwrap()) - } - - async fn get_post(&self, url: &str) -> Result<Option<serde_json::Value>> { - let path = url_to_path(&self.root_dir, url); - // TODO: check that the path actually belongs to the dir of user who requested it - // it's not like you CAN access someone else's private posts with it - // so it's not exactly a security issue, but it's still not good - debug!("Opening {:?}", path); - - match File::open(&path).await { - Ok(mut file) => { - let mut content = String::new(); - // Typechecks because OS magic acts on references - // to FDs as if they were behind a mutex - AsyncReadExt::read_to_string(&mut file, &mut content).await?; - debug!("Read {} bytes successfully from {:?}", content.as_bytes().len(), &path); - Ok(Some(serde_json::from_str(&content)?)) - }, - Err(err) => { - if err.kind() == IOErrorKind::NotFound { - Ok(None) - } else { - Err(err.into()) - } - } - } - } - - async fn put_post(&self, post: &'_ serde_json::Value, user: &'_ str) -> Result<()> { - let key = post["properties"]["uid"][0] - .as_str() - .expect("Tried to save a post without UID"); - let path = url_to_path(&self.root_dir, key); - let tempfile = (&path).with_extension("tmp"); - debug!("Creating {:?}", path); - - let parent = path.parent().expect("Parent for this directory should always exist").to_owned(); - if !parent.is_dir() { - tokio::fs::create_dir_all(parent).await?; - } - - let mut file = tokio::fs::OpenOptions::new() - .write(true) - .create_new(true) - .open(&tempfile).await?; - - file.write_all(post.to_string().as_bytes()).await?; - file.flush().await?; - drop(file); - tokio::fs::rename(&tempfile, &path).await?; - - if let Some(urls) = post["properties"]["url"].as_array() { - for url in urls - .iter() - .map(|i| i.as_str().unwrap()) - { - if url != key && url.starts_with(user) { - let link = url_to_path(&self.root_dir, url); - debug!("Creating a symlink at {:?}", link); - let orig = path.clone(); - // We're supposed to have a parent here. - let basedir = link.parent().ok_or_else(|| { - StorageError::new( - ErrorKind::Backend, - "Failed to calculate parent directory when creating a symlink", - ) - })?; - let relative = path_relative_from(&orig, basedir).unwrap(); - println!("{:?} - {:?} = {:?}", &orig, &basedir, &relative); - tokio::fs::symlink(relative, link).await?; - } - } - } - - if post["type"] - .as_array() - .unwrap() - .iter() - .any(|s| s.as_str() == Some("h-feed")) - { - println!("Adding to channel list..."); - // Add the h-feed to the channel list - let mut path = relative_path::RelativePathBuf::new(); - path.push(warp::http::Uri::try_from(user.to_string()).unwrap().authority().unwrap().to_string()); - path.push("channels"); - - let path = path.to_path(&self.root_dir); - let tempfilename = (&path).with_extension("tmp"); - let channel_name = post["properties"]["name"][0] - .as_str() - .map(|s| s.to_string()) - .unwrap_or_else(String::default); - let key = key.to_string(); - - let mut tempfile = OpenOptions::new() - .write(true) - .create_new(true) - .open(&tempfilename).await?; - let mut file = OpenOptions::new() - .read(true) - .write(true) - .truncate(false) - .create(true) - .open(&path).await?; - - let mut content = String::new(); - file.read_to_string(&mut content).await?; - drop(file); - let mut channels: Vec<super::MicropubChannel> = if !content.is_empty() { - serde_json::from_str(&content)? - } else { - Vec::default() - }; - - channels.push(super::MicropubChannel { - uid: key.to_string(), - name: channel_name, - }); - - tempfile.write_all(serde_json::to_string(&channels)?.as_bytes()).await?; - tempfile.flush().await?; - drop(tempfile); - tokio::fs::rename(tempfilename, path).await?; - } - Ok(()) - } - - async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()> { - let path = url_to_path(&self.root_dir, url); - let tempfilename = path.with_extension("tmp"); - #[allow(unused_variables)] - let (old_json, new_json) = { - let mut temp = OpenOptions::new() - .write(true) - .create_new(true) - .open(&tempfilename) - .await?; - let mut file = OpenOptions::new() - .read(true) - .open(&path) - .await?; - - let mut content = String::new(); - file.read_to_string(&mut content).await?; - let json: serde_json::Value = serde_json::from_str(&content)?; - drop(file); - // Apply the editing algorithms - let new_json = modify_post(&json, &update)?; - - temp.write_all(new_json.to_string().as_bytes()).await?; - temp.flush().await?; - drop(temp); - tokio::fs::rename(tempfilename, path).await?; - - (json, new_json) - }; - // TODO check if URLs changed between old and new JSON - Ok(()) - } - - async fn get_channels(&self, user: &'_ str) -> Result<Vec<super::MicropubChannel>> { - let mut path = relative_path::RelativePathBuf::new(); - path.push(warp::http::Uri::try_from(user.to_string()).unwrap().authority().unwrap().to_string()); - path.push("channels"); - - let path = path.to_path(&self.root_dir); - match File::open(&path).await { - Ok(mut f) => { - let mut content = String::new(); - f.read_to_string(&mut content).await?; - // This should not happen, but if it does, handle it gracefully - if content.is_empty() { - return Ok(vec![]); - } - let channels: Vec<super::MicropubChannel> = serde_json::from_str(&content)?; - Ok(channels) - } - Err(e) => { - if e.kind() == IOErrorKind::NotFound { - Ok(vec![]) - } else { - Err(e.into()) - } - } - } - } - - async fn read_feed_with_limit( - &self, - url: &'_ str, - after: &'_ Option<String>, - limit: usize, - user: &'_ Option<String>, - ) -> Result<Option<serde_json::Value>> { - if let Some(feed) = self.get_post(url).await? { - if let Some(mut feed) = filter_post(feed, user) { - if feed["children"].is_array() { - // This code contains several clones. It looks - // like the borrow checker thinks it is preventing - // me from doing something incredibly stupid. The - // borrow checker may or may not be right. - let children = feed["children"].as_array().unwrap().clone(); - let mut posts_iter = children - .into_iter() - .map(|s: serde_json::Value| s.as_str().unwrap().to_string()); - // Note: we can't actually use skip_while here because we end up emitting `after`. - // This imperative snippet consumes after instead of emitting it, allowing the - // stream of posts to return only those items that truly come *after* - if let Some(after) = after { - for s in posts_iter.by_ref() { - if &s == after { - break - } - } - }; - let posts = stream::iter(posts_iter) - .map(|url: String| async move { self.get_post(&url).await }) - .buffered(std::cmp::min(3, limit)) - // Hack to unwrap the Option and sieve out broken links - // Broken links return None, and Stream::filter_map skips Nones. - .try_filter_map(|post: Option<serde_json::Value>| async move { Ok(post) }) - .try_filter_map(|post| async move { Ok(filter_post(post, user)) }) - .and_then(|mut post| async move { - hydrate_author(&mut post, user, self).await; - Ok(post) - }) - .take(limit); - - match posts.try_collect::<Vec<serde_json::Value>>().await { - Ok(posts) => feed["children"] = serde_json::json!(posts), - Err(err) => { - return Err(StorageError::with_source( - ErrorKind::Other, - "Feed assembly error", - Box::new(err), - )); - } - } - } - hydrate_author(&mut feed, user, self).await; - Ok(Some(feed)) - } else { - Err(StorageError::new( - ErrorKind::PermissionDenied, - "specified user cannot access this post", - )) - } - } else { - Ok(None) - } - } - - async fn delete_post(&self, url: &'_ str) -> Result<()> { - let path = url_to_path(&self.root_dir, url); - if let Err(e) = tokio::fs::remove_file(path).await { - Err(e.into()) - } else { - // TODO check for dangling references in the channel list - Ok(()) - } - } - - async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result<String> { - log::debug!("User for getting settings: {}", user); - let url = warp::http::Uri::try_from(user).expect("Couldn't parse a URL"); - let mut path = relative_path::RelativePathBuf::new(); - path.push(url.authority().unwrap().to_string()); - path.push("settings"); - - let path = path.to_path(&self.root_dir); - log::debug!("Getting settings from {:?}", &path); - let setting = setting.to_string(); - let mut file = File::open(path).await?; - let mut content = String::new(); - file.read_to_string(&mut content).await?; - - let settings: HashMap<String, String> = serde_json::from_str(&content)?; - // XXX consider returning string slices instead of cloning a string every time - // it might come with a performance hit and/or memory usage inflation - settings - .get(&setting) - .cloned() - .ok_or_else(|| StorageError::new(ErrorKind::Backend, "Setting not set")) - } - - async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()> { - let url = warp::http::Uri::try_from(user).expect("Couldn't parse a URL"); - let mut path = relative_path::RelativePathBuf::new(); - path.push(url.authority().unwrap().to_string()); - path.push("settings"); - - let path = path.to_path(&self.root_dir); - let temppath = path.with_extension("tmp"); - - let parent = path.parent().unwrap().to_owned(); - if !spawn_blocking(move || parent.is_dir()).await.unwrap() { - tokio::fs::create_dir_all(path.parent().unwrap()).await?; - } - - let (setting, value) = (setting.to_string(), value.to_string()); - - let mut tempfile = OpenOptions::new() - .write(true) - .create_new(true) - .open(&temppath) - .await?; - - let mut settings: HashMap<String, String> = match File::open(&path).await { - Ok(mut f) => { - let mut content = String::new(); - f.read_to_string(&mut content).await?; - if content.is_empty() { - HashMap::default() - } else { - serde_json::from_str(&content)? - } - } - Err(err) => if err.kind() == IOErrorKind::NotFound { - HashMap::default() - } else { - return Err(err.into()) - } - }; - settings.insert(setting, value); - tempfile.write_all(serde_json::to_string(&settings)?.as_bytes()).await?; - drop(tempfile); - tokio::fs::rename(temppath, path).await?; - Ok(()) - } -} |