#![warn(missing_docs)]
use async_trait::async_trait;
use serde::{Deserialize, Serialize};

mod file;
pub use crate::database::file::FileStorage;
#[cfg(test)]
mod memory;
#[cfg(test)]
pub use crate::database::memory::MemoryStorage;

pub use kittybox_util::MicropubChannel;

/// Enum representing the different errors that might occur during a database query.
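///
/// A sketch of how a caller might translate these into HTTP status codes;
/// the `status_code` helper below is illustrative and not part of this module:
///
/// ```ignore
/// use kittybox::database::ErrorKind; // assumed crate path
///
/// fn status_code(kind: ErrorKind) -> u16 {
///     match kind {
///         ErrorKind::NotFound => 404,
///         ErrorKind::PermissionDenied => 403,
///         ErrorKind::BadRequest => 400,
///         ErrorKind::Conflict => 409,
///         // Backend, JsonParsing and Other are all server-side faults.
///         _ => 500,
///     }
/// }
/// ```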
#[derive(Debug, Clone, Copy)]
pub enum ErrorKind {
    /// Backend error (e.g. database connection error)
    Backend,
    /// Error due to insufficient contextual permissions for the query
    PermissionDenied,
    /// Error due to the database being unable to parse JSON returned from the backing storage.
    /// Usually indicative of someone fiddling with the database manually instead of using proper tools.
    JsonParsing,
    /// Equivalent to a 404 error. Note that some requests return an `Option`,
    /// in which case `None` is also equivalent to a 404.
    NotFound,
    /// The user's query or request to the database was malformed. Used whenever the database processes
    /// the user's query directly, such as when editing posts inside the database (e.g. the Redis backend).
    BadRequest,
    /// The user's query collided with an in-flight request and needs to be retried.
    Conflict,
    /// Any other error that doesn't fit into the categories above.
    Other,
}

/// Enum representing settings that might be stored in the site's database.
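///
/// Variant names are serialized in `snake_case`; a minimal sketch
/// (assuming the crate is reachable as `kittybox`):
///
/// ```ignore
/// use kittybox::database::Settings;
///
/// // `rename_all = "snake_case"` turns `SiteName` into the stored key "site_name".
/// assert_eq!(Settings::SiteName.to_string(), "site_name");
/// ```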
#[derive(Deserialize, Serialize, Debug, Clone, Copy)]
#[serde(rename_all = "snake_case")]
pub enum Settings {
    /// The name of the website -- displayed in the header and the browser titlebar.
    SiteName,
}

impl std::fmt::Display for Settings {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // The `ToString` blanket impl for `Display` types keeps
        // `Settings::SiteName.to_string()` working as before.
        write!(f, "{}", serde_variant::to_variant_name(self).unwrap())
    }
}

/// Error signalled from the database.
#[derive(Debug)]
pub struct StorageError {
    msg: String,
    source: Option<Box<dyn std::error::Error + Send + Sync>>,
    kind: ErrorKind,
}

impl std::error::Error for StorageError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        self.source
            .as_ref()
            .map(|e| e.as_ref() as &dyn std::error::Error)
    }
}
impl From<serde_json::Error> for StorageError {
    fn from(err: serde_json::Error) -> Self {
        Self {
            msg: format!("{}", err),
            source: Some(Box::new(err)),
            kind: ErrorKind::JsonParsing,
        }
    }
}
impl std::fmt::Display for StorageError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "{}: {}",
            match self.kind {
                ErrorKind::Backend => "backend error",
                ErrorKind::JsonParsing => "JSON parsing error",
                ErrorKind::PermissionDenied => "permission denied",
                ErrorKind::NotFound => "not found",
                ErrorKind::BadRequest => "bad request",
                ErrorKind::Conflict => "conflict with an in-flight request or existing data",
                ErrorKind::Other => "generic storage layer error",
            },
            self.msg
        )
    }
}
impl serde::Serialize for StorageError {
    fn serialize<S: serde::Serializer>(
        &self,
        serializer: S,
    ) -> std::result::Result<S::Ok, S::Error> {
        serializer.serialize_str(&self.to_string())
    }
}
impl StorageError {
    /// Create a new StorageError of an ErrorKind with a message.
    fn new(kind: ErrorKind, msg: &str) -> Self {
        Self {
            msg: msg.to_string(),
            source: None,
            kind,
        }
    }
    /// Create a StorageError using another arbitrary Error as a source.
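    ///
    /// A minimal sketch of how a backend might wrap an underlying error
    /// (the I/O error and message are made up for illustration):
    ///
    /// ```ignore
    /// let io_err = std::io::Error::new(std::io::ErrorKind::Other, "disk unavailable");
    /// let err = StorageError::with_source(ErrorKind::Backend, "failed to write post", Box::new(io_err));
    /// ```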
    fn with_source(
        kind: ErrorKind,
        msg: &str,
        source: Box<dyn std::error::Error + Send + Sync>,
    ) -> Self {
        Self {
            msg: msg.to_string(),
            source: Some(source),
            kind,
        }
    }
    /// Get the kind of an error.
    pub fn kind(&self) -> ErrorKind {
        self.kind
    }
    /// Get the message as a string slice.
    pub fn msg(&self) -> &str {
        &self.msg
    }
}

/// A special Result type for the Micropub backing storage.
pub type Result<T> = std::result::Result<T, StorageError>;

/// Filter the post according to the value of `user`.
///
/// Anonymous users cannot view private posts or protected location data;
/// logged-in users can only view private posts addressed to them;
/// private location data is only visible to the post's author.
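///
/// A minimal sketch of the visibility rules (marked `ignore` because the
/// exact crate path for `filter_post` may differ):
///
/// ```ignore
/// use serde_json::json;
///
/// let post = json!({
///     "type": ["h-entry"],
///     "properties": {
///         "author": ["https://example.com/"],
///         "visibility": ["private"],
///         "content": ["secret"]
///     }
/// });
///
/// // Anonymous users don't see private posts at all...
/// assert!(filter_post(post.clone(), &None).is_none());
/// // ...but the author (or anyone listed in the audience) does.
/// assert!(filter_post(post, &Some("https://example.com/".to_string())).is_some());
/// ```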
pub fn filter_post(
    mut post: serde_json::Value,
    user: &'_ Option<String>,
) -> Option<serde_json::Value> {
    if post["properties"]["deleted"][0].is_string() {
        return Some(serde_json::json!({
            "type": post["type"],
            "properties": {
                "deleted": post["properties"]["deleted"]
            }
        }));
    }
    let empty_vec: Vec<serde_json::Value> = vec![];
    let author = post["properties"]["author"]
        .as_array()
        .unwrap_or(&empty_vec)
        .iter()
        // Skip non-string values (e.g. an embedded h-card author) instead of panicking.
        .filter_map(|i| i.as_str().map(|s| s.to_string()));
    let visibility = post["properties"]["visibility"][0]
        .as_str()
        .unwrap_or("public");
    let mut audience = author.chain(
        post["properties"]["audience"]
            .as_array()
            .unwrap_or(&empty_vec)
            .iter()
            .filter_map(|i| i.as_str().map(|s| s.to_string())),
    );
    if (visibility == "private" && !audience.any(|i| Some(i) == *user))
        || (visibility == "protected" && user.is_none())
    {
        return None;
    }
    if post["properties"]["location"].is_array() {
        let location_visibility = post["properties"]["location-visibility"][0]
            .as_str()
            .unwrap_or("private");
        let mut author = post["properties"]["author"]
            .as_array()
            .unwrap_or(&empty_vec)
            .iter()
            .filter_map(|i| i.as_str().map(|s| s.to_string()));
        if (location_visibility == "private" && !author.any(|i| Some(i) == *user))
            || (location_visibility == "protected" && user.is_none())
        {
            post["properties"]
                .as_object_mut()
                .unwrap()
                .remove("location");
        }
    }
    Some(post)
}

/// A storage backend for the Micropub server.
///
/// Implementations should note that all methods listed on this trait MUST be fully atomic
/// or lock the database so that write conflicts and reads of half-written data cannot occur.
#[async_trait]
pub trait Storage: std::fmt::Debug + Clone + Send + Sync {
    /// Check if a post exists in the database.
    async fn post_exists(&self, url: &str) -> Result<bool>;

    /// Load a post from the database in MF2-JSON format, deserialized into a `serde_json::Value`.
    async fn get_post(&self, url: &str) -> Result<Option<serde_json::Value>>;

    /// Save a post to the database as an MF2-JSON structure.
    ///
    /// Note that the `post` object MUST have `post["properties"]["uid"][0]` defined.
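    ///
    /// A minimal sketch of an acceptable value (the URLs are placeholders),
    /// mirroring the structure used by the test suite below:
    ///
    /// ```json
    /// {
    ///     "type": ["h-entry"],
    ///     "properties": {
    ///         "content": ["Hello world!"],
    ///         "uid": ["https://example.com/posts/hello"],
    ///         "url": ["https://example.com/posts/hello"]
    ///     }
    /// }
    /// ```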
    async fn put_post(&self, post: &'_ serde_json::Value, user: &'_ str) -> Result<()>;

    /// Modify a post using an update object as defined in the Micropub spec.
    ///
    /// Note to implementors: the update operation MUST be atomic and
    /// SHOULD lock the database to prevent two clients from overwriting
    /// each other's changes or corrupting data. Rejecting concurrent
    /// updates is allowed if waiting for a lock is not feasible.
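    ///
    /// A sketch of an update object, mirroring the shape used in the test
    /// suite below (per the Micropub spec, `add`, `replace` and `delete`
    /// are each optional):
    ///
    /// ```json
    /// {
    ///     "url": "https://example.com/posts/hello",
    ///     "add": { "category": ["testing"] },
    ///     "replace": { "content": ["Different test content"] }
    /// }
    /// ```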
    async fn update_post(&self, url: &'_ str, update: serde_json::Value) -> Result<()>;

    /// Get a list of channels available for the user represented by the URL `user` to write to.
    async fn get_channels(&self, user: &'_ str) -> Result<Vec<MicropubChannel>>;

    /// Fetch a feed at `url` and return an h-feed object containing
    /// `limit` posts after the post with the URL `after`, filtering the content
    /// for the user specified by `user` (or an anonymous user).
    ///
    /// Specifically, private posts that don't include the user in their audience
    /// will be elided from the feed, and posts containing location data without
    /// `post["properties"]["location-visibility"][0] == "public"`
    /// will have their location data (but not check-in data) stripped.
    ///
    /// This function is used as an optimization so the client, whatever it is,
    /// doesn't have to fetch posts, then realize some of them are private, and
    /// fetch more posts.
    ///
    /// Note for implementors: if you use streams to fetch posts in parallel
    /// from the database, prefer using a connection pool to reduce the
    /// overhead of creating a database connection per post.
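    ///
    /// A sketch of cursor-style pagination as exercised by the test suite
    /// below (`backend` and the URLs are placeholders):
    ///
    /// ```ignore
    /// let feed_url = "https://example.com/feeds/main";
    /// let first_page = backend
    ///     .read_feed_with_limit(feed_url, &None, 10, &None)
    ///     .await?
    ///     .unwrap();
    /// // The `uid` of the last returned child becomes the `after` cursor
    /// // for the next page.
    /// let cursor = first_page["children"].as_array().unwrap().last().unwrap()
    ///     ["properties"]["uid"][0].as_str().map(String::from);
    /// let second_page = backend
    ///     .read_feed_with_limit(feed_url, &cursor, 10, &None)
    ///     .await?
    ///     .unwrap();
    /// ```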
    async fn read_feed_with_limit(
        &self,
        url: &'_ str,
        after: &'_ Option<String>,
        limit: usize,
        user: &'_ Option<String>,
    ) -> Result<Option<serde_json::Value>>;

    /// Deletes a post from the database irreversibly. 'nuff said. Must be idempotent.
    async fn delete_post(&self, url: &'_ str) -> Result<()>;

    /// Gets a setting from the setting store and returns its value.
    async fn get_setting(&self, setting: Settings, user: &'_ str) -> Result<String>;

    /// Commits a setting to the setting store.
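    ///
    /// Together with `get_setting`, a sketch of the round trip
    /// (the URL and value are placeholders):
    ///
    /// ```ignore
    /// backend.set_setting(Settings::SiteName, "https://example.com/", "My Website").await?;
    /// assert_eq!(
    ///     backend.get_setting(Settings::SiteName, "https://example.com/").await?,
    ///     "My Website"
    /// );
    /// ```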
    async fn set_setting(&self, setting: Settings, user: &'_ str, value: &'_ str) -> Result<()>;
}

#[cfg(test)]
mod tests {
    use super::{MicropubChannel, Storage};
    use serde_json::json;

    async fn test_basic_operations<Backend: Storage>(backend: Backend) {
        let post: serde_json::Value = json!({
            "type": ["h-entry"],
            "properties": {
                "content": ["Test content"],
                "author": ["https://fireburn.ru/"],
                "uid": ["https://fireburn.ru/posts/hello"],
                "url": ["https://fireburn.ru/posts/hello", "https://fireburn.ru/posts/test"]
            }
        });
        let key = post["properties"]["uid"][0].as_str().unwrap().to_string();
        let alt_url = post["properties"]["url"][1].as_str().unwrap().to_string();

        // Reading and writing
        backend
            .put_post(&post, "https://fireburn.ru/")
            .await
            .unwrap();
        if let Some(returned_post) = backend.get_post(&key).await.unwrap() {
            assert!(returned_post.is_object());
            assert_eq!(
                returned_post["type"].as_array().unwrap().len(),
                post["type"].as_array().unwrap().len()
            );
            assert_eq!(
                returned_post["type"].as_array().unwrap(),
                post["type"].as_array().unwrap()
            );
            let props: &serde_json::Map<String, serde_json::Value> =
                post["properties"].as_object().unwrap();
            for key in props.keys() {
                assert_eq!(
                    returned_post["properties"][key].as_array().unwrap(),
                    post["properties"][key].as_array().unwrap()
                )
            }
        } else {
            panic!("For some reason the backend did not return the post.")
        }
        // Check the alternative URL - it should return the same post
        if let Ok(Some(returned_post)) = backend.get_post(&alt_url).await {
            assert!(returned_post.is_object());
            assert_eq!(
                returned_post["type"].as_array().unwrap().len(),
                post["type"].as_array().unwrap().len()
            );
            assert_eq!(
                returned_post["type"].as_array().unwrap(),
                post["type"].as_array().unwrap()
            );
            let props: &serde_json::Map<String, serde_json::Value> =
                post["properties"].as_object().unwrap();
            for key in props.keys() {
                assert_eq!(
                    returned_post["properties"][key].as_array().unwrap(),
                    post["properties"][key].as_array().unwrap()
                )
            }
        } else {
            panic!("For some reason the backend did not return the post.")
        }
    }

    /// Note: this is merely a smoke check and is in no way comprehensive.
    // TODO updates for feeds must update children using special logic
    async fn test_update<Backend: Storage>(backend: Backend) {
        let post: serde_json::Value = json!({
            "type": ["h-entry"],
            "properties": {
                "content": ["Test content"],
                "author": ["https://fireburn.ru/"],
                "uid": ["https://fireburn.ru/posts/hello"],
                "url": ["https://fireburn.ru/posts/hello", "https://fireburn.ru/posts/test"]
            }
        });
        let key = post["properties"]["uid"][0].as_str().unwrap().to_string();

        // Reading and writing
        backend
            .put_post(&post, "https://fireburn.ru/")
            .await
            .unwrap();

        backend
            .update_post(
                &key,
                json!({
                    "url": &key,
                    "add": {
                        "category": ["testing"],
                    },
                    "replace": {
                        "content": ["Different test content"]
                    }
                }),
            )
            .await
            .unwrap();

        match backend.get_post(&key).await {
            Ok(Some(returned_post)) => {
                assert!(returned_post.is_object());
                assert_eq!(
                    returned_post["type"].as_array().unwrap().len(),
                    post["type"].as_array().unwrap().len()
                );
                assert_eq!(
                    returned_post["type"].as_array().unwrap(),
                    post["type"].as_array().unwrap()
                );
                assert_eq!(
                    returned_post["properties"]["content"][0].as_str().unwrap(),
                    "Different test content"
                );
                assert_eq!(
                    returned_post["properties"]["category"].as_array().unwrap(),
                    &vec![json!("testing")]
                );
            }
            something_else => {
                something_else
                    .expect("Shouldn't error")
                    .expect("Should have the post");
            }
        }
    }

    async fn test_get_channel_list<Backend: Storage>(backend: Backend) {
        let feed = json!({
            "type": ["h-feed"],
            "properties": {
                "name": ["Main Page"],
                "author": ["https://fireburn.ru/"],
                "uid": ["https://fireburn.ru/feeds/main"]
            },
            "children": []
        });
        backend
            .put_post(&feed, "https://fireburn.ru/")
            .await
            .unwrap();
        let chans = backend.get_channels("https://fireburn.ru/").await.unwrap();
        assert_eq!(chans.len(), 1);
        assert_eq!(
            chans[0],
            MicropubChannel {
                uid: "https://fireburn.ru/feeds/main".to_string(),
                name: "Main Page".to_string()
            }
        );
    }

    async fn test_settings<Backend: Storage>(backend: Backend) {
        backend
            .set_setting(
                crate::database::Settings::SiteName,
                "https://fireburn.ru/",
                "Vika's Hideout",
            )
            .await
            .unwrap();
        assert_eq!(
            backend
                .get_setting(crate::database::Settings::SiteName, "https://fireburn.ru/")
                .await
                .unwrap(),
            "Vika's Hideout"
        );
    }

    fn gen_random_post(domain: &str) -> serde_json::Value {
        use faker_rand::lorem::{Paragraphs, Word};

        let uid = format!(
            "https://{domain}/posts/{}-{}-{}",
            rand::random::<Word>(),
            rand::random::<Word>(),
            rand::random::<Word>()
        );

        json!({
            "type": ["h-entry"],
            "properties": {
                "content": [rand::random::<Paragraphs>().to_string()],
                "uid": [&uid],
                "url": [&uid]
            }
        })
    }

    async fn test_feed_pagination<Backend: Storage>(backend: Backend) {
        let posts = std::iter::from_fn(|| Some(gen_random_post("fireburn.ru")))
            .take(20)
            .collect::<Vec<serde_json::Value>>();

        let feed = json!({
            "type": ["h-feed"],
            "properties": {
                "name": ["Main Page"],
                "author": ["https://fireburn.ru/"],
                "uid": ["https://fireburn.ru/feeds/main"]
            },
            "children": posts.iter()
                .filter_map(|json| json["properties"]["uid"][0].as_str())
                .collect::<Vec<&str>>()
        });
        let key = feed["properties"]["uid"][0].as_str().unwrap();

        backend
            .put_post(&feed, "https://fireburn.ru/")
            .await
            .unwrap();
        println!("---");
        for (i, post) in posts.iter().enumerate() {
            backend
                .put_post(post, "https://fireburn.ru/")
                .await
                .unwrap();
            println!("posts[{}] = {}", i, post["properties"]["uid"][0]);
        }
        println!("---");
        let limit: usize = 10;
        let result = backend
            .read_feed_with_limit(key, &None, limit, &None)
            .await
            .unwrap()
            .unwrap();
        for (i, post) in result["children"].as_array().unwrap().iter().enumerate() {
            println!("feed[0][{}] = {}", i, post["properties"]["uid"][0]);
        }
        println!("---");
        assert_eq!(result["children"].as_array().unwrap()[0..10], posts[0..10]);

        let result2 = backend
            .read_feed_with_limit(
                key,
                &result["children"].as_array().unwrap().last().unwrap()["properties"]["uid"][0]
                    .as_str()
                    .map(|i| i.to_owned()),
                limit,
                &None,
            )
            .await
            .unwrap()
            .unwrap();
        for (i, post) in result2["children"].as_array().unwrap().iter().enumerate() {
            println!("feed[1][{}] = {}", i, post["properties"]["uid"][0]);
        }
        println!("---");
        assert_eq!(
            result2["children"].as_array().unwrap()[0..10],
            posts[10..20]
        );

        // Regression test for #4
        let nonsense_after = Some("1010101010".to_owned());
        let result3 = tokio::time::timeout(tokio::time::Duration::from_secs(10), async move {
            backend
                .read_feed_with_limit(key, &nonsense_after, limit, &None)
                .await
                .unwrap()
                .unwrap()
        })
        .await
        .expect("Operation should not hang: see https://gitlab.com/kittybox/kittybox/-/issues/4");
        assert!(result3["children"].as_array().unwrap().is_empty());
    }

    /// Automatically generates a test suite for a storage backend by invoking
    /// the supplied per-backend macro on every generic test function.
    macro_rules! test_all {
        ($func_name:ident, $mod_name:ident) => {
            mod $mod_name {
                $func_name!(test_basic_operations);
                $func_name!(test_get_channel_list);
                $func_name!(test_settings);
                $func_name!(test_update);
                $func_name!(test_feed_pagination);
            }
        };
    }
    macro_rules! file_test {
        ($func_name:ident) => {
            #[tokio::test]
            async fn $func_name() {
                test_logger::ensure_env_logger_initialized();
                let tempdir = tempdir::TempDir::new("file").expect("Failed to create tempdir");
                let backend = super::super::FileStorage::new(tempdir.into_path())
                    .await
                    .unwrap();
                super::$func_name(backend).await
            }
        };
    }

    test_all!(file_test, file);
}