From ecdb6c7db16406a20b56e7bb6e73d4c59ee246f1 Mon Sep 17 00:00:00 2001 From: Vika Date: Thu, 21 Jul 2022 13:03:37 +0300 Subject: media: improve Metadata typing content_type is now optional; if not specified, it will remain empty. `application/octet-stream` will be put on read in the frontend. Length is now represented as NonZeroUsize - why would you upload a zero-byte file when you can just conjure one from the void whenever you need one? This should save me a little bit of memory. Representing content_type as a typed MIME value would be the next logical step. --- kittybox-rs/src/media/mod.rs | 6 +++++- kittybox-rs/src/media/storage/file.rs | 18 +++++++++++------- kittybox-rs/src/media/storage/mod.rs | 14 ++++++++------ 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/kittybox-rs/src/media/mod.rs b/kittybox-rs/src/media/mod.rs index b64929d..a8ae6f9 100644 --- a/kittybox-rs/src/media/mod.rs +++ b/kittybox-rs/src/media/mod.rs @@ -76,7 +76,11 @@ pub async fn serve( let headers = r.headers_mut().unwrap(); headers.insert( "Content-Type", - HeaderValue::from_str(&metadata.content_type).unwrap() + HeaderValue::from_str( + metadata.content_type + .as_deref() + .unwrap_or("application/octet-stream") + ).unwrap() ); if let Some(length) = metadata.length { headers.insert( diff --git a/kittybox-rs/src/media/storage/file.rs b/kittybox-rs/src/media/storage/file.rs index b46cdb2..c554d9e 100644 --- a/kittybox-rs/src/media/storage/file.rs +++ b/kittybox-rs/src/media/storage/file.rs @@ -83,6 +83,8 @@ impl MediaStore for FileStore { let mut length: usize = 0; while let Some(chunk) = content.next().await { + // TODO consider getting rid of this Arc as we only need immutable references + // I don't think we quite need refcounting here let chunk = std::sync::Arc::new(chunk.map_err(|err| MediaStoreError { kind: ErrorKind::Backend, source: Some(Box::new(err)), @@ -113,8 +115,10 @@ impl MediaStore for FileStore { } hasher = _hasher; } + // Manually flush the buffer and drop the handle to close the file tempfile.flush().await?; tempfile.into_inner().sync_all().await?; + let hash = hasher.finalize(); debug!("Pending upload hash: {}", hex::encode(&hash)); let filename = format!( @@ -125,12 +129,12 @@ impl MediaStore for FileStore { hex::encode([hash[3]]), hex::encode(&hash[4..32]) ); - metadata.length = Some(length); let domain_str = domain.to_string(); let filepath = self.base.join(domain_str.as_str()).join(&filename); let metafilename = filename.clone() + ".json"; let metapath = self.base.join(domain_str.as_str()).join(&metafilename); let metatemppath = self.base.join(domain_str.as_str()).join(metafilename + ".tmp"); + metadata.length = std::num::NonZeroUsize::new(length); debug!("File path: {}, metadata: {}", filepath.display(), metapath.display()); { let parent = filepath.parent().unwrap(); @@ -174,7 +178,7 @@ impl MediaStore for FileStore { // From the logs it looks like we're reading 4KiB at a time // Buffering file contents seems to double download speed // How to benchmark this? - tokio::io::BufReader::with_capacity(BUF_CAPACITY, file) + BufReader::with_capacity(BUF_CAPACITY, file) ) // Sprinkle some salt in form of protective log wrapping .inspect_ok(|chunk| debug!("Read {} bytes from file", chunk.len())) @@ -205,7 +209,7 @@ mod tests { let stream = tokio_stream::iter(file.chunks(100).map(|i| Ok(bytes::Bytes::copy_from_slice(i)))); let metadata = Metadata { filename: Some("style.css".to_string()), - content_type: "text/css".to_string(), + content_type: Some("text/css".to_string()), length: None }; @@ -231,9 +235,9 @@ mod tests { .join("fireburn.ru") .join(filename.clone() + ".json") ).await.unwrap()).unwrap(); - assert_eq!(&meta.content_type, "text/css"); + assert_eq!(meta.content_type.as_deref(), Some("text/css")); assert_eq!(meta.filename.as_deref(), Some("style.css")); - assert_eq!(meta.length, Some(file.len())); + assert_eq!(meta.length.map(|i| i.get()), Some(file.len())); // read back the data using the interface let (metadata, read_back) = { @@ -250,9 +254,9 @@ mod tests { }; assert_eq!(read_back, file); - assert_eq!(&metadata.content_type, "text/css"); + assert_eq!(metadata.content_type.as_deref(), Some("text/css")); assert_eq!(meta.filename.as_deref(), Some("style.css")); - assert_eq!(meta.length, Some(file.len())); + assert_eq!(meta.length.map(|i| i.get()), Some(file.len())); } } diff --git a/kittybox-rs/src/media/storage/mod.rs b/kittybox-rs/src/media/storage/mod.rs index 5614437..b34da88 100644 --- a/kittybox-rs/src/media/storage/mod.rs +++ b/kittybox-rs/src/media/storage/mod.rs @@ -5,22 +5,24 @@ use bytes::Bytes; use serde::{Deserialize, Serialize}; use std::pin::Pin; use std::fmt::Debug; +use std::num::NonZeroUsize; pub mod file; #[derive(Debug, Deserialize, Serialize)] pub struct Metadata { - pub(super) content_type: String, - pub(super) filename: Option, - pub(super) length: Option + /// Content type of the file. If None, the content-type is considered undefined. + pub content_type: Option, + /// The original filename that was passed. + pub filename: Option, + /// The recorded length of the file. + pub length: Option, } impl From<&Field<'_>> for Metadata { fn from(field: &Field<'_>) -> Self { Self { content_type: field.content_type() - .map(|i| i.to_owned()) - .or_else(|| Some("application/octet-stream".to_owned())) - .unwrap(), + .map(|i| i.to_owned()), filename: field.file_name() .map(|i| i.to_owned()), length: None -- cgit 1.4.1