Skip to content

Commit

Permalink
refactor(store): rename block size methods and adjust defaults (#124)
Browse files Browse the repository at this point in the history
- Increased default chunk sizes:
  - Desired: 4KB -> 128KB
  - Min: 2KB -> 32KB
  - Max: 64KB -> 512KB
- Added DEFAULT_MAX_NODE_BLOCK_SIZE constant (1MB)
- Moved constants from chunkers to root level
- Updated stores to use new node block size limit
- Renamed methods for consistency:
  - get_node_block_max_size() -> get_max_node_block_size()
  - get_raw_block_max_size() -> get_max_raw_block_size()
  • Loading branch information
appcypher authored Feb 3, 2025
1 parent 3072a85 commit d177e6a
Show file tree
Hide file tree
Showing 12 changed files with 67 additions and 58 deletions.
2 changes: 1 addition & 1 deletion monofs/examples/flatfs_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ async fn main() -> Result<()> {
println!(" - {:?}", codec);
}

if let Some(max_size) = store.get_node_block_max_size().await? {
if let Some(max_size) = store.get_max_node_block_size().await? {
println!(" Max node block size: {} bytes", max_size);
}

Expand Down
28 changes: 16 additions & 12 deletions monofs/lib/store/flatfsstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use monoutils::SeekableReader;
use monoutils_store::{ipld::cid::Cid, FastCDCChunker, FixedSizeChunker};
use monoutils_store::{
Chunker, Codec, FlatLayout, IpldReferences, IpldStore, IpldStoreSeekable, Layout,
LayoutSeekable, RawStore, StoreError, StoreResult,
LayoutSeekable, RawStore, StoreError, StoreResult, DEFAULT_MAX_NODE_BLOCK_SIZE,
};
use serde::{de::DeserializeOwned, Serialize};
use tokio::fs::{self, File};
Expand Down Expand Up @@ -215,7 +215,7 @@ where
let bytes = serde_ipld_dagcbor::to_vec(&data).map_err(StoreError::custom)?;

// Check if the data exceeds the node maximum block size
if let Some(max_size) = self.get_node_block_max_size().await? {
if let Some(max_size) = self.get_max_node_block_size().await? {
if bytes.len() as u64 > max_size {
return Err(StoreError::NodeBlockTooLarge(bytes.len() as u64, max_size));
}
Expand Down Expand Up @@ -278,8 +278,8 @@ where
codecs
}

async fn get_node_block_max_size(&self) -> StoreResult<Option<u64>> {
Ok(self.chunker.chunk_max_size().await?)
/// Returns the maximum size, in bytes, allowed for an encoded node block in this store.
///
/// The limit is always `Some(DEFAULT_MAX_NODE_BLOCK_SIZE)`; it does not depend on the
/// configured chunker.
async fn get_max_node_block_size(&self) -> StoreResult<Option<u64>> {
Ok(Some(DEFAULT_MAX_NODE_BLOCK_SIZE))
}

async fn get_block_count(&self) -> StoreResult<u64> {
Expand Down Expand Up @@ -388,7 +388,7 @@ where
{
async fn put_raw_block(&self, bytes: impl Into<Bytes> + Send) -> StoreResult<Cid> {
let bytes = bytes.into();
if let Some(max_size) = self.get_raw_block_max_size().await? {
if let Some(max_size) = self.get_max_raw_block_size().await? {
if bytes.len() as u64 > max_size {
return Err(StoreError::RawBlockTooLarge(bytes.len() as u64, max_size));
}
Expand Down Expand Up @@ -420,8 +420,12 @@ where
Ok(bytes.into())
}

async fn get_raw_block_max_size(&self) -> StoreResult<Option<u64>> {
Ok(self.chunker.chunk_max_size().await?)
/// Returns the maximum size, in bytes, allowed for a raw block in this store.
///
/// The result is the larger of the chunker's maximum chunk size and
/// `DEFAULT_MAX_NODE_BLOCK_SIZE`. Because `None` orders below `Some(_)`, a chunker that
/// reports no maximum still yields `Some(DEFAULT_MAX_NODE_BLOCK_SIZE)`.
async fn get_max_raw_block_size(&self) -> StoreResult<Option<u64>> {
    let chunk_limit = self.chunker.chunk_max_size().await?;
    let node_limit = Some(DEFAULT_MAX_NODE_BLOCK_SIZE);
    Ok(chunk_limit.max(node_limit))
}
}

Expand All @@ -446,7 +450,7 @@ where
#[cfg(test)]
mod tests {
use monoutils_store::codetable::{Code, MultihashDigest};
use monoutils_store::DEFAULT_MAX_CHUNK_SIZE;
use monoutils_store::{DEFAULT_MAX_CHUNK_SIZE, DEFAULT_MAX_NODE_BLOCK_SIZE};
use tokio::fs;
use tokio::io::AsyncReadExt;

Expand Down Expand Up @@ -617,12 +621,12 @@ mod tests {

// Verify size limits from chunker
assert_eq!(
store.get_node_block_max_size().await?,
Some(DEFAULT_MAX_CHUNK_SIZE)
store.get_max_node_block_size().await?,
Some(DEFAULT_MAX_NODE_BLOCK_SIZE)
);
assert_eq!(
store.get_raw_block_max_size().await?,
Some(DEFAULT_MAX_CHUNK_SIZE)
store.get_max_raw_block_size().await?,
Some(DEFAULT_MAX_NODE_BLOCK_SIZE)
);

Ok(())
Expand Down
8 changes: 4 additions & 4 deletions monofs/lib/store/membufferstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,8 @@ where
self.inner.get_supported_codecs().await
}

async fn get_node_block_max_size(&self) -> StoreResult<Option<u64>> {
self.inner.get_node_block_max_size().await
/// Returns the maximum node block size reported by the wrapped `inner` store.
async fn get_max_node_block_size(&self) -> StoreResult<Option<u64>> {
self.inner.get_max_node_block_size().await
}

async fn get_block_count(&self) -> StoreResult<u64> {
Expand All @@ -110,7 +110,7 @@ where
self.inner.get_raw_block(cid).await
}

async fn get_raw_block_max_size(&self) -> StoreResult<Option<u64>> {
self.inner.get_raw_block_max_size().await
/// Returns the maximum raw block size reported by the wrapped `inner` store.
async fn get_max_raw_block_size(&self) -> StoreResult<Option<u64>> {
self.inner.get_max_raw_block_size().await
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,17 @@
// Constants
//--------------------------------------------------------------------------------------------------

/// The default desired chunk size is 4 KiB.
pub const DEFAULT_DESIRED_CHUNK_SIZE: u64 = 4 * 1024;
/// The default desired chunk size is 128 KiB.
pub const DEFAULT_DESIRED_CHUNK_SIZE: u64 = 128 * 1024;

/// The default minimum chunk size is 2 KiB.
pub const DEFAULT_MIN_CHUNK_SIZE: u64 = 2 * 1024;
/// The default minimum chunk size is 32 KiB.
pub const DEFAULT_MIN_CHUNK_SIZE: u64 = 32 * 1024;

/// The default maximum chunk size is 64 KiB.
pub const DEFAULT_MAX_CHUNK_SIZE: u64 = 64 * 1024;
/// The default maximum chunk size is 512 KiB.
pub const DEFAULT_MAX_CHUNK_SIZE: u64 = 512 * 1024;

/// The default maximum node block size is 1 MiB (1,048,576 bytes).
pub const DEFAULT_MAX_NODE_BLOCK_SIZE: u64 = 1024 * 1024;

/// The gear table is used to generate the rolling hash mask.
#[rustfmt::skip]
Expand Down
8 changes: 3 additions & 5 deletions monoutils-store/lib/implementations/chunkers/fastcdc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,9 @@ use futures::stream::BoxStream;
use std::pin::pin;
use tokio::io::{AsyncRead, AsyncReadExt};

use crate::{Chunker, StoreError, StoreResult};

use super::{
constants::DEFAULT_MAX_CHUNK_SIZE, DEFAULT_DESIRED_CHUNK_SIZE, DEFAULT_GEAR_TABLE,
DEFAULT_MIN_CHUNK_SIZE,
use crate::{
Chunker, StoreError, StoreResult, DEFAULT_DESIRED_CHUNK_SIZE, DEFAULT_GEAR_TABLE,
DEFAULT_MAX_CHUNK_SIZE, DEFAULT_MIN_CHUNK_SIZE,
};

//--------------------------------------------------------------------------------------------------
Expand Down
4 changes: 1 addition & 3 deletions monoutils-store/lib/implementations/chunkers/fixed.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ use bytes::Bytes;
use futures::stream::BoxStream;
use tokio::io::{AsyncRead, AsyncReadExt};

use crate::{Chunker, StoreError, StoreResult};

use super::constants::DEFAULT_MAX_CHUNK_SIZE;
use crate::{Chunker, StoreError, StoreResult, DEFAULT_MAX_CHUNK_SIZE};

//--------------------------------------------------------------------------------------------------
// Types
Expand Down
4 changes: 1 addition & 3 deletions monoutils-store/lib/implementations/chunkers/gearcdc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@ use bytes::Bytes;
use futures::stream::BoxStream;
use tokio::io::{AsyncRead, AsyncReadExt};

use crate::{Chunker, StoreError, StoreResult};

use super::{DEFAULT_DESIRED_CHUNK_SIZE, DEFAULT_GEAR_TABLE};
use crate::{Chunker, StoreError, StoreResult, DEFAULT_DESIRED_CHUNK_SIZE, DEFAULT_GEAR_TABLE};

//--------------------------------------------------------------------------------------------------
// Types
Expand Down
2 changes: 0 additions & 2 deletions monoutils-store/lib/implementations/chunkers/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
mod constants;
mod fixed;
mod fastcdc;
mod gearcdc;
Expand All @@ -7,7 +6,6 @@ mod gearcdc;
// Exports
//--------------------------------------------------------------------------------------------------

pub use constants::*;
pub use fixed::*;
pub use fastcdc::*;
pub use gearcdc::*;
12 changes: 6 additions & 6 deletions monoutils-store/lib/implementations/stores/dualstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -214,9 +214,9 @@ where
codecs
}

async fn get_node_block_max_size(&self) -> StoreResult<Option<u64>> {
let max_size_a = self.store_a.get_node_block_max_size().await?;
let max_size_b = self.store_b.get_node_block_max_size().await?;
/// Returns the larger of the two underlying stores' maximum node block sizes.
///
/// `Option` ordering places `None` below `Some(_)`, so when only one store reports a
/// limit that limit is returned; `None` is returned only when both stores report `None`.
// NOTE(review): the trait docs describe `None` as "no limit", yet here `None` loses to any
// `Some(_)` — confirm this is the intended way to combine the two stores' limits.
async fn get_max_node_block_size(&self) -> StoreResult<Option<u64>> {
let max_size_a = self.store_a.get_max_node_block_size().await?;
let max_size_b = self.store_b.get_max_node_block_size().await?;
Ok(max_size_a.max(max_size_b))
}

Expand Down Expand Up @@ -250,9 +250,9 @@ where
}
}

async fn get_raw_block_max_size(&self) -> StoreResult<Option<u64>> {
let max_size_a = self.store_a.get_raw_block_max_size().await?;
let max_size_b = self.store_b.get_raw_block_max_size().await?;
/// Returns the larger of the two underlying stores' maximum raw block sizes.
///
/// `Option` ordering places `None` below `Some(_)`, so when only one store reports a
/// limit that limit is returned; `None` is returned only when both stores report `None`.
// NOTE(review): the trait docs describe `None` as "no limit", yet here `None` loses to any
// `Some(_)` — confirm this is the intended way to combine the two stores' limits.
async fn get_max_raw_block_size(&self) -> StoreResult<Option<u64>> {
let max_size_a = self.store_a.get_max_raw_block_size().await?;
let max_size_b = self.store_b.get_max_raw_block_size().await?;
Ok(max_size_a.max(max_size_b))
}
}
Expand Down
25 changes: 15 additions & 10 deletions monoutils-store/lib/implementations/stores/memstore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ use tokio::{io::AsyncRead, sync::RwLock};
use crate::{
utils, Chunker, Codec, FastCDCChunker, FixedSizeChunker, FlatLayout, IpldReferences, IpldStore,
IpldStoreSeekable, Layout, LayoutSeekable, RawStore, StoreError, StoreResult,
DEFAULT_MAX_NODE_BLOCK_SIZE,
};

//--------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -135,7 +136,7 @@ where
let bytes = Bytes::from(serde_ipld_dagcbor::to_vec(&data).map_err(StoreError::custom)?);

// Check if the data exceeds the node maximum block size.
if let Some(max_size) = self.get_node_block_max_size().await? {
if let Some(max_size) = self.get_max_node_block_size().await? {
if bytes.len() as u64 > max_size {
return Err(StoreError::NodeBlockTooLarge(bytes.len() as u64, max_size));
}
Expand Down Expand Up @@ -198,8 +199,8 @@ where
codecs
}

async fn get_node_block_max_size(&self) -> StoreResult<Option<u64>> {
self.chunker.chunk_max_size().await
/// Returns the maximum size, in bytes, allowed for an encoded node block in this store.
///
/// The limit is always `Some(DEFAULT_MAX_NODE_BLOCK_SIZE)`; it does not depend on the
/// configured chunker.
async fn get_max_node_block_size(&self) -> StoreResult<Option<u64>> {
Ok(Some(DEFAULT_MAX_NODE_BLOCK_SIZE))
}

async fn get_block_count(&self) -> StoreResult<u64> {
Expand All @@ -215,7 +216,7 @@ where
{
async fn put_raw_block(&self, bytes: impl Into<Bytes> + Send) -> StoreResult<Cid> {
let bytes = bytes.into();
if let Some(max_size) = self.get_raw_block_max_size().await? {
if let Some(max_size) = self.get_max_raw_block_size().await? {
if bytes.len() as u64 > max_size {
return Err(StoreError::RawBlockTooLarge(bytes.len() as u64, max_size));
}
Expand All @@ -235,8 +236,12 @@ where
}
}

async fn get_raw_block_max_size(&self) -> StoreResult<Option<u64>> {
self.chunker.chunk_max_size().await
/// Returns the maximum size, in bytes, allowed for a raw block in this store.
///
/// The result is the larger of the chunker's maximum chunk size and
/// `DEFAULT_MAX_NODE_BLOCK_SIZE`. Because `None` orders below `Some(_)`, a chunker that
/// reports no maximum still yields `Some(DEFAULT_MAX_NODE_BLOCK_SIZE)`.
async fn get_max_raw_block_size(&self) -> StoreResult<Option<u64>> {
    let chunk_limit = self.chunker.chunk_max_size().await?;
    let node_limit = Some(DEFAULT_MAX_NODE_BLOCK_SIZE);
    Ok(chunk_limit.max(node_limit))
}
}

Expand Down Expand Up @@ -387,12 +392,12 @@ mod tests {

// Verify the node and raw block size limits (both are expected to equal DEFAULT_MAX_NODE_BLOCK_SIZE here)
assert_eq!(
store.get_node_block_max_size().await?,
Some(DEFAULT_MAX_CHUNK_SIZE)
store.get_max_node_block_size().await?,
Some(DEFAULT_MAX_NODE_BLOCK_SIZE)
);
assert_eq!(
store.get_raw_block_max_size().await?,
Some(DEFAULT_MAX_CHUNK_SIZE)
store.get_max_raw_block_size().await?,
Some(DEFAULT_MAX_NODE_BLOCK_SIZE)
);

Ok(())
Expand Down
2 changes: 2 additions & 0 deletions monoutils-store/lib/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#![allow(clippy::module_inception)]

mod chunker;
mod constants;
mod error;
mod implementations;
mod layout;
Expand All @@ -18,6 +19,7 @@ pub mod utils;
//--------------------------------------------------------------------------------------------------

pub use chunker::*;
pub use constants::*;
pub use error::*;
pub use implementations::*;
pub use layout::*;
Expand Down
15 changes: 9 additions & 6 deletions monoutils-store/lib/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,7 @@ pub trait IpldStore: RawStore + Clone {
/// ## Errors
///
/// If the block is not found, `StoreError::BlockNotFound` error is returned.
async fn get_bytes(
&self,
cid: &Cid,
) -> StoreResult<Pin<Box<dyn AsyncRead + Send>>>;
async fn get_bytes(&self, cid: &Cid) -> StoreResult<Pin<Box<dyn AsyncRead + Send>>>;

/// Gets the size of all the blocks associated with the given `Cid` in bytes.
async fn get_bytes_size(&self, cid: &Cid) -> StoreResult<u64>;
Expand All @@ -101,7 +98,7 @@ pub trait IpldStore: RawStore + Clone {

/// Returns the allowed maximum block size for IPLD and merkle nodes.
/// If there is no limit, `None` is returned.
async fn get_node_block_max_size(&self) -> StoreResult<Option<u64>>;
async fn get_max_node_block_size(&self) -> StoreResult<Option<u64>>;

/// Checks if the store is empty.
async fn is_empty(&self) -> StoreResult<bool> {
Expand Down Expand Up @@ -139,6 +136,12 @@ pub trait RawStore: Clone {
/// Tries to save `bytes` as a single block to the store. Unlike [`IpldStore::put_bytes`], this
/// method does not chunk the data and does not create intermediate merkle nodes.
///
/// ## Arguments
///
/// - `bytes`: The bytes to save as a single block. Implementations check the size against
/// the limit returned by [`RawStore::get_max_raw_block_size`].
///
/// ## Important
///
/// This is a low-level API intended for code implementing an [`IpldStore`].
Expand Down Expand Up @@ -168,7 +171,7 @@ pub trait RawStore: Clone {
async fn get_raw_block(&self, cid: &Cid) -> StoreResult<Bytes>;

/// Returns the allowed maximum block size for raw bytes. If there is no limit, `None` is returned.
async fn get_raw_block_max_size(&self) -> StoreResult<Option<u64>>;
async fn get_max_raw_block_size(&self) -> StoreResult<Option<u64>>;
}

/// Helper extension to the `IpldStore` trait.
Expand Down

0 comments on commit d177e6a

Please sign in to comment.