From 6a16796681106e4085d1ad0d43d9efb31510d4d9 Mon Sep 17 00:00:00 2001
From: Kould
Date: Fri, 27 Sep 2024 14:07:38 +0800
Subject: [PATCH] =?UTF-8?q?Refactor=EF=BC=9A=20use=20[fusio](https://githu?=
 =?UTF-8?q?b.com/tonbo-io/fusio)=20to=20implement=20the=20storage=20layer?=
 =?UTF-8?q?=20and=20support=20multiple=20storage=20(#163)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* refactor: use [fusio](https://github.com/tonbo-io/fusio) to implement the storage layer and support multiple storage backends

* chore: remove println! macros from codegen

* fix: blocking of LevelStream in `open_file` under Unix systems

* refactor: use fusio in benchmarks

* fix: benchmark/writes.rs file not found

* test: add unit tests for serdes

* chore: update read & write for fusio

* fix: example datafusion projection error

* fix: `Fs::list` may not be sorted

* ci: add examples check

* feat: enable SQL query support in datafusion example (#169)

Added support for executing SQL queries using DataFusion's parser and
physical plan execution. This enhancement allows querying the "music"
table with SQL statements, improving flexibility and functionality.

* chore: resolve review comments

---------

Co-authored-by: Xwg
---
 .github/workflows/ci.yml     |  17 +++
 Cargo.toml                   |   9 +-
 benches/common.rs            |  22 ++--
 benches/criterion/writes.rs  |  15 ++-
 benches/read_bench.rs        |   6 +-
 benches/write_bench.rs       |   3 +-
 examples/datafusion.rs       |  48 +++++--
 examples/declare.rs          |  15 ++-
 src/compaction/mod.rs        | 231 ++++++++++++++++++++++------------
 src/executor.rs              |   4 +-
 src/fs/manager.rs            |  44 +++++++
 src/fs/mod.rs                |  47 +++----
 src/fs/tokio_fs.rs           |  79 ------------
 src/inmem/immutable.rs       |   2 +-
 src/inmem/mutable.rs         |  54 ++++----
 src/lib.rs                   | 234 +++++++++++++++++++++------------
 src/ondisk/scan.rs           |  19 +--
 src/ondisk/sstable.rs        | 197 ++++++++++++++---------------
 src/option.rs                | 113 ++++++++++++-----
 src/record/mod.rs            |   4 +
 src/scope.rs                 |  25 ++--
 src/serdes/arc.rs            |  34 ++++-
 src/serdes/boolean.rs        |  54 +++++---
 src/serdes/bytes.rs          |  59 ++++++---
 src/serdes/mod.rs            |  41 +++---
 src/serdes/num.rs            |  81 +++++++++---
 src/serdes/option.rs         |  50 ++++++--
 src/serdes/string.rs         |  70 +++++++----
 src/stream/level.rs          | 124 ++++++++++++++-----
 src/stream/mem_projection.rs |  37 +++---
 src/stream/merge.rs          |  90 ++++++--------
 src/stream/mod.rs            |  35 ++----
 src/stream/package.rs        |  31 ++---
 src/timestamp/mod.rs         |  12 +-
 src/timestamp/timestamped.rs |   6 +-
 src/transaction.rs           | 175 ++++++++++++++------
 src/version/cleaner.rs       | 145 +++++++++++-----------
 src/version/edit.rs          |  63 ++++------
 src/version/mod.rs           |  89 ++++++++-----
 src/version/set.rs           | 229 +++++++++++++++++++++-------------
 src/wal/checksum.rs          | 141 +++++++++++----------
 src/wal/log.rs               |  13 +-
 src/wal/mod.rs               |  42 ++++---
 src/wal/record_entry.rs      |  25 ++--
 tests/data_integrity.rs      |  12 +-
 tests/macros_correctness.rs  |  11 +-
 tonbo_macros/src/record.rs   |  24 ++--
 47 files changed, 1739 insertions(+), 1142 deletions(-)
 create mode 100644 src/fs/manager.rs
 delete mode 100644 src/fs/tokio_fs.rs
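The headline change: `DB::new` now takes a `StoreManager` in addition to `DbOption`, and filesystem locations become fusio `Path`s, so different paths can be served by different storage backends. A minimal sketch of the new open sequence, assuming the `User` record from `examples/declare.rs` in this patch; the `/mnt/fast` override path is hypothetical, and any `DynFs` implementation could back it:

```rust
use std::sync::Arc;

use fusio::{local::TokioFs, path::Path, DynFs};
use tonbo::{executor::tokio::TokioExecutor, fs::manager::StoreManager, DbOption, Record, DB};

#[derive(Record, Debug)]
pub struct User {
    #[record(primary_key)]
    name: String,
    email: Option<String>,
    age: u8,
}

#[tokio::main]
async fn main() {
    // Paths registered in the manager are served by their own `DynFs`;
    // everything else falls back to the base filesystem (first argument).
    let fast: Arc<dyn DynFs> = Arc::new(TokioFs);
    let manager = StoreManager::new(
        Arc::new(TokioFs),
        vec![(Path::from_filesystem_path("/mnt/fast").unwrap(), Some(fast))],
    );

    // make sure the path exists, as the examples in this patch do
    let _ = tokio::fs::create_dir_all("./db_path/users").await;
    let options = DbOption::from(Path::from_filesystem_path("./db_path/users").unwrap());

    let db = DB::new(options, TokioExecutor::default(), manager)
        .await
        .unwrap();
    db.insert(User {
        name: "Alice".into(),
        email: Some("alice@gmail.com".into()),
        age: 22,
    })
    .await
    .unwrap();
}
```

Passing `vec![]` for the overrides, as every call site in this diff does, keeps the old single-backend behavior.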
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ab443669..f70840ce 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -69,6 +69,23 @@ jobs:
           command: fmt
           args: -- --check
 
+  examples:
+    name: Rust examples
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Run datafusion example
+        uses: actions-rs/cargo@v1
+        with:
+          command: run
+          args: --example datafusion --features=datafusion
+
+      - name: Run declare example
+        uses: actions-rs/cargo@v1
+        with:
+          command: run
+          args: --example declare --all-features
+
   benchmark:
     name: Rust benchmark
     runs-on: self-hosted
diff --git a/Cargo.toml b/Cargo.toml
index 8eaf57a0..c429ce71 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -49,21 +49,23 @@ path = "benches/criterion/writes.rs"
 required-features = ["sled"]
 
 [dependencies]
-arrow = "52"
+arrow = "53"
 async-lock = "3"
 async-stream = "0.3"
 async-trait = { version = "0.1", optional = true }
 bytes = { version = "1.7", optional = true }
 crc32fast = "1"
 crossbeam-skiplist = "0.1"
-datafusion = { version = "41", optional = true }
+datafusion = { version = "42", optional = true }
 flume = { version = "0.11", features = ["async"] }
+fusio = { git = "https://github.com/tonbo-io/fusio.git", package = "fusio", rev = "317b1b0621b297f52145b41b90506632f2dc7a1d", features = ["tokio", "dyn"] }
+fusio-parquet = { git = "https://github.com/tonbo-io/fusio.git", package = "fusio-parquet", rev = "317b1b0621b297f52145b41b90506632f2dc7a1d" }
 futures-core = "0.3"
 futures-io = "0.3"
 futures-util = "0.3"
 lockable = "0.0.8"
 once_cell = "1"
-parquet = { version = "52", features = ["async"] }
+parquet = { version = "53", features = ["async"] }
 pin-project-lite = "0.2"
 regex = "1"
 thiserror = "1"
@@ -74,6 +76,7 @@ tracing = "0.1"
 ulid = "1"
 
 # Only used for benchmarks
+log = "0.4.22"
 redb = { version = "2", optional = true }
 rocksdb = { version = "0.22", optional = true }
 sled = { version = "0.34", optional = true }
diff --git a/benches/common.rs b/benches/common.rs
index 50df7b0c..d7ed5d3b 100644
--- a/benches/common.rs
+++ b/benches/common.rs
@@ -5,16 +5,19 @@ use std::{
     fs::File,
     io::{BufRead, BufReader},
     path::{Path, PathBuf},
+    sync::Arc,
 };
 
 use async_stream::stream;
+use fusio::local::TokioFs;
 use futures_core::Stream;
 use futures_util::StreamExt;
 use parquet::data_type::AsBytes;
 use redb::TableDefinition;
 use rocksdb::{Direction, IteratorMode, TransactionDB};
 use tonbo::{
-    executor::tokio::TokioExecutor, stream, transaction::TransactionEntry, DbOption, Projection,
+    executor::tokio::TokioExecutor, fs::manager::StoreManager, stream,
+    transaction::TransactionEntry, DbOption, Projection,
 };
 use tonbo_macros::Record;
 
@@ -222,15 +225,20 @@ impl BenchDatabase for TonboBenchDataBase {
     }
 
     async fn build(path: impl AsRef<Path>) -> Self {
-        let option = DbOption::from(path.as_ref()).disable_wal();
+        let manager = StoreManager::new(Arc::new(TokioFs), vec![]);
+        let option =
+            DbOption::from(fusio::path::Path::from_filesystem_path(path.as_ref()).unwrap())
+                .disable_wal();
 
-        let db = tonbo::DB::new(option, TokioExecutor::new()).await.unwrap();
+        let db = tonbo::DB::new(option, TokioExecutor::new(), manager)
+            .await
+            .unwrap();
         TonboBenchDataBase::new(db)
     }
 }
 
 pub struct TonboBenchReadTransaction<'a> {
-    txn: tonbo::transaction::Transaction<'a, Customer, TokioExecutor>,
+    txn: tonbo::transaction::Transaction<'a, Customer>,
 }
 
 impl<'db> BenchReadTransaction for TonboBenchReadTransaction<'db> {
@@ -245,7 +253,7 @@ impl<'db> BenchReadTransaction for TonboBenchReadTransaction<'db> {
 }
 
 pub struct TonboBenchReader<'db, 'txn> {
-    txn: &'txn tonbo::transaction::Transaction<'db, Customer, TokioExecutor>,
+    txn: &'txn tonbo::transaction::Transaction<'db, Customer>,
 }
 
 impl BenchReader for TonboBenchReader<'_, '_> {
@@ -285,7 +293,7 @@ impl BenchReader for TonboBenchReader<'_, '_> {
 }
 
 pub struct TonboBenchWriteTransaction<'a> {
-    txn: tonbo::transaction::Transaction<'a, Customer, TokioExecutor>,
+    txn: tonbo::transaction::Transaction<'a, Customer>,
 }
 
 impl<'db> BenchWriteTransaction for 
TonboBenchWriteTransaction<'db> { @@ -305,7 +313,7 @@ impl<'db> BenchWriteTransaction for TonboBenchWriteTransaction<'db> { } pub struct TonboBenchInserter<'db, 'txn> { - txn: &'txn mut tonbo::transaction::Transaction<'db, Customer, TokioExecutor>, + txn: &'txn mut tonbo::transaction::Transaction<'db, Customer>, } impl BenchInserter for TonboBenchInserter<'_, '_> { diff --git a/benches/criterion/writes.rs b/benches/criterion/writes.rs index d30eb0f4..8d3fcce4 100644 --- a/benches/criterion/writes.rs +++ b/benches/criterion/writes.rs @@ -1,8 +1,9 @@ use std::{iter::repeat_with, sync::Arc}; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; +use fusio::local::TokioFs; use mimalloc::MiMalloc; -use tonbo::{executor::tokio::TokioExecutor, DbOption, Record, DB}; +use tonbo::{executor::tokio::TokioExecutor, fs::manager::StoreManager, DbOption, Record, DB}; #[global_allocator] static GLOBAL: MiMalloc = MiMalloc; @@ -55,10 +56,14 @@ fn single_write(c: &mut Criterion) { let batches = [1, 16, 128]; let _ = std::fs::remove_dir_all("/tmp/tonbo"); + let _ = std::fs::create_dir_all("/tmp/tonbo"); + for batch in batches { - let option = DbOption::from("/tmp/tonbo").disable_wal(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let option = DbOption::from(fusio::path::Path::from_filesystem_path("/tmp/tonbo").unwrap()) + .disable_wal(); let db = runtime - .block_on(async { DB::new(option, TokioExecutor::default()).await }) + .block_on(async { DB::new(option, TokioExecutor::default(), manager).await }) .unwrap(); group.bench_with_input(BenchmarkId::new("Tonbo", batch), &batch, |b, batch| { @@ -67,9 +72,12 @@ fn single_write(c: &mut Criterion) { .iter(|| async { tonbo_write(&db, *batch).await }); }); let _ = std::fs::remove_dir_all("/tmp/tonbo"); + let _ = std::fs::create_dir_all("/tmp/tonbo"); } let _ = std::fs::remove_dir_all("/tmp/sled"); + let _ = std::fs::create_dir_all("/tmp/sled"); + for batch in batches { let sled = sled::open("/tmp/sled").unwrap(); group.bench_with_input(BenchmarkId::new("Sled", batch), &batch, |b, batch| { @@ -78,6 +86,7 @@ fn single_write(c: &mut Criterion) { .iter(|| async { sled_write(&sled, *batch).await }); }); let _ = std::fs::remove_dir_all("/tmp/sled"); + let _ = std::fs::create_dir_all("/tmp/sled"); } group.finish(); diff --git a/benches/read_bench.rs b/benches/read_bench.rs index 7eb69b75..c849fcb9 100644 --- a/benches/read_bench.rs +++ b/benches/read_bench.rs @@ -2,15 +2,13 @@ mod common; use std::{ collections::Bound, - env::current_dir, path::{Path, PathBuf}, sync::Arc, time::{Duration, Instant}, }; use futures_util::{future::join_all, StreamExt}; -use tokio::io::AsyncWriteExt; -use tonbo::{executor::tokio::TokioExecutor, fs::FileProvider}; +use tokio::{fs, io::AsyncWriteExt}; use crate::common::{ read_tbl, BenchDatabase, BenchReadTransaction, BenchReader, RedbBenchDatabase, @@ -181,7 +179,7 @@ async fn main() { println!(); println!("{table}"); - let mut file = TokioExecutor::open("read_benchmark.md").await.unwrap(); + let mut file = fs::File::create("read_benchmark.md").await.unwrap(); file.write_all(b"Read: \n```shell\n").await.unwrap(); for line in table.lines() { file.write_all(line.as_bytes()).await.unwrap(); diff --git a/benches/write_bench.rs b/benches/write_bench.rs index cabecefb..f2f43354 100644 --- a/benches/write_bench.rs +++ b/benches/write_bench.rs @@ -12,7 +12,6 @@ use common::*; use futures_util::future::join_all; use tempfile::TempDir; use tokio::io::AsyncWriteExt; -use tonbo::{executor::tokio::TokioExecutor, 
fs::FileProvider}; const WRITE_TIMES: usize = 500_000; const WRITE_BATCH_TIMES: usize = 5000; @@ -227,7 +226,7 @@ async fn main() { println!(); println!("{table}"); - let mut file = TokioExecutor::open("write_benchmark.md").await.unwrap(); + let mut file = tokio::fs::File::create("write_benchmark.md").await.unwrap(); file.write_all(b"Write: \n```shell\n").await.unwrap(); for line in table.lines() { file.write_all(line.as_bytes()).await.unwrap(); diff --git a/examples/datafusion.rs b/examples/datafusion.rs index 82a44f4b..6f0a1012 100644 --- a/examples/datafusion.rs +++ b/examples/datafusion.rs @@ -17,12 +17,20 @@ use datafusion::{ error::{DataFusionError, Result}, execution::{RecordBatchStream, SendableRecordBatchStream, TaskContext}, physical_expr::EquivalenceProperties, - physical_plan::{DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties}, + physical_plan::{ + execute_stream, DisplayAs, DisplayFormatType, ExecutionMode, ExecutionPlan, PlanProperties, + }, prelude::*, + sql::parser::DFParser, }; +use fusio::{local::TokioFs, path::Path}; use futures_core::Stream; use futures_util::StreamExt; -use tonbo::{executor::tokio::TokioExecutor, inmem::immutable::ArrowArrays, record::Record, DB}; +use tokio::fs; +use tonbo::{ + executor::tokio::TokioExecutor, fs::manager::StoreManager, inmem::immutable::ArrowArrays, + record::Record, DbOption, DB, +}; use tonbo_macros::Record; #[derive(Record, Debug)] @@ -198,7 +206,13 @@ impl ExecutionPlan for MusicExec { #[tokio::main] async fn main() -> Result<()> { - let db = DB::new("./db_path/music".into(), TokioExecutor::default()) + // make sure the path exists + let _ = fs::create_dir_all("./db_path/music").await; + + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let options = DbOption::from(Path::from_filesystem_path("./db_path/music").unwrap()); + + let db = DB::new(options, TokioExecutor::default(), manager) .await .unwrap(); for (id, name, like) in [ @@ -214,9 +228,29 @@ async fn main() -> Result<()> { let provider = MusicProvider { db: Arc::new(db) }; ctx.register_table("music", Arc::new(provider))?; - let df = ctx.table("music").await?; - let df = df.select(vec![col("name")])?; - let batches = df.collect().await?; - pretty::print_batches(&batches).unwrap(); + { + let df = ctx.table("music").await?; + let df = df.select(vec![col("name")])?; + let batches = df.collect().await?; + pretty::print_batches(&batches).unwrap(); + } + + { + // support sql query for tonbo + let statements = DFParser::parse_sql("select * from music")?; + let plan = ctx + .state() + .statement_to_plan(statements.front().cloned().unwrap()) + .await?; + ctx.execute_logical_plan(plan).await?; + let df = ctx.table("music").await?; + let physical_plan = df.create_physical_plan().await?; + let mut stream = execute_stream(physical_plan, ctx.task_ctx())?; + while let Some(maybe_batch) = stream.next().await { + let batch = maybe_batch?; + pretty::print_batches(&[batch]).unwrap(); + } + } + Ok(()) } diff --git a/examples/declare.rs b/examples/declare.rs index b6ffe623..7906ee15 100644 --- a/examples/declare.rs +++ b/examples/declare.rs @@ -1,8 +1,12 @@ -use std::ops::Bound; +use std::{ops::Bound, sync::Arc}; use bytes::Bytes; +use fusio::{local::TokioFs, path::Path}; use futures_util::stream::StreamExt; -use tonbo::{executor::tokio::TokioExecutor, Projection, Record, DB}; +use tokio::fs; +use tonbo::{ + executor::tokio::TokioExecutor, fs::manager::StoreManager, DbOption, Projection, Record, DB, +}; /// Use macro to define schema of column family just 
like ORM /// It provides type-safe read & write API @@ -17,8 +21,13 @@ pub struct User { #[tokio::main] async fn main() { + // make sure the path exists + let _ = fs::create_dir_all("./db_path/users").await; + + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let options = DbOption::from(Path::from_filesystem_path("./db_path/users").unwrap()); // pluggable async runtime and I/O - let db = DB::new("./db_path/users".into(), TokioExecutor::default()) + let db = DB::new(options, TokioExecutor::default(), manager) .await .unwrap(); diff --git a/src/compaction/mod.rs b/src/compaction/mod.rs index 888ab25c..6a090f4f 100644 --- a/src/compaction/mod.rs +++ b/src/compaction/mod.rs @@ -1,6 +1,8 @@ use std::{cmp, collections::Bound, mem, pin::Pin, sync::Arc}; use async_lock::{RwLock, RwLockUpgradableReadGuard}; +use fusio::DynFs; +use fusio_parquet::writer::AsyncWriter; use futures_util::StreamExt; use parquet::arrow::{AsyncArrowWriter, ProjectionMask}; use thiserror::Error; @@ -8,7 +10,7 @@ use tokio::sync::oneshot; use ulid::Ulid; use crate::{ - fs::{FileId, FileProvider}, + fs::{default_open_options, manager::StoreManager, FileId}, inmem::{ immutable::{ArrowArrays, Builder, Immutable}, mutable::Mutable, @@ -30,30 +32,31 @@ pub enum CompactTask { Flush(Option>), } -pub(crate) struct Compactor +pub(crate) struct Compactor where R: Record, - FP: FileProvider, { pub(crate) option: Arc>, - pub(crate) schema: Arc>>, - pub(crate) version_set: VersionSet, + pub(crate) schema: Arc>>, + pub(crate) version_set: VersionSet, + pub(crate) manager: Arc, } -impl Compactor +impl Compactor where R: Record, - FP: FileProvider, { pub(crate) fn new( - schema: Arc>>, + schema: Arc>>, option: Arc>, - version_set: VersionSet, + version_set: VersionSet, + manager: Arc, ) -> Self { - Compactor:: { + Compactor:: { option, schema, version_set, + manager, } } @@ -75,7 +78,7 @@ where let trigger_clone = guard.trigger.clone(); let mutable = mem::replace( &mut guard.mutable, - Mutable::new(&self.option, trigger_clone).await?, + Mutable::new(&self.option, trigger_clone, self.manager.base_fs()).await?, ); let (file_id, immutable) = mutable.into_immutable().await?; @@ -89,7 +92,7 @@ where let excess = &guard.immutables[0..chunk_num]; if let Some(scope) = - Self::minor_compaction(&self.option, recover_wal_ids, excess).await? + Self::minor_compaction(&self.option, recover_wal_ids, excess, &self.manager).await? 
{ let version_ref = self.version_set.current().await; let mut version_edits = vec![]; @@ -103,6 +106,7 @@ where &scope.max, &mut version_edits, &mut delete_gens, + &self.manager, ) .await?; } @@ -129,8 +133,12 @@ where option: &DbOption, recover_wal_ids: Option>, batches: &[(Option, Immutable)], + manager: &StoreManager, ) -> Result>, CompactionError> { if !batches.is_empty() { + let level_0_path = option.level_fs_path(0).unwrap_or(&option.base_path); + let level_0_fs = manager.get_fs(level_0_path); + let mut min = None; let mut max = None; @@ -138,7 +146,11 @@ where let mut wal_ids = Vec::with_capacity(batches.len()); let mut writer = AsyncArrowWriter::try_new( - FP::open(option.table_path(&gen)).await?, + AsyncWriter::new( + level_0_fs + .open_options(&option.table_path(&gen), default_open_options()) + .await?, + ), R::arrow_schema().clone(), Some(option.write_parquet_properties.clone()), )?; @@ -172,12 +184,13 @@ where } pub(crate) async fn major_compaction( - version: &Version, + version: &Version, option: &DbOption, mut min: &R::Key, mut max: &R::Key, version_edits: &mut Vec>, - delete_gens: &mut Vec, + delete_gens: &mut Vec<(FileId, usize)>, + manager: &StoreManager, ) -> Result<(), CompactionError> { let mut level = 0; @@ -189,14 +202,19 @@ where let (meet_scopes_ll, start_ll, end_ll) = Self::next_level_scopes(version, &mut min, &mut max, level, &meet_scopes_l)?; + let level_path = option.level_fs_path(level).unwrap_or(&option.base_path); + let level_fs = manager.get_fs(level_path); let mut streams = Vec::with_capacity(meet_scopes_l.len() + meet_scopes_ll.len()); // This Level if level == 0 { for scope in meet_scopes_l.iter() { - let file = FP::open(option.table_path(&scope.gen)).await?; + let file = level_fs + .open_options(&option.table_path(&scope.gen), default_open_options()) + .await?; streams.push(ScanStream::SsTable { inner: SsTable::open(file) + .await? 
.scan( (Bound::Unbounded, Bound::Unbounded), u32::MAX.into(), @@ -217,6 +235,7 @@ where u32::MAX.into(), None, ProjectionMask::all(), + level_fs.clone(), ) .ok_or(CompactionError::EmptyLevel)?; @@ -236,6 +255,7 @@ where u32::MAX.into(), None, ProjectionMask::all(), + level_fs.clone(), ) .ok_or(CompactionError::EmptyLevel)?; @@ -243,21 +263,21 @@ where inner: level_scan_ll, }); } - Self::build_tables(option, version_edits, level, streams).await?; + Self::build_tables(option, version_edits, level, streams, level_fs).await?; for scope in meet_scopes_l { version_edits.push(VersionEdit::Remove { level: level as u8, gen: scope.gen, }); - delete_gens.push(scope.gen); + delete_gens.push((scope.gen, level)); } for scope in meet_scopes_ll { version_edits.push(VersionEdit::Remove { level: (level + 1) as u8, gen: scope.gen, }); - delete_gens.push(scope.gen); + delete_gens.push((scope.gen, level)); } level += 1; } @@ -266,7 +286,7 @@ where } fn next_level_scopes<'a>( - version: &'a Version, + version: &'a Version, min: &mut &'a ::Key, max: &mut &'a ::Key, level: usize, @@ -289,8 +309,8 @@ where .max() .ok_or(CompactionError::EmptyLevel)?; - start_ll = Version::::scope_search(min, &version.level_slice[level + 1]); - end_ll = Version::::scope_search(max, &version.level_slice[level + 1]); + start_ll = Version::::scope_search(min, &version.level_slice[level + 1]); + end_ll = Version::::scope_search(max, &version.level_slice[level + 1]); let next_level_len = version.level_slice[level + 1].len(); for scope in version.level_slice[level + 1] @@ -306,13 +326,13 @@ where } fn this_level_scopes<'a>( - version: &'a Version, + version: &'a Version, min: &::Key, max: &::Key, level: usize, ) -> (Vec<&'a Scope<::Key>>, usize, usize) { let mut meet_scopes_l = Vec::new(); - let mut start_l = Version::::scope_search(min, &version.level_slice[level]); + let mut start_l = Version::::scope_search(min, &version.level_slice[level]); let mut end_l = start_l; let option = version.option(); @@ -347,12 +367,10 @@ where option: &DbOption, version_edits: &mut Vec::Key>>, level: usize, - streams: Vec>, - ) -> Result<(), CompactionError> - where - FP: 'scan, - { - let mut stream = MergeStream::::from_vec(streams, u32::MAX.into()).await?; + streams: Vec>, + fs: &Arc, + ) -> Result<(), CompactionError> { + let mut stream = MergeStream::::from_vec(streams, u32::MAX.into()).await?; // Kould: is the capacity parameter necessary? 
let mut builder = R::Columns::builder(8192); @@ -377,6 +395,7 @@ where &mut builder, &mut min, &mut max, + fs, ) .await?; } @@ -389,6 +408,7 @@ where &mut builder, &mut min, &mut max, + fs, ) .await?; } @@ -410,6 +430,7 @@ where builder: &mut ::Builder, min: &mut Option, max: &mut Option, + fs: &Arc, ) -> Result<(), CompactionError> { debug_assert!(min.is_some()); debug_assert!(max.is_some()); @@ -417,7 +438,10 @@ where let gen = Ulid::new(); let columns = builder.finish(None); let mut writer = AsyncArrowWriter::try_new( - FP::open(option.table_path(&gen)).await?, + AsyncWriter::new( + fs.open_options(&option.table_path(&gen), default_open_options()) + .await?, + ), R::arrow_schema().clone(), Some(option.write_parquet_properties.clone()), )?; @@ -445,6 +469,8 @@ where Io(#[from] std::io::Error), #[error("compaction parquet error: {0}")] Parquet(#[from] parquet::errors::ParquetError), + #[error("compaction fusio error: {0}")] + Fusio(#[from] fusio::Error), #[error("compaction version error: {0}")] Version(#[from] VersionError), #[error("database error: {0}")] @@ -458,13 +484,15 @@ pub(crate) mod tests { use std::sync::{atomic::AtomicU32, Arc}; use flume::bounded; - use parquet::{arrow::AsyncArrowWriter, errors::ParquetError}; + use fusio::{local::TokioFs, path::Path, DynFs}; + use fusio_parquet::writer::AsyncWriter; + use parquet::arrow::AsyncArrowWriter; use tempfile::TempDir; use crate::{ compaction::Compactor, - executor::{tokio::TokioExecutor, Executor}, - fs::{FileId, FileProvider}, + executor::tokio::TokioExecutor, + fs::{default_open_options, manager::StoreManager, FileId}, inmem::{immutable::Immutable, mutable::Mutable}, record::Record, scope::Scope, @@ -476,17 +504,17 @@ pub(crate) mod tests { DbError, DbOption, DB, }; - async fn build_immutable( + async fn build_immutable( option: &DbOption, records: Vec<(LogType, R, Timestamp)>, + fs: &Arc, ) -> Result, DbError> where R: Record + Send, - FP: FileProvider, { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let mutable: Mutable = Mutable::new(option, trigger).await?; + let mutable: Mutable = Mutable::new(option, trigger, fs).await?; for (log_ty, record, ts) in records { let _ = mutable.insert(log_ty, record, ts).await?; @@ -494,20 +522,21 @@ pub(crate) mod tests { Ok(Immutable::from(mutable.data)) } - pub(crate) async fn build_parquet_table( + pub(crate) async fn build_parquet_table( option: &DbOption, gen: FileId, records: Vec<(LogType, R, Timestamp)>, + fs: &Arc, ) -> Result<(), DbError> where R: Record + Send, - FP: Executor, { - let immutable = build_immutable::(option, records).await?; + let immutable = build_immutable::(option, records, fs).await?; let mut writer = AsyncArrowWriter::try_new( - FP::open(option.table_path(&gen)) - .await - .map_err(ParquetError::from)?, + AsyncWriter::new( + fs.open_options(&option.table_path(&gen), default_open_options()) + .await?, + ), R::arrow_schema().clone(), None, )?; @@ -520,12 +549,15 @@ pub(crate) mod tests { #[tokio::test] async fn minor_compaction() { let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + + manager + .create_dir_all(&option.wal_dir_path()) .await .unwrap(); - let batch_1 = build_immutable::( + let batch_1 = build_immutable::( &option, vec![ ( @@ -556,11 +588,12 @@ pub(crate) mod tests { 0.into(), ), ], + 
manager.base_fs(), ) .await .unwrap(); - let batch_2 = build_immutable::( + let batch_2 = build_immutable::( &option, vec![ ( @@ -591,17 +624,19 @@ pub(crate) mod tests { 0.into(), ), ], + manager.base_fs(), ) .await .unwrap(); - let scope = Compactor::::minor_compaction( - &DbOption::from(temp_dir.path()), + let scope = Compactor::::minor_compaction( + &DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()), None, &vec![ (Some(FileId::new()), batch_1), (Some(FileId::new()), batch_2), ], + &manager, ) .await .unwrap() @@ -613,28 +648,40 @@ pub(crate) mod tests { #[tokio::test] async fn major_compaction() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); - let mut option = DbOption::from(temp_dir.path()); + let mut option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); option.major_threshold_with_sst_size = 2; let option = Arc::new(option); + manager + .create_dir_all(&option.version_log_dir_path()) + .await + .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); + let ((table_gen_1, table_gen_2, table_gen_3, table_gen_4, _), version) = - build_version(&option).await; + build_version(&option, &manager).await; let min = 2.to_string(); let max = 5.to_string(); let mut version_edits = Vec::new(); - Compactor::::major_compaction( + Compactor::::major_compaction( &version, &option, &min, &max, &mut version_edits, &mut vec![], + &manager, ) .await .unwrap(); + if let VersionEdit::Add { level, scope } = &version_edits[0] { assert_eq!(*level, 1); assert_eq!(scope.min, 1.to_string()); @@ -665,18 +712,21 @@ pub(crate) mod tests { pub(crate) async fn build_version( option: &Arc>, - ) -> ( - (FileId, FileId, FileId, FileId, FileId), - Version, - ) { - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + manager: &StoreManager, + ) -> ((FileId, FileId, FileId, FileId, FileId), Version) { + let level_0_fs = option + .level_fs_path(0) + .map(|path| manager.get_fs(path)) + .unwrap_or(manager.base_fs()); + let level_1_fs = option + .level_fs_path(1) + .map(|path| manager.get_fs(path)) + .unwrap_or(manager.base_fs()); // level 0 let table_gen_1 = FileId::new(); let table_gen_2 = FileId::new(); - build_parquet_table::( + build_parquet_table::( option, table_gen_1, vec![ @@ -708,10 +758,11 @@ pub(crate) mod tests { 0.into(), ), ], + level_0_fs, ) .await .unwrap(); - build_parquet_table::( + build_parquet_table::( option, table_gen_2, vec![ @@ -743,6 +794,7 @@ pub(crate) mod tests { 0.into(), ), ], + level_0_fs, ) .await .unwrap(); @@ -751,7 +803,7 @@ pub(crate) mod tests { let table_gen_3 = FileId::new(); let table_gen_4 = FileId::new(); let table_gen_5 = FileId::new(); - build_parquet_table::( + build_parquet_table::( option, table_gen_3, vec![ @@ -783,10 +835,11 @@ pub(crate) mod tests { 0.into(), ), ], + level_1_fs, ) .await .unwrap(); - build_parquet_table::( + build_parquet_table::( option, table_gen_4, vec![ @@ -818,10 +871,11 @@ pub(crate) mod tests { 0.into(), ), ], + level_1_fs, ) .await .unwrap(); - build_parquet_table::( + build_parquet_table::( option, table_gen_5, vec![ @@ -853,16 +907,14 @@ pub(crate) mod tests { 0.into(), ), ], + level_1_fs, ) .await .unwrap(); let (sender, _) = bounded(1); - let mut version = Version::::new( - option.clone(), - sender, - Arc::new(AtomicU32::default()), - ); + let mut version = + Version::::new(option.clone(), sender, Arc::new(AtomicU32::default())); version.level_slice[0].push(Scope { min: 1.to_string(), max: 
3.to_string(), @@ -909,13 +961,29 @@ pub(crate) mod tests { #[tokio::test] pub(crate) async fn major_panic() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); - let mut option = DbOption::from(temp_dir.path()); + let mut option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); option.major_threshold_with_sst_size = 1; option.level_sst_magnification = 1; - TokioExecutor::create_dir_all(&option.wal_dir_path()) + + manager + .create_dir_all(&option.version_log_dir_path()) .await .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); + + let level_0_fs = option + .level_fs_path(0) + .map(|path| manager.get_fs(path)) + .unwrap_or(manager.base_fs()); + let level_1_fs = option + .level_fs_path(1) + .map(|path| manager.get_fs(path)) + .unwrap_or(manager.base_fs()); let table_gen0 = FileId::new(); let table_gen1 = FileId::new(); @@ -937,20 +1005,17 @@ pub(crate) mod tests { records1.push(record); } } - build_parquet_table::(&option, table_gen0, records0) + build_parquet_table::(&option, table_gen0, records0, level_0_fs) .await .unwrap(); - build_parquet_table::(&option, table_gen1, records1) + build_parquet_table::(&option, table_gen1, records1, level_1_fs) .await .unwrap(); let option = Arc::new(option); let (sender, _) = bounded(1); - let mut version = Version::::new( - option.clone(), - sender, - Arc::new(AtomicU32::default()), - ); + let mut version = + Version::::new(option.clone(), sender, Arc::new(AtomicU32::default())); version.level_slice[0].push(Scope { min: 0.to_string(), max: 4.to_string(), @@ -968,13 +1033,14 @@ pub(crate) mod tests { let min = 6.to_string(); let max = 9.to_string(); - Compactor::::major_compaction( + Compactor::::major_compaction( &version, &option, &min, &max, &mut version_edits, &mut vec![], + &manager, ) .await .unwrap(); @@ -984,8 +1050,9 @@ pub(crate) mod tests { #[tokio::test] async fn test_flush_major_level_sort() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); - let mut option = DbOption::from(temp_dir.path()); + let mut option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); option.immutable_chunk_num = 1; option.immutable_chunk_max_num = 0; option.major_threshold_with_sst_size = 2; @@ -995,7 +1062,9 @@ pub(crate) mod tests { option.major_default_oldest_table_num = 1; option.trigger_type = TriggerType::Length(5); - let db: DB = DB::new(option, TokioExecutor::new()).await.unwrap(); + let db: DB = DB::new(option, TokioExecutor::new(), manager) + .await + .unwrap(); for i in 5..9 { let item = Test { diff --git a/src/executor.rs b/src/executor.rs index 1ee88dbe..79cc6daa 100644 --- a/src/executor.rs +++ b/src/executor.rs @@ -1,8 +1,6 @@ use std::future::Future; -use crate::fs::FileProvider; - -pub trait Executor: FileProvider { +pub trait Executor { fn spawn(&self, future: F) where F: Future + Send + 'static; diff --git a/src/fs/manager.rs b/src/fs/manager.rs new file mode 100644 index 00000000..04188804 --- /dev/null +++ b/src/fs/manager.rs @@ -0,0 +1,44 @@ +use std::{collections::HashMap, sync::Arc}; + +use fusio::{dynamic::DynFs, path::Path, Error}; + +pub struct StoreManager { + base_fs: Arc, + fs_map: HashMap>>, +} + +impl StoreManager { + pub fn new(base_fs: Arc, levels_fs: Vec<(Path, Option>)>) -> Self { + let mut fs_map = HashMap::with_capacity(levels_fs.len()); + + for (path, fs) in levels_fs { + fs_map.entry(path).or_insert(fs); + } + + StoreManager { base_fs, fs_map } + 
} + + pub async fn create_dir_all(&self, path: &Path) -> Result<(), Error> { + self.base_fs.create_dir_all(path).await?; + for (_, fs) in self.fs_map.iter() { + if let Some(fs) = fs { + fs.create_dir_all(path).await?; + } + } + + Ok(()) + } + + pub fn base_fs(&self) -> &Arc { + &self.base_fs + } + + pub fn get_fs(&self, path: &Path) -> &Arc { + self.fs_map + .get(path) + .and_then(Option::as_ref) + .unwrap_or(&self.base_fs) + } +} + +// TODO: TestCases diff --git a/src/fs/mod.rs b/src/fs/mod.rs index e3f0e90e..8bd0f162 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,16 +1,12 @@ -#[cfg(any(test, feature = "tokio"))] -pub mod tokio_fs; +pub mod manager; use std::{ fmt::{Display, Formatter}, - future::Future, - io, - path::Path, + str::FromStr, }; -use futures_core::Stream; -use tokio::io::{AsyncRead, AsyncSeek, AsyncWrite}; -use ulid::Ulid; +use fusio::{fs::OpenOptions, path::Path}; +use ulid::{DecodeError, Ulid}; pub(crate) type FileId = Ulid; @@ -20,26 +16,6 @@ pub enum FileType { Log, } -pub trait AsyncFile: AsyncRead + AsyncWrite + AsyncSeek + Send + Sync + Unpin + 'static {} - -impl AsyncFile for T where T: AsyncRead + AsyncWrite + AsyncSeek + Send + Sync + Unpin + 'static {} - -pub trait FileProvider { - type File: AsyncFile; - - fn create_dir_all(path: impl AsRef) -> impl Future>; - - fn open(path: impl AsRef + Send) -> impl Future> + Send; - - fn remove(path: impl AsRef + Send) -> impl Future> + Send; - - fn list( - dir_path: impl AsRef + Send, - file_type: FileType, - is_reverse: bool, - ) -> io::Result>>; -} - impl Display for FileType { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { match self { @@ -49,3 +25,18 @@ impl Display for FileType { } } } + +pub(crate) fn default_open_options() -> OpenOptions { + OpenOptions::default().create().append().read() +} + +pub(crate) fn parse_file_id(path: &Path, suffix: FileType) -> Result, DecodeError> { + path.filename() + .map(|file_name| { + let file_id = file_name + .strip_suffix(&format!(".{}", suffix)) + .unwrap_or(file_name); + FileId::from_str(file_id) + }) + .transpose() +} diff --git a/src/fs/tokio_fs.rs b/src/fs/tokio_fs.rs deleted file mode 100644 index 1774e69a..00000000 --- a/src/fs/tokio_fs.rs +++ /dev/null @@ -1,79 +0,0 @@ -use std::{fs, fs::DirEntry, io, path::Path}; - -use async_stream::stream; -use futures_core::Stream; -use regex::Regex; -use tokio::fs::{create_dir_all, remove_file, File, OpenOptions}; - -use super::{FileId, FileProvider, FileType}; -use crate::executor::tokio::TokioExecutor; - -impl FileProvider for TokioExecutor { - type File = File; - - async fn create_dir_all(path: impl AsRef) -> io::Result<()> { - create_dir_all(path).await - } - - async fn open(path: impl AsRef + Send) -> io::Result { - OpenOptions::new() - .truncate(false) - .create(true) - .write(true) - .read(true) - .open(path) - .await - } - - async fn remove(path: impl AsRef + Send) -> io::Result<()> { - remove_file(path).await - } - - fn list( - dir_path: impl AsRef + Send, - file_type: FileType, - is_reverse: bool, - ) -> io::Result>> { - let dir_path = dir_path.as_ref().to_path_buf(); - let mut entries: Vec = - fs::read_dir(&dir_path)?.collect::, io::Error>>()?; - entries.sort_by_key(|entry| entry.file_name()); - - if is_reverse { - entries.reverse(); - } - Ok(stream! 
{ - for entry in entries { - let path = entry.path(); - if path.is_file() { - if let Some(filename) = path.file_name().and_then(|s| s.to_str()) { - if Regex::new(format!("^[0123456789ABCDEFGHJKMNPQRSTVWXYZ]{{26}}.{}$", file_type).as_str()).unwrap().is_match(filename) { - // SAFETY: Checked on WAL_REGEX - let file_id = FileId::from_string(filename - .split('.') - .next() - .unwrap()).unwrap(); - yield Ok((Self::open(dir_path.join(filename)).await?, file_id)) - } - } - } - } - }) - } -} - -#[cfg(test)] -impl TokioExecutor { - pub(crate) async fn file_exist(path: impl AsRef + Send) -> io::Result { - match tokio::fs::metadata(path).await { - Ok(_) => Ok(true), - Err(err) => { - if err.kind() == io::ErrorKind::NotFound { - Ok(false) - } else { - Err(err) - } - } - } - } -} diff --git a/src/inmem/immutable.rs b/src/inmem/immutable.rs index 8f29c4bd..6850f44c 100644 --- a/src/inmem/immutable.rs +++ b/src/inmem/immutable.rs @@ -14,7 +14,7 @@ use crate::{ timestamp::{Timestamp, Timestamped, TimestampedRef, EPOCH}, }; -pub trait ArrowArrays: Sized { +pub trait ArrowArrays: Sized + Sync { type Record: Record; type Builder: Builder; diff --git a/src/inmem/mutable.rs b/src/inmem/mutable.rs index fa81ceec..132ff201 100644 --- a/src/inmem/mutable.rs +++ b/src/inmem/mutable.rs @@ -5,11 +5,11 @@ use crossbeam_skiplist::{ map::{Entry, Range}, SkipMap, }; -use futures_util::io; +use fusio::{dynamic::DynFile, DynFs}; use ulid::Ulid; use crate::{ - fs::{FileId, FileProvider}, + fs::{default_open_options, FileId}, inmem::immutable::Immutable, record::{Key, KeyRef, Record}, timestamp::{ @@ -32,30 +32,30 @@ pub(crate) type MutableScan<'scan, R> = Range< Option, >; -#[derive(Debug)] -pub struct Mutable +pub struct Mutable where R: Record, - FP: FileProvider, { pub(crate) data: SkipMap, Option>, - wal: Option>>, + wal: Option, R>>>, pub(crate) trigger: Arc + Send + Sync>>, } -impl Mutable +impl Mutable where - FP: FileProvider, R: Record, { pub async fn new( option: &DbOption, trigger: Arc + Send + Sync>>, - ) -> io::Result { + fs: &Arc, + ) -> Result { let mut wal = None; if option.use_wal { let file_id = Ulid::new(); - let file = FP::open(option.wal_path(&file_id)).await?; + let file = fs + .open_options(&option.wal_path(&file_id), default_open_options()) + .await?; wal = Some(Mutex::new(WalFile::new(file, file_id))); }; @@ -68,10 +68,9 @@ where } } -impl Mutable +impl Mutable where R: Record + Send, - FP: FileProvider, { pub(crate) async fn insert( &self, @@ -168,7 +167,7 @@ where pub(crate) async fn into_immutable( self, - ) -> io::Result<(Option, Immutable)> { + ) -> Result<(Option, Immutable), fusio::Error> { let mut file_id = None; if let Some(wal) = self.wal { @@ -181,10 +180,9 @@ where } } -impl Mutable +impl Mutable where R: Record, - FP: FileProvider, { #[allow(unused)] pub(crate) fn len(&self) -> usize { @@ -196,10 +194,10 @@ where mod tests { use std::{ops::Bound, sync::Arc}; + use fusio::{local::TokioFs, path::Path, DynFs}; + use super::Mutable; use crate::{ - executor::tokio::TokioExecutor, - fs::FileProvider, record::Record, tests::{Test, TestRef}, timestamp::Timestamped, @@ -214,15 +212,12 @@ mod tests { let key_2 = "key_2".to_owned(); let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = 
Arc::new(TriggerFactory::create(option.trigger_type)); - let mem_table = Mutable::::new(&option, trigger) - .await - .unwrap(); + let mem_table = Mutable::::new(&option, trigger, &fs).await.unwrap(); mem_table .insert( @@ -265,16 +260,13 @@ mod tests { #[tokio::test] async fn range() { let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let mutable = Mutable::::new(&option, trigger) - .await - .unwrap(); + let mutable = Mutable::::new(&option, trigger, &fs).await.unwrap(); mutable .insert(LogType::Full, "1".into(), 0_u32.into()) diff --git a/src/lib.rs b/src/lib.rs index 3f8ff229..649428ea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -31,10 +31,15 @@ //! # Examples //! //! ```no_run -//! use std::ops::Bound; +//! use std::{ops::Bound, sync::Arc}; //! +//! use fusio::{local::TokioFs, path::Path}; //! use futures_util::stream::StreamExt; -//! use tonbo::{executor::tokio::TokioExecutor, Projection, Record, DB}; +//! use tokio::fs; +//! use tokio_util::bytes::Bytes; +//! use tonbo::{ +//! executor::tokio::TokioExecutor, fs::manager::StoreManager, DbOption, Projection, Record, DB, +//! }; //! //! // use macro to define schema of column family just like ORM //! // it provides type safety read & write API @@ -48,11 +53,15 @@ //! //! #[tokio::main] //! async fn main() { +//! // make sure the path exists +//! let _ = fs::create_dir_all("./db_path/users").await; +//! +//! let manager = StoreManager::new(Arc::new(TokioFs), vec![]); +//! let options = DbOption::from(Path::from_filesystem_path("./db_path/users").unwrap()); //! // pluggable async runtime and I/O -//! let db = DB::new("./db_path/users".into(), TokioExecutor::default()) +//! let db = DB::new(options, TokioExecutor::default(), manager) //! .await //! .unwrap(); -//! //! // insert with owned value //! db.insert(User { //! name: "Alice".into(), @@ -130,7 +139,7 @@ pub use arrow; use async_lock::RwLock; use async_stream::stream; use flume::{bounded, Sender}; -use fs::FileProvider; +use fusio::dynamic::DynFile; use futures_core::Stream; use futures_util::StreamExt; use inmem::{immutable::Immutable, mutable::Mutable}; @@ -153,7 +162,7 @@ pub use crate::option::*; use crate::{ compaction::{CompactTask, Compactor}, executor::Executor, - fs::{FileId, FileType}, + fs::{default_open_options, manager::StoreManager, parse_file_id, FileId, FileType}, serdes::Decode, stream::{ mem_projection::MemProjectionStream, merge::MergeStream, package::PackageStream, Entry, @@ -170,9 +179,10 @@ where R: Record, E: Executor, { - schema: Arc>>, - version_set: VersionSet, + schema: Arc>>, + version_set: VersionSet, lock_map: LockMap, + manager: Arc, _p: PhantomData, } @@ -187,22 +197,36 @@ where /// according to the configuration of [`DbOption`]. /// /// For more configurable options, please refer to [`DbOption`]. 
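Those options are wired through the whole patch; the in-crate tests below configure them by setting fields directly (they are `pub(crate)`). A sketch mirroring that setup, with the field values taken from the `read_from_disk` test in this diff (`temp_dir` as in those tests):

```rust
// Inside the tonbo crate (these fields are pub(crate)); forces small
// memtables and early minor/major compaction, as the tests here do.
let mut option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap());
option.immutable_chunk_num = 1;
option.immutable_chunk_max_num = 1;
option.major_threshold_with_sst_size = 3;
option.major_default_oldest_table_num = 1;
option.trigger_type = TriggerType::Length(/* max_mutable_len */ 5);
let option = Arc::new(option);
```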
- pub async fn new(option: DbOption, executor: E) -> Result> { + pub async fn new( + option: DbOption, + executor: E, + manager: StoreManager, + ) -> Result> { let option = Arc::new(option); - E::create_dir_all(&option.path).await?; - E::create_dir_all(&option.wal_dir_path()).await?; - E::create_dir_all(&option.version_log_dir_path()).await?; + let manager = Arc::new(manager); + + { + let base_fs = manager.base_fs(); + + // FIXME: error handle + let _ = base_fs.create_dir_all(&option.wal_dir_path()).await; + let _ = base_fs.create_dir_all(&option.version_log_dir_path()).await; + } let (task_tx, task_rx) = bounded(1); - let (mut cleaner, clean_sender) = Cleaner::::new(option.clone()); + let (mut cleaner, clean_sender) = Cleaner::::new(option.clone(), manager.clone()); - let version_set = VersionSet::new(clean_sender, option.clone()).await?; + let version_set = VersionSet::new(clean_sender, option.clone(), manager.clone()).await?; let schema = Arc::new(RwLock::new( - Schema::new(option.clone(), task_tx, &version_set).await?, + Schema::new(option.clone(), task_tx, &version_set, &manager).await?, )); - let mut compactor = - Compactor::::new(schema.clone(), option.clone(), version_set.clone()); + let mut compactor = Compactor::::new( + schema.clone(), + option.clone(), + version_set.clone(), + manager.clone(), + ); executor.spawn(async move { if let Err(err) = cleaner.listen().await { @@ -226,16 +250,18 @@ where schema, version_set, lock_map: Arc::new(Default::default()), + manager, _p: Default::default(), }) } /// open an optimistic ACID transaction - pub async fn transaction(&self) -> Transaction<'_, R, E> { + pub async fn transaction(&self) -> Transaction<'_, R> { Transaction::new( self.version_set.current().await, self.schema.read().await, self.lock_map.clone(), + self.manager.clone(), ) } @@ -286,6 +312,7 @@ where .await .get( &*self.version_set.current().await, + &self.manager, key, self.version_set.load_ts(), Projection::All, @@ -302,9 +329,11 @@ where ) -> impl Stream>> + 'scan { stream! 
{ let schema = self.schema.read().await; + let manager = &self.manager; let current = self.version_set.current().await; let mut scan = Scan::new( &schema, + manager, range, self.version_set.load_ts(), &*current, @@ -358,44 +387,61 @@ where } } -pub(crate) struct Schema +pub(crate) struct Schema where R: Record, - FP: FileProvider, { - mutable: Mutable, - immutables: Vec<(Option, Immutable)>, + pub mutable: Mutable, + pub immutables: Vec<(Option, Immutable)>, compaction_tx: Sender, recover_wal_ids: Option>, trigger: Arc + Send + Sync>>, } -impl Schema +impl Schema where R: Record + Send, - FP: FileProvider, { async fn new( option: Arc>, compaction_tx: Sender, - version_set: &VersionSet, + version_set: &VersionSet, + manager: &StoreManager, ) -> Result> { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); let mut schema = Schema { - mutable: Mutable::new(&option, trigger.clone()).await?, + mutable: Mutable::new(&option, trigger.clone(), manager.base_fs()).await?, immutables: Default::default(), compaction_tx, recover_wal_ids: None, trigger, }; + let base_fs = manager.base_fs(); + let wal_dir_path = option.wal_dir_path(); let mut transaction_map = HashMap::new(); - let mut wal_stream = pin!(FP::list(option.wal_dir_path(), FileType::Wal, false)?); let mut wal_ids = Vec::new(); - while let Some(wal) = wal_stream.next().await { - let (file, wal_id) = wal?; - let mut wal = WalFile::::new(file, wal_id); + let wal_metas = { + let mut wal_metas = Vec::new(); + let mut wal_stream = base_fs.list(&wal_dir_path).await?; + + while let Some(file_meta) = wal_stream.next().await { + wal_metas.push(file_meta?); + } + wal_metas.sort_by(|meta_a, meta_b| meta_a.path.cmp(&meta_b.path)); + wal_metas + }; + + for wal_meta in wal_metas { + let wal_path = wal_meta.path; + + let file = base_fs + .open_options(&wal_path, default_open_options()) + .await?; + // SAFETY: wal_stream return only file name + let wal_id = parse_file_id(&wal_path, FileType::Wal)?.unwrap(); + let mut wal = WalFile::, R>::new(file, wal_id); wal_ids.push(wal_id); let mut recover_stream = pin!(wal.recover()); @@ -465,14 +511,12 @@ where async fn get<'get>( &'get self, - version: &'get Version, + version: &'get Version, + manager: &StoreManager, key: &'get R::Key, ts: Timestamp, projection: Projection, - ) -> Result>, DbError> - where - FP: FileProvider, - { + ) -> Result>, DbError> { if let Some(entry) = self.mutable.get(key, ts) { return Ok(Some(Entry::Mutable(entry))); } @@ -500,7 +544,7 @@ where } Ok(version - .query(TimestampedRef::new(key, ts), projection) + .query(manager, TimestampedRef::new(key, ts), projection) .await? 
.map(|entry| Entry::RecordBatch(entry))) } @@ -516,41 +560,42 @@ where } /// scan configuration intermediate structure -pub struct Scan<'scan, R, FP> +pub struct Scan<'scan, R> where R: Record, - FP: FileProvider, { - schema: &'scan Schema, + schema: &'scan Schema, + manager: &'scan StoreManager, lower: Bound<&'scan R::Key>, upper: Bound<&'scan R::Key>, ts: Timestamp, - version: &'scan Version, + version: &'scan Version, fn_pre_stream: - Box) -> Option> + 'scan>, + Box) -> Option> + Send + 'scan>, limit: Option, projection_indices: Option>, projection: ProjectionMask, } -impl<'scan, R, FP> Scan<'scan, R, FP> +impl<'scan, R> Scan<'scan, R> where R: Record + Send, - FP: FileProvider, { fn new( - schema: &'scan Schema, + schema: &'scan Schema, + manager: &'scan StoreManager, (lower, upper): (Bound<&'scan R::Key>, Bound<&'scan R::Key>), ts: Timestamp, - version: &'scan Version, + version: &'scan Version, fn_pre_stream: Box< - dyn FnOnce(Option) -> Option> + 'scan, + dyn FnOnce(Option) -> Option> + Send + 'scan, >, ) -> Self { Self { schema, + manager, lower, upper, ts, @@ -627,6 +672,7 @@ where } self.version .streams( + self.manager, &mut streams, (self.lower, self.upper), self.ts, @@ -678,6 +724,7 @@ where } self.version .streams( + self.manager, &mut streams, (self.lower, self.upper), self.ts, @@ -706,12 +753,18 @@ where Version(#[from] VersionError), #[error("write parquet error: {0}")] Parquet(#[from] ParquetError), + #[error("write ulid decode error: {0}")] + UlidDecode(#[from] ulid::DecodeError), + #[error("write fusio error: {0}")] + Fusio(#[from] fusio::Error), // #[error("write encode error: {0}")] // Encode(<::Ref as Encode>::Error), #[error("write recover error: {0}")] Recover(#[from] RecoverError<::Error>), #[error("wal write error: {0}")] WalWrite(Box), + #[error("exceeds the maximum level(0-6)")] + ExceedsMaxLevel, } type LockMap = Arc>; @@ -735,16 +788,16 @@ pub(crate) mod tests { }; use async_lock::RwLock; use flume::{bounded, Receiver}; + use fusio::{local::TokioFs, path::Path, DynFs, Read, Write}; use once_cell::sync::Lazy; use parquet::{arrow::ProjectionMask, format::SortingColumn, schema::types::ColumnPath}; use tempfile::TempDir; - use tokio::io; use tracing::error; use crate::{ compaction::{CompactTask, Compactor}, executor::{tokio::TokioExecutor, Executor}, - fs::{FileId, FileProvider}, + fs::{manager::StoreManager, FileId}, inmem::{immutable::tests::TestImmutableArrays, mutable::Mutable}, record::{internal::InternalRecordRef, RecordDecodeError, RecordEncodeError, RecordRef}, serdes::{Decode, Encode}, @@ -766,7 +819,7 @@ pub(crate) mod tests { async fn decode(reader: &mut R) -> Result where - R: tokio::io::AsyncRead + Unpin, + R: Read + Unpin, { let vstring = String::decode(reader) @@ -868,7 +921,7 @@ pub(crate) mod tests { async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: io::AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { self.vstring .encode(writer) @@ -970,8 +1023,10 @@ pub(crate) mod tests { pub(crate) async fn get_test_record_batch( option: DbOption, executor: E, + manager: StoreManager, ) -> RecordBatch { - let db: DB = DB::new(option.clone(), executor).await.unwrap(); + let base_fs = manager.base_fs().clone(); + let db: DB = DB::new(option.clone(), executor, manager).await.unwrap(); db.write( Test { @@ -999,7 +1054,7 @@ pub(crate) mod tests { let trigger = schema.trigger.clone(); let mutable = mem::replace( &mut schema.mutable, - Mutable::new(&option, trigger).await.unwrap(), + Mutable::new(&option, trigger, 
&base_fs).await.unwrap(), ); Immutable::<::Columns>::from(mutable.data) @@ -1009,10 +1064,11 @@ pub(crate) mod tests { pub(crate) async fn build_schema( option: Arc>, - ) -> io::Result<(crate::Schema, Receiver)> { + fs: &Arc, + ) -> Result<(crate::Schema, Receiver), fusio::Error> { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let mutable = Mutable::new(&option, trigger.clone()).await?; + let mutable = Mutable::new(&option, trigger.clone(), fs).await?; mutable .insert( @@ -1054,8 +1110,7 @@ pub(crate) mod tests { let immutables = { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let mutable: Mutable = - Mutable::new(&option, trigger.clone()).await?; + let mutable: Mutable = Mutable::new(&option, trigger.clone(), fs).await?; mutable .insert( @@ -1115,25 +1170,33 @@ pub(crate) mod tests { option: Arc>, compaction_rx: Receiver, executor: E, - schema: crate::Schema, - version: Version, + schema: crate::Schema, + version: Version, + manager: Arc, ) -> Result, DbError> where R: Record + Send + Sync, R::Columns: Send + Sync, E: Executor + Send + Sync + 'static, { - E::create_dir_all(&option.path).await?; - E::create_dir_all(&option.version_log_dir_path()) - .await - .unwrap(); + { + let base_fs = manager.base_fs(); + + let _ = base_fs.create_dir_all(&option.wal_dir_path()).await; + let _ = base_fs.create_dir_all(&option.version_log_dir_path()).await; + } let schema = Arc::new(RwLock::new(schema)); - let (mut cleaner, clean_sender) = Cleaner::::new(option.clone()); - let version_set = build_version_set(version, clean_sender, option.clone()).await?; - let mut compactor = - Compactor::::new(schema.clone(), option.clone(), version_set.clone()); + let (mut cleaner, clean_sender) = Cleaner::::new(option.clone(), manager.clone()); + let version_set = + build_version_set(version, clean_sender, option.clone(), manager.clone()).await?; + let mut compactor = Compactor::::new( + schema.clone(), + option.clone(), + version_set.clone(), + manager.clone(), + ); executor.spawn(async move { if let Err(err) = cleaner.listen().await { @@ -1157,6 +1220,7 @@ pub(crate) mod tests { schema, version_set, lock_map: Arc::new(Default::default()), + manager, _p: Default::default(), }) } @@ -1369,8 +1433,9 @@ pub(crate) mod tests { #[tokio::test(flavor = "multi_thread")] async fn read_from_disk() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); - let mut option = DbOption::from(temp_dir.path()); + let mut option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); option.immutable_chunk_num = 1; option.immutable_chunk_max_num = 1; option.major_threshold_with_sst_size = 3; @@ -1379,12 +1444,16 @@ pub(crate) mod tests { option.major_default_oldest_table_num = 1; option.trigger_type = TriggerType::Length(/* max_mutable_len */ 5); - let db: DB = DB::new(option, TokioExecutor::new()).await.unwrap(); + let db: DB = DB::new(option, TokioExecutor::new(), manager) + .await + .unwrap(); - for item in test_items() { + for (i, item) in test_items().into_iter().enumerate() { db.write(item, 0.into()).await.unwrap(); + if i % 5 == 0 { + db.flush().await.unwrap(); + } } - let _ = db.flush().await; let tx = db.transaction().await; let key = 20.to_string(); @@ -1403,8 +1472,9 @@ pub(crate) mod tests { #[tokio::test] async fn test_flush() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); - let mut option = DbOption::from(temp_dir.path()); + let mut option = 
DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); option.immutable_chunk_num = 1; option.immutable_chunk_max_num = 1; option.major_threshold_with_sst_size = 3; @@ -1413,7 +1483,9 @@ pub(crate) mod tests { option.major_default_oldest_table_num = 1; option.trigger_type = TriggerType::Length(/* max_mutable_len */ 5); - let db: DB = DB::new(option, TokioExecutor::new()).await.unwrap(); + let db: DB = DB::new(option, TokioExecutor::new(), manager) + .await + .unwrap(); for item in &test_items()[0..10] { db.write(item.clone(), 0.into()).await.unwrap(); @@ -1430,16 +1502,18 @@ pub(crate) mod tests { #[tokio::test] async fn schema_recover() { let temp_dir = TempDir::new().unwrap(); - let option = Arc::new(DbOption::from(temp_dir.path())); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let (task_tx, _task_rx) = bounded(1); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let schema: crate::Schema = crate::Schema { - mutable: Mutable::new(&option, trigger.clone()).await.unwrap(), + let schema: crate::Schema = crate::Schema { + mutable: Mutable::new(&option, trigger.clone(), &fs).await.unwrap(), immutables: Default::default(), compaction_tx: task_tx.clone(), recover_wal_ids: None, @@ -1456,8 +1530,8 @@ pub(crate) mod tests { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let schema: crate::Schema = crate::Schema { - mutable: Mutable::new(&option, trigger.clone()).await.unwrap(), + let schema: crate::Schema = crate::Schema { + mutable: Mutable::new(&option, trigger.clone(), &fs).await.unwrap(), immutables: Default::default(), compaction_tx: task_tx, recover_wal_ids: None, diff --git a/src/ondisk/scan.rs b/src/ondisk/scan.rs index cc693f3d..4adc4042 100644 --- a/src/ondisk/scan.rs +++ b/src/ondisk/scan.rs @@ -4,36 +4,30 @@ use std::{ task::{Context, Poll}, }; +use fusio_parquet::reader::AsyncReader; use futures_core::{ready, Stream}; use parquet::arrow::{async_reader::ParquetRecordBatchStream, ProjectionMask}; use pin_project_lite::pin_project; use crate::{ - fs::FileProvider, record::Record, stream::record_batch::{RecordBatchEntry, RecordBatchIterator}, }; pin_project! 
{ #[derive(Debug)] - pub struct SsTableScan<'scan, R, FP> - where - FP: FileProvider, - { + pub struct SsTableScan<'scan, R>{ #[pin] - stream: ParquetRecordBatchStream, + stream: ParquetRecordBatchStream, iter: Option>, projection_mask: ProjectionMask, _marker: PhantomData<&'scan ()> } } -impl SsTableScan<'_, R, FP> -where - FP: FileProvider, -{ +impl SsTableScan<'_, R> { pub fn new( - stream: ParquetRecordBatchStream, + stream: ParquetRecordBatchStream, projection_mask: ProjectionMask, ) -> Self { SsTableScan { @@ -45,10 +39,9 @@ where } } -impl<'scan, R, FP> Stream for SsTableScan<'scan, R, FP> +impl<'scan, R> Stream for SsTableScan<'scan, R> where R: Record, - FP: FileProvider, { type Item = Result, parquet::errors::ParquetError>; diff --git a/src/ondisk/sstable.rs b/src/ondisk/sstable.rs index e9333175..1283f39d 100644 --- a/src/ondisk/sstable.rs +++ b/src/ondisk/sstable.rs @@ -1,84 +1,48 @@ use std::{marker::PhantomData, ops::Bound}; +use fusio::{dynamic::DynFile, DynRead}; +use fusio_parquet::reader::AsyncReader; use futures_util::StreamExt; -use parquet::{ - arrow::{ - arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions}, - arrow_writer::ArrowWriterOptions, - async_reader::AsyncReader, - AsyncArrowWriter, ParquetRecordBatchStreamBuilder, ProjectionMask, - }, - basic::{Compression, ZstdLevel}, - file::properties::WriterProperties, +use parquet::arrow::{ + arrow_reader::{ArrowReaderBuilder, ArrowReaderOptions}, + ParquetRecordBatchStreamBuilder, ProjectionMask, }; use super::{arrows::get_range_filter, scan::SsTableScan}; use crate::{ - fs::{AsyncFile, FileProvider}, record::Record, stream::record_batch::RecordBatchEntry, timestamp::{Timestamp, TimestampedRef}, }; -pub(crate) struct SsTable +pub(crate) struct SsTable where R: Record, - FP: FileProvider, { - reader: FP::File, + reader: AsyncReader, _marker: PhantomData, } -impl SsTable +impl SsTable where R: Record, - FP: FileProvider, { - pub(crate) fn open(file: FP::File) -> Self { - SsTable { - reader: file, - _marker: PhantomData, - } - } - - #[allow(unused)] - fn create_writer(&mut self) -> AsyncArrowWriter<&mut dyn AsyncFile> { - // TODO: expose writer options - let options = ArrowWriterOptions::new().with_properties( - WriterProperties::builder() - .set_created_by(concat!("tonbo version ", env!("CARGO_PKG_VERSION")).to_owned()) - .set_compression(Compression::ZSTD(ZstdLevel::try_new(3).unwrap())) - .build(), - ); - AsyncArrowWriter::try_new_with_options( - (&mut self.reader as &mut dyn AsyncFile), - R::arrow_schema().clone(), - options, - ) - .expect("Failed to create writer") - } + pub(crate) async fn open(file: Box) -> Result { + let size = DynRead::size(&file).await?; - #[cfg(test)] - async fn write( - &mut self, - record_batch: arrow::array::RecordBatch, - ) -> parquet::errors::Result<()> { - let mut writer = self.create_writer(); - writer.write(&record_batch).await?; - - if writer.in_progress_size() > (1 << 20) - 1 { - writer.flush().await?; - } - - writer.close().await?; - Ok(()) + Ok(SsTable { + reader: AsyncReader::new(file, size), + _marker: PhantomData, + }) } async fn into_parquet_builder( self, limit: Option, projection_mask: ProjectionMask, - ) -> parquet::errors::Result>> { + ) -> parquet::errors::Result< + ArrowReaderBuilder>, + > { let mut builder = ParquetRecordBatchStreamBuilder::new_with_options( self.reader, ArrowReaderOptions::default().with_page_index(true), @@ -113,7 +77,7 @@ where ts: Timestamp, limit: Option, projection_mask: ProjectionMask, - ) -> Result, parquet::errors::ParquetError> { + ) -> Result, 
parquet::errors::ParquetError> { let builder = self .into_parquet_builder(limit, projection_mask.clone()) .await?; @@ -133,83 +97,97 @@ where #[cfg(test)] pub(crate) mod tests { - use std::{borrow::Borrow, ops::Bound, path::PathBuf}; + use std::{borrow::Borrow, fs::File, ops::Bound, sync::Arc}; + use arrow::array::RecordBatch; + use fusio::{dynamic::DynFile, local::TokioFs, path::Path, DynFs}; + use fusio_parquet::writer::AsyncWriter; use futures_util::StreamExt; - use parquet::arrow::{arrow_to_parquet_schema, ProjectionMask}; + use parquet::{ + arrow::{ + arrow_to_parquet_schema, arrow_writer::ArrowWriterOptions, AsyncArrowWriter, + ProjectionMask, + }, + basic::{Compression, ZstdLevel}, + file::properties::WriterProperties, + }; use super::SsTable; use crate::{ executor::tokio::TokioExecutor, - fs::FileProvider, + fs::{default_open_options, manager::StoreManager}, record::Record, - tests::{get_test_record_batch, Test, TestRef}, + tests::{get_test_record_batch, Test}, timestamp::Timestamped, DbOption, }; - pub(crate) async fn open_sstable(path: &PathBuf) -> SsTable - where - R: Record, - FP: FileProvider, - { - SsTable::open(FP::open(path).await.unwrap()) - } - - #[tokio::test(flavor = "multi_thread", worker_threads = 2)] - async fn write_sstable() { - let temp_dir = tempfile::tempdir().unwrap(); - let record_batch = get_test_record_batch::( - DbOption::from(temp_dir.path()), - TokioExecutor::new(), + async fn write_record_batch( + file: Box, + record_batch: &RecordBatch, + ) -> Result<(), parquet::errors::ParquetError> { + // TODO: expose writer options + let options = ArrowWriterOptions::new().with_properties( + WriterProperties::builder() + .set_created_by(concat!("tonbo version ", env!("CARGO_PKG_VERSION")).to_owned()) + .set_compression(Compression::ZSTD(ZstdLevel::try_new(3).unwrap())) + .build(), + ); + let mut writer = AsyncArrowWriter::try_new_with_options( + AsyncWriter::new(file), + Test::arrow_schema().clone(), + options, ) - .await; - let table_path = temp_dir.path().join("write_test.parquet"); + .expect("Failed to create writer"); + writer.write(record_batch).await?; - open_sstable::(&table_path) - .await - .write(record_batch) - .await - .unwrap(); + if writer.in_progress_size() > (1 << 20) - 1 { + writer.flush().await?; + } - let key = Timestamped::new("hello".to_owned(), 1.into()); + writer.close().await?; + Ok(()) + } - assert_eq!( - open_sstable::(&table_path) - .await - .get(key.borrow(), ProjectionMask::all()) + pub(crate) async fn open_sstable(store: &Arc, path: &Path) -> SsTable + where + R: Record, + { + SsTable::open( + store + .open_options(path, default_open_options()) .await - .unwrap() - .unwrap() - .get(), - Some(TestRef { - vstring: "hello", - vu32: Some(12), - vbool: Some(true), - }) - ); + .unwrap(), + ) + .await + .unwrap() } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn projection_query() { let temp_dir = tempfile::tempdir().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let base_fs = manager.base_fs().clone(); let record_batch = get_test_record_batch::( - DbOption::from(temp_dir.path()), + DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()), TokioExecutor::new(), + manager, ) .await; let table_path = temp_dir.path().join("projection_query_test.parquet"); + let _ = File::create(&table_path).unwrap(); + let table_path = Path::from_filesystem_path(table_path).unwrap(); - open_sstable::(&table_path) - .await - .write(record_batch) + let file = base_fs + .open_options(&table_path, 
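// Aside (a sketch, not part of the patch): fusio hands back a boxed `DynFile`,
// and `SsTable::open` has to query the size up front because `AsyncReader`
// needs it at construction. This helper is hypothetical but uses only calls
// that appear in this file: `open_options`, `default_open_options`, `DynRead::size`.
async fn open_parquet_len(
    fs: &std::sync::Arc<dyn fusio::DynFs>,
    path: &fusio::path::Path,
) -> Result<u64, fusio::Error> {
    use fusio::DynRead;
    // the same two steps `open_sstable` performs before building an AsyncReader
    let file = fs.open_options(path, crate::fs::default_open_options()).await?;
    DynRead::size(&file).await
}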
default_open_options()) .await .unwrap(); + write_record_batch(file, &record_batch).await.unwrap(); let key = Timestamped::new("hello".to_owned(), 1.into()); { - let test_ref_1 = open_sstable::(&table_path) + let test_ref_1 = open_sstable::(&base_fs, &table_path) .await .get( key.borrow(), @@ -226,7 +204,7 @@ pub(crate) mod tests { assert_eq!(test_ref_1.get().unwrap().vbool, None); } { - let test_ref_2 = open_sstable::(&table_path) + let test_ref_2 = open_sstable::(&base_fs, &table_path) .await .get( key.borrow(), @@ -243,7 +221,7 @@ pub(crate) mod tests { assert_eq!(test_ref_2.get().unwrap().vbool, Some(true)); } { - let test_ref_3 = open_sstable::(&table_path) + let test_ref_3 = open_sstable::(&base_fs, &table_path) .await .get( key.borrow(), @@ -264,21 +242,26 @@ pub(crate) mod tests { #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn projection_scan() { let temp_dir = tempfile::tempdir().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let base_fs = manager.base_fs().clone(); let record_batch = get_test_record_batch::( - DbOption::from(temp_dir.path()), + DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()), TokioExecutor::new(), + manager, ) .await; let table_path = temp_dir.path().join("projection_scan_test.parquet"); + let _ = File::create(&table_path).unwrap(); + let table_path = Path::from_filesystem_path(table_path).unwrap(); - open_sstable::(&table_path) - .await - .write(record_batch) + let file = base_fs + .open_options(&table_path, default_open_options()) .await .unwrap(); + write_record_batch(file, &record_batch).await.unwrap(); { - let mut test_ref_1 = open_sstable::(&table_path) + let mut test_ref_1 = open_sstable::(&base_fs, &table_path) .await .scan( (Bound::Unbounded, Bound::Unbounded), @@ -303,7 +286,7 @@ pub(crate) mod tests { assert_eq!(entry_1.get().unwrap().vbool, None); } { - let mut test_ref_2 = open_sstable::(&table_path) + let mut test_ref_2 = open_sstable::(&base_fs, &table_path) .await .scan( (Bound::Unbounded, Bound::Unbounded), @@ -328,7 +311,7 @@ pub(crate) mod tests { assert_eq!(entry_1.get().unwrap().vbool, None); } { - let mut test_ref_3 = open_sstable::(&table_path) + let mut test_ref_3 = open_sstable::(&base_fs, &table_path) .await .scan( (Bound::Unbounded, Bound::Unbounded), diff --git a/src/option.rs b/src/option.rs index afd132b9..ee2a0f82 100644 --- a/src/option.rs +++ b/src/option.rs @@ -1,21 +1,30 @@ -use std::{marker::PhantomData, path::PathBuf}; +use std::{ + fmt::{Debug, Formatter}, + marker::PhantomData, + sync::Arc, +}; +use fusio::{path::Path, DynFs}; use parquet::{ basic::Compression, file::properties::{EnabledStatistics, WriterProperties}, }; use crate::{ - fs::{FileId, FileProvider, FileType}, + fs::{FileId, FileType}, record::Record, trigger::TriggerType, - version::Version, + version::{Version, MAX_LEVEL}, + DbError, }; /// configure the operating parameters of each component in the [`DB`](crate::DB) -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct DbOption { pub(crate) clean_channel_buffer: usize, + pub(crate) base_path: Path, + // TODO: DEBUG + pub(crate) level_paths: Vec)>>, pub(crate) immutable_chunk_num: usize, pub(crate) immutable_chunk_max_num: usize, pub(crate) level_sst_magnification: usize, @@ -23,7 +32,6 @@ pub struct DbOption { pub(crate) major_l_selection_table_max_num: usize, pub(crate) major_threshold_with_sst_size: usize, pub(crate) max_sst_file_size: usize, - pub(crate) path: PathBuf, pub(crate) version_log_snapshot_threshold: u32, pub(crate) 
trigger_type: TriggerType,
     pub(crate) use_wal: bool,
@@ -31,22 +39,21 @@
     _p: PhantomData<R>,
 }
 
-impl<R, P> From<P>
for DbOption +impl From for DbOption where - P: Into, R: Record, { /// build the default configured [`DbOption`] based on the passed path - fn from(path: P) -> Self { + fn from(base_path: Path) -> Self { let (column_paths, sorting_columns) = R::primary_key_path(); DbOption { - path: path.into(), immutable_chunk_num: 3, immutable_chunk_max_num: 5, major_threshold_with_sst_size: 4, level_sst_magnification: 10, max_sst_file_size: 256 * 1024 * 1024, clean_channel_buffer: 10, + base_path, write_parquet_properties: WriterProperties::builder() .set_compression(Compression::LZ4) .set_column_statistics_enabled(column_paths.clone(), EnabledStatistics::Page) @@ -61,6 +68,7 @@ where trigger_type: TriggerType::SizeOfMem(64 * 1024 * 1024), _p: Default::default(), version_log_snapshot_threshold: 200, + level_paths: vec![None; MAX_LEVEL], } } } @@ -70,9 +78,9 @@ where R: Record, { /// build the [`DB`](crate::DB) storage directory based on the passed path - pub fn path(self, path: impl Into) -> Self { + pub fn path(self, path: impl Into) -> Self { DbOption { - path: path.into(), + base_path: path.into(), ..self } } @@ -152,43 +160,88 @@ where ..self } } + + pub fn level_path( + mut self, + level: usize, + path: Path, + store: Arc, + ) -> Result> { + if level >= MAX_LEVEL { + Err(DbError::ExceedsMaxLevel)?; + } + self.level_paths[level] = Some((path, store)); + Ok(self) + } } impl DbOption where R: Record, { - pub(crate) fn table_path(&self, gen: &FileId) -> PathBuf { - self.path.join(format!("{}.{}", gen, FileType::Parquet)) + pub(crate) fn table_path(&self, gen: &FileId) -> Path { + self.base_path + .child(format!("{}.{}", gen, FileType::Parquet)) } - pub(crate) fn wal_dir_path(&self) -> PathBuf { - self.path.join("wal") + pub(crate) fn wal_dir_path(&self) -> Path { + self.base_path.child("wal") } - pub(crate) fn wal_path(&self, gen: &FileId) -> PathBuf { + pub(crate) fn wal_path(&self, gen: &FileId) -> Path { self.wal_dir_path() - .join(format!("{}.{}", gen, FileType::Wal)) + .child(format!("{}.{}", gen, FileType::Wal)) } - pub(crate) fn version_log_dir_path(&self) -> PathBuf { - self.path.join("version") + pub(crate) fn version_log_dir_path(&self) -> Path { + self.base_path.child("version") } - pub(crate) fn version_log_path(&self, gen: &FileId) -> PathBuf { + pub(crate) fn version_log_path(&self, gen: &FileId) -> Path { self.version_log_dir_path() - .join(format!("{}.{}", gen, FileType::Log)) + .child(format!("{}.{}", gen, FileType::Log)) } - pub(crate) fn is_threshold_exceeded_major( - &self, - version: &Version, - level: usize, - ) -> bool - where - E: FileProvider, - { - Version::::tables_len(version, level) + pub(crate) fn level_fs_path(&self, level: usize) -> Option<&Path> { + self.level_paths[level].as_ref().map(|(path, _)| path) + } + + pub(crate) fn is_threshold_exceeded_major(&self, version: &Version, level: usize) -> bool { + Version::::tables_len(version, level) >= (self.major_threshold_with_sst_size * self.level_sst_magnification.pow(level as u32)) } } + +impl Debug for DbOption { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("DbOption") + .field("clean_channel_buffer", &self.clean_channel_buffer) + .field("base_path", &self.base_path) + // TODO + // .field("level_paths", &self.level_paths) + .field("immutable_chunk_num", &self.immutable_chunk_num) + .field("immutable_chunk_max_num", &self.immutable_chunk_max_num) + .field("level_sst_magnification", &self.level_sst_magnification) + .field( + "major_default_oldest_table_num", + 
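// Configuration sketch tying the new pieces together: `DbOption::from` takes a
// fusio `Path` now, and `level_path` pins one level to its own store. The
// "cold" path and the second `TokioFs` are illustrative stand-ins for any
// other `DynFs` backend, and the `DbError` return type is assumed from the
// `ExceedsMaxLevel` check above; `Test` is this crate's test record type.
fn option_sketch() -> Result<DbOption<Test>, DbError<Test>> {
    use std::sync::Arc;

    use fusio::{local::TokioFs, path::Path};

    DbOption::from(Path::from_filesystem_path("/tmp/tonbo").unwrap())
        // level-2 SSTables land under their own path on a dedicated store
        .level_path(
            2,
            Path::from_filesystem_path("/tmp/tonbo-cold").unwrap(),
            Arc::new(TokioFs),
        )
}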
&self.major_default_oldest_table_num, + ) + .field( + "major_l_selection_table_max_num", + &self.major_l_selection_table_max_num, + ) + .field( + "major_threshold_with_sst_size", + &self.major_threshold_with_sst_size, + ) + .field("max_sst_file_size", &self.max_sst_file_size) + .field( + "version_log_snapshot_threshold", + &self.version_log_snapshot_threshold, + ) + .field("trigger_type", &self.trigger_type) + .field("use_wal", &self.use_wal) + .field("write_parquet_properties", &self.write_parquet_properties) + .finish() + } +} diff --git a/src/record/mod.rs b/src/record/mod.rs index 16511670..8de2926d 100644 --- a/src/record/mod.rs +++ b/src/record/mod.rs @@ -63,6 +63,8 @@ pub enum RecordEncodeError { }, #[error("record io error: {0}")] Io(#[from] io::Error), + #[error("record fusio error: {0}")] + Fusio(#[from] fusio::Error), } #[derive(Debug, Error)] @@ -74,4 +76,6 @@ pub enum RecordDecodeError { }, #[error("record io error: {0}")] Io(#[from] io::Error), + #[error("record fusio error: {0}")] + Fusio(#[from] fusio::Error), } diff --git a/src/scope.rs b/src/scope.rs index f970d058..1eead71d 100644 --- a/src/scope.rs +++ b/src/scope.rs @@ -1,6 +1,6 @@ use std::ops::Bound; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use fusio::{Read, Write}; use crate::{ fs::FileId, @@ -79,12 +79,13 @@ where async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { self.min.encode(writer).await?; self.max.encode(writer).await?; - writer.write_all(&self.gen.to_bytes()).await?; + let (result, _) = writer.write_all(&self.gen.to_bytes()[..]).await; + result?; match &self.wal_ids { None => { @@ -94,7 +95,8 @@ where 1u8.encode(writer).await?; (ids.len() as u32).encode(writer).await?; for id in ids { - writer.write_all(&id.to_bytes()).await?; + let (result, _) = writer.write_all(&id.to_bytes()[..]).await; + result?; } } } @@ -113,14 +115,15 @@ where { type Error = ::Error; - async fn decode(reader: &mut R) -> Result { + async fn decode(reader: &mut R) -> Result { + let mut buf = vec![0u8; 16]; let min = K::decode(reader).await?; let max = K::decode(reader).await?; let gen = { - let mut slice = [0; 16]; - reader.read_exact(&mut slice).await?; - FileId::from_bytes(slice) + buf = reader.read_exact(buf).await?; + // SAFETY + FileId::from_bytes(buf.as_slice().try_into().unwrap()) }; let wal_ids = match u8::decode(reader).await? 
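// Aside on the buffer pattern used in this decode (a sketch): fusio's `Write`
// returns the buffer alongside the result, and `read_exact` consumes and
// returns an owned buffer, so one Vec is reused for every 16-byte FileId.
// Assumes the fusio `Read`/`Write`/`Seek` impls for `std::io::Cursor` that the
// serdes tests in this patch rely on.
async fn file_id_roundtrip() {
    use std::io::Cursor;

    use fusio::{Read, Seek, Write};

    let id = FileId::new();
    let mut bytes = Vec::new();
    let mut cursor = Cursor::new(&mut bytes);

    // completion-style write: the call hands the buffer back with the result
    let (result, _buf) = cursor.write_all(&id.to_bytes()[..]).await;
    result.unwrap();

    cursor.seek(0).await.unwrap();
    // owned-buffer read: pass a Vec in, take the filled Vec back out
    let buf = cursor.read_exact(vec![0u8; 16]).await.unwrap();
    assert_eq!(FileId::from_bytes(buf.as_slice().try_into().unwrap()), id);
}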
{ 0 => None, @@ -129,9 +132,9 @@ where let mut ids = Vec::with_capacity(len); for _ in 0..len { - let mut slice = [0; 16]; - reader.read_exact(&mut slice).await?; - ids.push(FileId::from_bytes(slice)); + buf = reader.read_exact(buf).await?; + // SAFETY + ids.push(FileId::from_bytes(buf.as_slice().try_into().unwrap())); } Some(ids) } diff --git a/src/serdes/arc.rs b/src/serdes/arc.rs index 55b06ee5..7cd94c47 100644 --- a/src/serdes/arc.rs +++ b/src/serdes/arc.rs @@ -1,6 +1,6 @@ use std::sync::Arc; -use tokio::io::{AsyncRead, AsyncWrite}; +use fusio::{Read, Write}; use super::{Decode, Encode}; @@ -12,7 +12,7 @@ where async fn decode(reader: &mut R) -> Result where - R: AsyncRead + Unpin, + R: Read + Unpin, { Ok(Arc::from(T::decode(reader).await?)) } @@ -26,7 +26,7 @@ where async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { self.as_ref().encode(writer).await } @@ -35,3 +35,31 @@ where Encode::size(self.as_ref()) } } + +#[cfg(test)] +mod tests { + use std::{io::Cursor, sync::Arc}; + + use fusio::Seek; + + use crate::serdes::{Decode, Encode}; + + #[tokio::test] + async fn test_encode_decode() { + let source_0 = Arc::new(1u64); + let source_1 = Arc::new("Hello! Tonbo".to_string()); + + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + + source_0.encode(&mut cursor).await.unwrap(); + source_1.encode(&mut cursor).await.unwrap(); + + cursor.seek(0).await.unwrap(); + let decoded_0 = Arc::::decode(&mut cursor).await.unwrap(); + let decoded_1 = Arc::::decode(&mut cursor).await.unwrap(); + + assert_eq!(source_0, decoded_0); + assert_eq!(source_1, decoded_1); + } +} diff --git a/src/serdes/boolean.rs b/src/serdes/boolean.rs index c3323074..5be3aa86 100644 --- a/src/serdes/boolean.rs +++ b/src/serdes/boolean.rs @@ -1,16 +1,14 @@ -use std::{io, mem::size_of}; +use std::mem::size_of; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use fusio::{Read, Write}; use crate::serdes::{Decode, Encode}; impl Encode for bool { - type Error = io::Error; + type Error = fusio::Error; - async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { - writer - .write_all(&if *self { 1u8 } else { 0u8 }.to_le_bytes()) - .await + async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { + if *self { 1u8 } else { 0u8 }.encode(writer).await } fn size(&self) -> usize { @@ -19,15 +17,41 @@ impl Encode for bool { } impl Decode for bool { - type Error = io::Error; + type Error = fusio::Error; - async fn decode(reader: &mut R) -> Result { - let buf = { - let mut buf = [0; size_of::()]; - reader.read_exact(&mut buf).await?; - buf - }; + async fn decode(reader: &mut R) -> Result { + Ok(u8::decode(reader).await? 
== 1u8) + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use fusio::Seek; + + use crate::serdes::{Decode, Encode}; + + #[tokio::test] + async fn test_encode_decode() { + let source_0 = true; + let source_1 = false; + let source_2 = true; + + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + + source_0.encode(&mut cursor).await.unwrap(); + source_1.encode(&mut cursor).await.unwrap(); + source_2.encode(&mut cursor).await.unwrap(); + + cursor.seek(0).await.unwrap(); + let decoded_0 = bool::decode(&mut cursor).await.unwrap(); + let decoded_1 = bool::decode(&mut cursor).await.unwrap(); + let decoded_2 = bool::decode(&mut cursor).await.unwrap(); - Ok(u8::from_le_bytes(buf) == 1u8) + assert_eq!(source_0, decoded_0); + assert_eq!(source_1, decoded_1); + assert_eq!(source_2, decoded_2); } } diff --git a/src/serdes/bytes.rs b/src/serdes/bytes.rs index 27bb4eea..7b0085da 100644 --- a/src/serdes/bytes.rs +++ b/src/serdes/bytes.rs @@ -1,15 +1,17 @@ -use std::io; - use bytes::Bytes; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use fusio::{IoBuf, Read, Write}; use crate::serdes::{Decode, Encode}; impl Encode for &[u8] { - type Error = io::Error; + type Error = fusio::Error; + + async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { + (self.len() as u32).encode(writer).await?; + let (result, _) = writer.write_all(*self).await; + result?; - async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { - writer.write_all(self).await + Ok(()) } fn size(&self) -> usize { @@ -18,10 +20,14 @@ impl Encode for &[u8] { } impl Encode for Bytes { - type Error = io::Error; + type Error = fusio::Error; - async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { - writer.write_all(self).await + async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { + (self.len() as u32).encode(writer).await?; + let (result, _) = writer.write_all(self.as_slice()).await; + result?; + + Ok(()) } fn size(&self) -> usize { @@ -30,12 +36,37 @@ impl Encode for Bytes { } impl Decode for Bytes { - type Error = io::Error; + type Error = fusio::Error; + + async fn decode(reader: &mut R) -> Result { + let len = u32::decode(reader).await?; + let buf = reader.read_exact(vec![0u8; len as usize]).await?; + + Ok(buf.as_bytes()) + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use bytes::Bytes; + use fusio::Seek; + + use crate::serdes::{Decode, Encode}; + + #[tokio::test] + async fn test_encode_decode() { + let source = Bytes::from_static(b"hello! 
Tonbo"); + + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + + source.encode(&mut cursor).await.unwrap(); - async fn decode(reader: &mut R) -> Result { - let mut buf = Vec::new(); - reader.read_exact(&mut buf).await?; + cursor.seek(0).await.unwrap(); + let decoded = Bytes::decode(&mut cursor).await.unwrap(); - Ok(Bytes::from(buf)) + assert_eq!(source, decoded); } } diff --git a/src/serdes/mod.rs b/src/serdes/mod.rs index 69b9dd32..a88790fc 100644 --- a/src/serdes/mod.rs +++ b/src/serdes/mod.rs @@ -6,16 +6,16 @@ mod num; pub(crate) mod option; mod string; -use std::{future::Future, io}; +use std::future::Future; -use tokio::io::{AsyncRead, AsyncWrite}; +use fusio::{Read, Write}; pub trait Encode { - type Error: From + std::error::Error + Send + Sync + 'static; + type Error: From + std::error::Error + Send + Sync + 'static; fn encode(&self, writer: &mut W) -> impl Future> + Send where - W: AsyncWrite + Unpin + Send; + W: Write + Unpin + Send; fn size(&self) -> usize; } @@ -25,7 +25,7 @@ impl Encode for &T { async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { Encode::encode(*self, writer).await } @@ -36,16 +36,18 @@ impl Encode for &T { } pub trait Decode: Sized { - type Error: From + std::error::Error + Send + Sync + 'static; + type Error: From + std::error::Error + Send + Sync + 'static; fn decode(reader: &mut R) -> impl Future> where - R: AsyncRead + Unpin; + R: Read + Unpin; } #[cfg(test)] mod tests { - use tokio::io::AsyncWriteExt; + use std::io; + + use fusio::{Read, Seek}; use super::*; @@ -55,13 +57,14 @@ mod tests { struct TestStruct(u32); impl Encode for TestStruct { - type Error = io::Error; + type Error = fusio::Error; async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { - writer.write_u32(self.0).await?; + self.0.encode(writer).await?; + Ok(()) } @@ -71,25 +74,23 @@ mod tests { } impl Decode for TestStruct { - type Error = io::Error; + type Error = fusio::Error; async fn decode(reader: &mut R) -> Result where - R: AsyncRead + Unpin, + R: Read + Unpin, { - let value = tokio::io::AsyncReadExt::read_u32(reader).await?; - Ok(TestStruct(value)) + Ok(TestStruct(u32::decode(reader).await?)) } } // Test encoding and decoding let original = TestStruct(42); - let mut buffer = Vec::new(); - - original.encode(&mut buffer).await.unwrap(); - assert_eq!(buffer.len(), original.size()); + let mut buf = Vec::new(); + let mut cursor = io::Cursor::new(&mut buf); + original.encode(&mut cursor).await.unwrap(); - let mut cursor = std::io::Cursor::new(buffer); + cursor.seek(0).await.unwrap(); let decoded = TestStruct::decode(&mut cursor).await.unwrap(); assert_eq!(original.0, decoded.0); diff --git a/src/serdes/num.rs b/src/serdes/num.rs index 274b4882..c240c3a6 100644 --- a/src/serdes/num.rs +++ b/src/serdes/num.rs @@ -1,6 +1,6 @@ -use std::{io, mem::size_of}; +use std::mem::size_of; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use fusio::{Read, Write}; use super::{Decode, Encode}; @@ -8,13 +8,13 @@ use super::{Decode, Encode}; macro_rules! 
implement_encode_decode { ($struct_name:ident) => { impl Encode for $struct_name { - type Error = io::Error; + type Error = fusio::Error; - async fn encode( - &self, - writer: &mut W, - ) -> Result<(), Self::Error> { - writer.write_all(&self.to_le_bytes()).await + async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> { + let (result, _) = writer.write_all(&self.to_le_bytes()[..]).await; + result?; + + Ok(()) } fn size(&self) -> usize { @@ -23,16 +23,13 @@ macro_rules! implement_encode_decode { } impl Decode for $struct_name { - type Error = io::Error; + type Error = fusio::Error; - async fn decode(reader: &mut R) -> Result { - let buf = { - let mut buf = [0; size_of::()]; - reader.read_exact(&mut buf).await?; - buf - }; + async fn decode(reader: &mut R) -> Result { + let mut bytes = [0u8; size_of::()]; + let _ = reader.read_exact(&mut bytes[..]).await?; - Ok(Self::from_le_bytes(buf)) + Ok(Self::from_le_bytes(bytes)) } } }; @@ -46,3 +43,55 @@ implement_encode_decode!(u8); implement_encode_decode!(u16); implement_encode_decode!(u32); implement_encode_decode!(u64); + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use fusio::Seek; + + use crate::serdes::{Decode, Encode}; + + #[tokio::test] + async fn test_encode_decode() { + let source_0 = 8u8; + let source_1 = 16u16; + let source_2 = 32u32; + let source_3 = 64u64; + let source_4 = 8i8; + let source_5 = 16i16; + let source_6 = 32i32; + let source_7 = 64i64; + + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + + source_0.encode(&mut cursor).await.unwrap(); + source_1.encode(&mut cursor).await.unwrap(); + source_2.encode(&mut cursor).await.unwrap(); + source_3.encode(&mut cursor).await.unwrap(); + source_4.encode(&mut cursor).await.unwrap(); + source_5.encode(&mut cursor).await.unwrap(); + source_6.encode(&mut cursor).await.unwrap(); + source_7.encode(&mut cursor).await.unwrap(); + + cursor.seek(0).await.unwrap(); + let decoded_0 = u8::decode(&mut cursor).await.unwrap(); + let decoded_1 = u16::decode(&mut cursor).await.unwrap(); + let decoded_2 = u32::decode(&mut cursor).await.unwrap(); + let decoded_3 = u64::decode(&mut cursor).await.unwrap(); + let decoded_4 = i8::decode(&mut cursor).await.unwrap(); + let decoded_5 = i16::decode(&mut cursor).await.unwrap(); + let decoded_6 = i32::decode(&mut cursor).await.unwrap(); + let decoded_7 = i64::decode(&mut cursor).await.unwrap(); + + assert_eq!(source_0, decoded_0); + assert_eq!(source_1, decoded_1); + assert_eq!(source_2, decoded_2); + assert_eq!(source_3, decoded_3); + assert_eq!(source_4, decoded_4); + assert_eq!(source_5, decoded_5); + assert_eq!(source_6, decoded_6); + assert_eq!(source_7, decoded_7); + } +} diff --git a/src/serdes/option.rs b/src/serdes/option.rs index 74d8f114..f540d99c 100644 --- a/src/serdes/option.rs +++ b/src/serdes/option.rs @@ -1,7 +1,7 @@ use std::io; +use fusio::{Read, Write}; use thiserror::Error; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use super::{Decode, Encode}; @@ -13,6 +13,8 @@ where { #[error("io error: {0}")] Io(#[from] io::Error), + #[error("fusio error: {0}")] + Fusio(#[from] fusio::Error), #[error("inner error: {0}")] Inner(#[source] E), } @@ -25,6 +27,8 @@ where { #[error("io error: {0}")] Io(#[from] io::Error), + #[error("fusio error: {0}")] + Fusio(#[from] fusio::Error), #[error("inner error: {0}")] Inner(#[source] E), } @@ -37,12 +41,12 @@ where async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { match 
self { - None => writer.write_all(&[0]).await?, + None => 0u8.encode(writer).await?, Some(v) => { - writer.write_all(&[1]).await?; + 1u8.encode(writer).await?; v.encode(writer).await.map_err(EncodeError::Inner)?; } } @@ -63,13 +67,43 @@ where { type Error = DecodeError; - async fn decode(reader: &mut R) -> Result { - let mut o = [0]; - reader.read_exact(&mut o).await?; - match o[0] { + async fn decode(reader: &mut R) -> Result { + match u8::decode(reader).await? { 0 => Ok(None), 1 => Ok(Some(V::decode(reader).await.map_err(DecodeError::Inner)?)), _ => panic!("invalid option tag"), } } } + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use fusio::Seek; + + use crate::serdes::{Decode, Encode}; + + #[tokio::test] + async fn test_encode_decode() { + let source_0 = Some(1u64); + let source_1 = None; + let source_2 = Some("Hello! Tonbo".to_string()); + + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + + source_0.encode(&mut cursor).await.unwrap(); + source_1.encode(&mut cursor).await.unwrap(); + source_2.encode(&mut cursor).await.unwrap(); + + cursor.seek(0).await.unwrap(); + let decoded_0 = Option::::decode(&mut cursor).await.unwrap(); + let decoded_1 = Option::::decode(&mut cursor).await.unwrap(); + let decoded_2 = Option::::decode(&mut cursor).await.unwrap(); + + assert_eq!(source_0, decoded_0); + assert_eq!(source_1, decoded_1); + assert_eq!(source_2, decoded_2); + } +} diff --git a/src/serdes/string.rs b/src/serdes/string.rs index bc5ff0b7..10e3e9c4 100644 --- a/src/serdes/string.rs +++ b/src/serdes/string.rs @@ -1,18 +1,21 @@ -use std::{io, mem::size_of}; +use std::mem::size_of; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use fusio::{Read, Write}; use super::{Decode, Encode}; impl<'r> Encode for &'r str { - type Error = io::Error; + type Error = fusio::Error; async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin, + W: Write + Unpin, { - writer.write_all(&(self.len() as u16).to_le_bytes()).await?; - writer.write_all(self.as_bytes()).await + (self.len() as u16).encode(writer).await?; + let (result, _) = writer.write_all(self.as_bytes()).await; + result?; + + Ok(()) } fn size(&self) -> usize { @@ -21,11 +24,11 @@ impl<'r> Encode for &'r str { } impl Encode for String { - type Error = io::Error; + type Error = fusio::Error; async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { self.as_str().encode(writer).await } @@ -36,21 +39,40 @@ impl Encode for String { } impl Decode for String { - type Error = io::Error; - - async fn decode(reader: &mut R) -> Result { - let len = { - let mut len = [0; size_of::()]; - reader.read_exact(&mut len).await?; - u16::from_le_bytes(len) as usize - }; - - let vec = { - let mut vec = vec![0; len]; - reader.read_exact(&mut vec).await?; - vec - }; - - Ok(unsafe { String::from_utf8_unchecked(vec) }) + type Error = fusio::Error; + + async fn decode(reader: &mut R) -> Result { + let len = u16::decode(reader).await?; + let buf = reader.read_exact(vec![0u8; len as usize]).await?; + + Ok(unsafe { String::from_utf8_unchecked(buf.as_slice().to_vec()) }) + } +} + +#[cfg(test)] +mod tests { + use std::io::Cursor; + + use fusio::Seek; + + use crate::serdes::{Decode, Encode}; + + #[tokio::test] + async fn test_encode_decode() { + let source_0 = "Hello! World"; + let source_1 = "Hello! 
Tonbo".to_string(); + + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + + source_0.encode(&mut cursor).await.unwrap(); + source_1.encode(&mut cursor).await.unwrap(); + + cursor.seek(0).await.unwrap(); + let decoded_0 = String::decode(&mut cursor).await.unwrap(); + let decoded_1 = String::decode(&mut cursor).await.unwrap(); + + assert_eq!(source_0, decoded_0); + assert_eq!(source_1, decoded_1); } } diff --git a/src/stream/level.rs b/src/stream/level.rs index c6867d62..655a78e9 100644 --- a/src/stream/level.rs +++ b/src/stream/level.rs @@ -1,17 +1,21 @@ use std::{ collections::{Bound, VecDeque}, future::Future, - io, pin::Pin, sync::Arc, task::{Context, Poll}, }; +use fusio::{ + dynamic::{DynFile, MaybeSendFuture}, + path::Path, + DynFs, Error, +}; use futures_core::Stream; use parquet::{arrow::ProjectionMask, errors::ParquetError}; use crate::{ - fs::{FileId, FileProvider}, + fs::{default_open_options, FileId}, ondisk::{scan::SsTableScan, sstable::SsTable}, record::Record, scope::Scope, @@ -21,29 +25,22 @@ use crate::{ DbOption, }; -enum FutureStatus<'level, R, FP> +enum FutureStatus<'level, R> where R: Record, - FP: FileProvider, { Init(FileId), - Ready(SsTableScan<'level, R, FP>), - OpenFile(Pin> + Send + 'level>>), + Ready(SsTableScan<'level, R>), + OpenFile(Pin, Error>> + 'level>>), + OpenSst(Pin, Error>> + Send + 'level>>), LoadStream( - Pin< - Box< - dyn Future, ParquetError>> - + Send - + 'level, - >, - >, + Pin, ParquetError>> + Send + 'level>>, ), } -pub(crate) struct LevelStream<'level, R, FP> +pub(crate) struct LevelStream<'level, R> where R: Record, - FP: FileProvider, { lower: Bound<&'level R::Key>, upper: Bound<&'level R::Key>, @@ -52,18 +49,19 @@ where gens: VecDeque, limit: Option, projection_mask: ProjectionMask, - status: FutureStatus<'level, R, FP>, + status: FutureStatus<'level, R>, + fs: Arc, + path: Option, } -impl<'level, R, FP> LevelStream<'level, R, FP> +impl<'level, R> LevelStream<'level, R> where R: Record, - FP: FileProvider, { // Kould: only used by Compaction now, and the start and end of the sstables range are known #[allow(clippy::too_many_arguments)] pub(crate) fn new( - version: &Version, + version: &Version, level: usize, start: usize, end: usize, @@ -71,6 +69,7 @@ where ts: Timestamp, limit: Option, projection_mask: ProjectionMask, + fs: Arc, ) -> Option { let (lower, upper) = range; let mut gens: VecDeque = version.level_slice[level][start..end + 1] @@ -89,14 +88,15 @@ where limit, projection_mask, status, + fs, + path: None, }) } } -impl<'level, R, FP> Stream for LevelStream<'level, R, FP> +impl<'level, R> Stream for LevelStream<'level, R> where R: Record, - FP: FileProvider + 'level, { type Item = Result, ParquetError>; @@ -105,17 +105,49 @@ where return match &mut self.status { FutureStatus::Init(gen) => { let gen = *gen; - self.status = - FutureStatus::OpenFile(Box::pin(FP::open(self.option.table_path(&gen)))); + self.path = Some(self.option.table_path(&gen)); + + let reader = self + .fs + .open_options(self.path.as_ref().unwrap(), default_open_options()); + #[allow(clippy::missing_transmute_annotations)] + let reader = unsafe { + std::mem::transmute::< + _, + Pin< + Box< + dyn MaybeSendFuture, Error>> + + 'static, + >, + >, + >(reader) + }; + self.status = FutureStatus::OpenFile(reader); continue; } FutureStatus::Ready(stream) => match Pin::new(stream).poll_next(cx) { Poll::Ready(None) => match self.gens.pop_front() { None => Poll::Ready(None), Some(gen) => { - self.status = FutureStatus::OpenFile(Box::pin(FP::open( - 
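// Consumer-side sketch: each `next().await` may internally step through
// Init -> OpenFile -> OpenSst -> LoadStream before `Ready` yields entries,
// and fusio failures surface wrapped as `ParquetError::External`. Assumes a
// stream already built with `LevelStream::new` as in the tests below.
async fn drain_level<R: crate::record::Record>(
    mut stream: crate::stream::level::LevelStream<'_, R>,
) -> Result<usize, parquet::errors::ParquetError> {
    use futures_util::StreamExt;

    let mut entries = 0;
    while let Some(entry) = stream.next().await {
        entry?; // an Err here may be a fusio open failure or a parquet error
        entries += 1;
    }
    Ok(entries)
}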
self.option.table_path(&gen), - ))); + self.path = Some(self.option.table_path(&gen)); + + let reader = self + .fs + .open_options(self.path.as_ref().unwrap(), default_open_options()); + #[allow(clippy::missing_transmute_annotations)] + let reader = unsafe { + std::mem::transmute::< + _, + Pin< + Box< + dyn MaybeSendFuture< + Output = Result, Error>, + > + 'static, + >, + >, + >(reader) + }; + self.status = FutureStatus::OpenFile(reader); continue; } }, @@ -129,7 +161,17 @@ where }, FutureStatus::OpenFile(file_future) => match Pin::new(file_future).poll(cx) { Poll::Ready(Ok(file)) => { - self.status = FutureStatus::LoadStream(Box::pin(SsTable::open(file).scan( + self.status = FutureStatus::OpenSst(Box::pin(SsTable::open(file))); + continue; + } + Poll::Ready(Err(err)) => { + Poll::Ready(Some(Err(ParquetError::External(Box::new(err))))) + } + Poll::Pending => Poll::Pending, + }, + FutureStatus::OpenSst(sst_future) => match Pin::new(sst_future).poll(cx) { + Poll::Ready(Ok(sst)) => { + self.status = FutureStatus::LoadStream(Box::pin(sst.scan( (self.lower, self.upper), self.ts, self.limit, @@ -137,7 +179,9 @@ where ))); continue; } - Poll::Ready(Err(err)) => Poll::Ready(Some(Err(ParquetError::from(err)))), + Poll::Ready(Err(err)) => { + Poll::Ready(Some(Err(ParquetError::External(Box::new(err))))) + } Poll::Pending => Poll::Pending, }, FutureStatus::LoadStream(stream_future) => match Pin::new(stream_future).poll(cx) { @@ -157,21 +201,34 @@ where mod tests { use std::{collections::Bound, sync::Arc}; + use fusio::{local::TokioFs, path::Path}; use futures_util::StreamExt; use parquet::arrow::{arrow_to_parquet_schema, ProjectionMask}; use tempfile::TempDir; use crate::{ - compaction::tests::build_version, record::Record, stream::level::LevelStream, tests::Test, - DbOption, + compaction::tests::build_version, fs::manager::StoreManager, record::Record, + stream::level::LevelStream, tests::Test, DbOption, }; #[tokio::test] async fn projection_scan() { let temp_dir = TempDir::new().unwrap(); - let option = Arc::new(DbOption::from(temp_dir.path())); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); + + manager + .create_dir_all(&option.version_log_dir_path()) + .await + .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); - let (_, version) = build_version(&option).await; + let (_, version) = build_version(&option, &manager).await; { let mut level_stream_1 = LevelStream::new( @@ -186,6 +243,7 @@ mod tests { &arrow_to_parquet_schema(Test::arrow_schema()).unwrap(), [0, 1, 2, 3], ), + manager.base_fs().clone(), ) .unwrap(); @@ -221,6 +279,7 @@ mod tests { &arrow_to_parquet_schema(Test::arrow_schema()).unwrap(), [0, 1, 2, 4], ), + manager.base_fs().clone(), ) .unwrap(); @@ -256,6 +315,7 @@ mod tests { &arrow_to_parquet_schema(Test::arrow_schema()).unwrap(), [0, 1, 2], ), + manager.base_fs().clone(), ) .unwrap(); diff --git a/src/stream/mem_projection.rs b/src/stream/mem_projection.rs index 87024240..02f53719 100644 --- a/src/stream/mem_projection.rs +++ b/src/stream/mem_projection.rs @@ -9,31 +9,25 @@ use parquet::{arrow::ProjectionMask, errors::ParquetError}; use pin_project_lite::pin_project; use crate::{ - fs::FileProvider, record::Record, stream::{Entry, ScanStream}, }; pin_project! 
{ - pub struct MemProjectionStream<'projection, R, FP> + pub struct MemProjectionStream<'projection, R> where R: Record, - FP: FileProvider, { - stream: Box>, + stream: Box>, projection_mask: Arc, } } -impl<'projection, R, FP> MemProjectionStream<'projection, R, FP> +impl<'projection, R> MemProjectionStream<'projection, R> where R: Record, - FP: FileProvider + 'projection, { - pub(crate) fn new( - stream: ScanStream<'projection, R, FP>, - projection_mask: ProjectionMask, - ) -> Self { + pub(crate) fn new(stream: ScanStream<'projection, R>, projection_mask: ProjectionMask) -> Self { Self { stream: Box::new(stream), projection_mask: Arc::new(projection_mask), @@ -41,10 +35,9 @@ where } } -impl<'projection, R, FP> Stream for MemProjectionStream<'projection, R, FP> +impl<'projection, R> Stream for MemProjectionStream<'projection, R> where R: Record, - FP: FileProvider + 'projection, { type Item = Result, ParquetError>; @@ -65,28 +58,26 @@ where mod tests { use std::{ops::Bound, sync::Arc}; + use fusio::{local::TokioFs, path::Path, DynFs}; use futures_util::StreamExt; use parquet::arrow::{arrow_to_parquet_schema, ProjectionMask}; use crate::{ - executor::tokio::TokioExecutor, fs::FileProvider, inmem::mutable::Mutable, record::Record, - stream::mem_projection::MemProjectionStream, tests::Test, trigger::TriggerFactory, - wal::log::LogType, DbOption, + inmem::mutable::Mutable, record::Record, stream::mem_projection::MemProjectionStream, + tests::Test, trigger::TriggerFactory, wal::log::LogType, DbOption, }; #[tokio::test] async fn merge_mutable() { let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let mutable = Mutable::::new(&option, trigger) - .await - .unwrap(); + let mutable = Mutable::::new(&option, trigger, &fs).await.unwrap(); mutable .insert( @@ -130,7 +121,7 @@ mod tests { vec![0, 1, 2, 4], ); - let mut stream = MemProjectionStream::::new( + let mut stream = MemProjectionStream::::new( mutable .scan((Bound::Unbounded, Bound::Unbounded), 6.into()) .into(), diff --git a/src/stream/merge.rs b/src/stream/merge.rs index acf89ce8..4fbb9532 100644 --- a/src/stream/merge.rs +++ b/src/stream/merge.rs @@ -10,15 +10,14 @@ use futures_util::stream::StreamExt; use pin_project_lite::pin_project; use super::{Entry, ScanStream}; -use crate::{fs::FileProvider, record::Record, timestamp::Timestamp}; +use crate::{record::Record, timestamp::Timestamp}; pin_project! { - pub struct MergeStream<'merge, R, FP> + pub struct MergeStream<'merge, R> where R: Record, - FP: FileProvider, { - streams: Vec>, + streams: Vec>, peeked: BinaryHeap>, buf: Option>, ts: Timestamp, @@ -26,13 +25,12 @@ pin_project! 
{ } } -impl<'merge, R, FP> MergeStream<'merge, R, FP> +impl<'merge, R> MergeStream<'merge, R> where R: Record, - FP: FileProvider + 'merge, { pub(crate) async fn from_vec( - mut streams: Vec>, + mut streams: Vec>, ts: Timestamp, ) -> Result { let mut peeked = BinaryHeap::with_capacity(streams.len()); @@ -64,10 +62,9 @@ where } } -impl<'merge, R, FP> Stream for MergeStream<'merge, R, FP> +impl<'merge, R> Stream for MergeStream<'merge, R> where R: Record, - FP: FileProvider + 'merge, { type Item = Result, parquet::errors::ParquetError>; @@ -161,27 +158,26 @@ where mod tests { use std::{ops::Bound, sync::Arc}; + use fusio::{local::TokioFs, path::Path, DynFs}; use futures_util::StreamExt; use super::MergeStream; use crate::{ - executor::tokio::TokioExecutor, fs::FileProvider, inmem::mutable::Mutable, stream::Entry, - trigger::TriggerFactory, wal::log::LogType, DbOption, + inmem::mutable::Mutable, stream::Entry, trigger::TriggerFactory, wal::log::LogType, + DbOption, }; #[tokio::test] async fn merge_mutable() { let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let m1 = Mutable::::new(&option, trigger) - .await - .unwrap(); + let m1 = Mutable::::new(&option, trigger, &fs).await.unwrap(); m1.remove(LogType::Full, "b".into(), 3.into()) .await @@ -195,9 +191,7 @@ mod tests { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let m2 = Mutable::::new(&option, trigger) - .await - .unwrap(); + let m2 = Mutable::::new(&option, trigger, &fs).await.unwrap(); m2.insert(LogType::Full, "a".into(), 1.into()) .await .unwrap(); @@ -210,9 +204,7 @@ mod tests { let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let m3 = Mutable::::new(&option, trigger) - .await - .unwrap(); + let m3 = Mutable::::new(&option, trigger, &fs).await.unwrap(); m3.insert(LogType::Full, "e".into(), 4.into()) .await .unwrap(); @@ -220,7 +212,7 @@ mod tests { let lower = "a".to_string(); let upper = "e".to_string(); let bound = (Bound::Included(&lower), Bound::Included(&upper)); - let mut merge = MergeStream::::from_vec( + let mut merge = MergeStream::::from_vec( vec![ m1.scan(bound, 6.into()).into(), m2.scan(bound, 6.into()).into(), @@ -272,16 +264,14 @@ mod tests { #[tokio::test] async fn merge_mutable_remove_duplicates() { let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let m1 = Mutable::::new(&option, trigger) - .await - .unwrap(); + let m1 = Mutable::::new(&option, trigger, &fs).await.unwrap(); m1.insert(LogType::Full, "1".into(), 0_u32.into()) .await .unwrap(); @@ -301,12 +291,10 @@ mod tests { let lower = "1".to_string(); let upper = "4".to_string(); let bound = (Bound::Included(&lower), Bound::Included(&upper)); - let mut merge = MergeStream::::from_vec( - vec![m1.scan(bound, 0.into()).into()], - 0.into(), - ) - .await - .unwrap(); + let mut merge = + 
MergeStream::::from_vec(vec![m1.scan(bound, 0.into()).into()], 0.into()) + .await + .unwrap(); if let Some(Ok(Entry::Mutable(entry))) = merge.next().await { assert_eq!(entry.key().value, "1"); @@ -331,12 +319,10 @@ mod tests { let lower = "1".to_string(); let upper = "4".to_string(); let bound = (Bound::Included(&lower), Bound::Included(&upper)); - let mut merge = MergeStream::::from_vec( - vec![m1.scan(bound, 1.into()).into()], - 1.into(), - ) - .await - .unwrap(); + let mut merge = + MergeStream::::from_vec(vec![m1.scan(bound, 1.into()).into()], 1.into()) + .await + .unwrap(); if let Some(Ok(Entry::Mutable(entry))) = merge.next().await { assert_eq!(entry.key().value, "1"); @@ -364,16 +350,14 @@ mod tests { #[tokio::test] async fn merge_mutable_limit() { let temp_dir = tempfile::tempdir().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(&option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let m1 = Mutable::::new(&option, trigger) - .await - .unwrap(); + let m1 = Mutable::::new(&option, trigger, &fs).await.unwrap(); m1.insert(LogType::Full, "1".into(), 0_u32.into()) .await .unwrap(); @@ -387,7 +371,7 @@ mod tests { let lower = "1".to_string(); let upper = "3".to_string(); { - let mut merge = MergeStream::::from_vec( + let mut merge = MergeStream::::from_vec( vec![m1 .scan((Bound::Included(&lower), Bound::Included(&upper)), 0.into()) .into()], @@ -407,7 +391,7 @@ mod tests { assert!(merge.next().await.is_none()); } { - let mut merge = MergeStream::::from_vec( + let mut merge = MergeStream::::from_vec( vec![m1 .scan((Bound::Included(&lower), Bound::Included(&upper)), 0.into()) .into()], diff --git a/src/stream/mod.rs b/src/stream/mod.rs index cac1c40f..fa0b5afe 100644 --- a/src/stream/mod.rs +++ b/src/stream/mod.rs @@ -19,7 +19,6 @@ use pin_project_lite::pin_project; use record_batch::RecordBatchEntry; use crate::{ - fs::FileProvider, inmem::{immutable::ImmutableScan, mutable::MutableScan}, ondisk::scan::SsTableScan, record::{Key, Record, RecordRef}, @@ -101,10 +100,9 @@ where pin_project! { #[project = ScanStreamProject] - pub enum ScanStream<'scan, R, FP> + pub enum ScanStream<'scan, R> where R: Record, - FP: FileProvider, { Transaction { #[pin] @@ -120,23 +118,22 @@ pin_project! 
{ }, SsTable { #[pin] - inner: SsTableScan<'scan, R, FP>, + inner: SsTableScan<'scan, R>, }, Level { #[pin] - inner: LevelStream<'scan, R, FP>, + inner: LevelStream<'scan, R>, }, MemProjection { #[pin] - inner: MemProjectionStream<'scan, R, FP>, + inner: MemProjectionStream<'scan, R>, } } } -impl<'scan, R, FP> From> for ScanStream<'scan, R, FP> +impl<'scan, R> From> for ScanStream<'scan, R> where R: Record, - FP: FileProvider, { fn from(inner: TransactionScan<'scan, R>) -> Self { ScanStream::Transaction { @@ -145,10 +142,9 @@ where } } -impl<'scan, R, FP> From> for ScanStream<'scan, R, FP> +impl<'scan, R> From> for ScanStream<'scan, R> where R: Record, - FP: FileProvider, { fn from(inner: MutableScan<'scan, R>) -> Self { ScanStream::Mutable { @@ -157,10 +153,9 @@ where } } -impl<'scan, R, FP> From> for ScanStream<'scan, R, FP> +impl<'scan, R> From> for ScanStream<'scan, R> where R: Record, - FP: FileProvider, { fn from(inner: ImmutableScan<'scan, R>) -> Self { ScanStream::Immutable { @@ -169,30 +164,27 @@ where } } -impl<'scan, R, FP> From> for ScanStream<'scan, R, FP> +impl<'scan, R> From> for ScanStream<'scan, R> where R: Record, - FP: FileProvider, { - fn from(inner: SsTableScan<'scan, R, FP>) -> Self { + fn from(inner: SsTableScan<'scan, R>) -> Self { ScanStream::SsTable { inner } } } -impl<'scan, R, FP> From> for ScanStream<'scan, R, FP> +impl<'scan, R> From> for ScanStream<'scan, R> where R: Record, - FP: FileProvider, { - fn from(inner: MemProjectionStream<'scan, R, FP>) -> Self { + fn from(inner: MemProjectionStream<'scan, R>) -> Self { ScanStream::MemProjection { inner } } } -impl fmt::Debug for ScanStream<'_, R, FP> +impl fmt::Debug for ScanStream<'_, R> where R: Record, - FP: FileProvider, { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { match self { @@ -206,10 +198,9 @@ where } } -impl<'scan, R, FP> Stream for ScanStream<'scan, R, FP> +impl<'scan, R> Stream for ScanStream<'scan, R> where R: Record, - FP: FileProvider + 'scan, { type Item = Result, parquet::errors::ParquetError>; diff --git a/src/stream/package.rs b/src/stream/package.rs index 314c4ebb..80ea10e5 100644 --- a/src/stream/package.rs +++ b/src/stream/package.rs @@ -7,34 +7,31 @@ use futures_core::Stream; use pin_project_lite::pin_project; use crate::{ - fs::FileProvider, inmem::immutable::{ArrowArrays, Builder}, record::Record, stream::merge::MergeStream, }; pin_project! 
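// Usage sketch for the batching stream defined below, assuming `scans` holds
// ScanStream values like the ones the merge.rs tests build, and that each
// yielded item is one packaged batch (the error type follows the merge layer).
async fn package_sketch(
    scans: Vec<crate::stream::ScanStream<'_, Test>>,
) -> Result<(), parquet::errors::ParquetError> {
    use futures_util::StreamExt;

    let merge = MergeStream::from_vec(scans, 6.into()).await?;
    // pack up to 8192 merged entries into each emitted batch
    let mut package = PackageStream::new(8192, merge, None);
    while let Some(batch) = package.next().await {
        batch?;
    }
    Ok(())
}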
{ - pub struct PackageStream<'package, R, FP> + pub struct PackageStream<'package, R> where R: Record, - FP: FileProvider, { row_count: usize, batch_size: usize, - inner: MergeStream<'package, R, FP>, + inner: MergeStream<'package, R>, builder: ::Builder, projection_indices: Option>, } } -impl<'package, R, FP> PackageStream<'package, R, FP> +impl<'package, R> PackageStream<'package, R> where R: Record, - FP: FileProvider + 'package, { pub(crate) fn new( batch_size: usize, - merge: MergeStream<'package, R, FP>, + merge: MergeStream<'package, R>, projection_indices: Option>, ) -> Self { Self { @@ -47,10 +44,9 @@ where } } -impl<'package, R, FP> Stream for PackageStream<'package, R, FP> +impl<'package, R> Stream for PackageStream<'package, R> where R: Record, - FP: FileProvider + 'package, { type Item = Result; @@ -85,12 +81,11 @@ mod tests { use std::{collections::Bound, sync::Arc}; use arrow::array::{BooleanArray, RecordBatch, StringArray, UInt32Array}; + use fusio::{local::TokioFs, path::Path, DynFs}; use futures_util::StreamExt; use tempfile::TempDir; use crate::{ - executor::tokio::TokioExecutor, - fs::FileProvider, inmem::{ immutable::{tests::TestImmutableArrays, ArrowArrays}, mutable::Mutable, @@ -106,16 +101,14 @@ mod tests { #[tokio::test] async fn iter() { let temp_dir = TempDir::new().unwrap(); - let option = DbOption::from(temp_dir.path()); - TokioExecutor::create_dir_all(option.wal_dir_path()) - .await - .unwrap(); + let fs = Arc::new(TokioFs) as Arc; + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); + + fs.create_dir_all(&option.wal_dir_path()).await.unwrap(); let trigger = Arc::new(TriggerFactory::create(option.trigger_type)); - let m1 = Mutable::::new(&option, trigger) - .await - .unwrap(); + let m1 = Mutable::::new(&option, trigger, &fs).await.unwrap(); m1.insert( LogType::Full, Test { @@ -183,7 +176,7 @@ mod tests { .await .unwrap(); - let merge = MergeStream::::from_vec( + let merge = MergeStream::::from_vec( vec![m1 .scan((Bound::Unbounded, Bound::Unbounded), 6.into()) .into()], diff --git a/src/timestamp/mod.rs b/src/timestamp/mod.rs index 1a141099..26d265e9 100644 --- a/src/timestamp/mod.rs +++ b/src/timestamp/mod.rs @@ -1,12 +1,10 @@ pub mod timestamped; -use std::io; - use arrow::{ array::{PrimitiveArray, Scalar}, datatypes::UInt32Type, }; -use tokio::io::{AsyncRead, AsyncWrite}; +use fusio::{Read, Write}; pub(crate) use self::timestamped::*; use crate::serdes::{Decode, Encode}; @@ -36,10 +34,10 @@ impl Timestamp { } impl Encode for Timestamp { - type Error = io::Error; + type Error = fusio::Error; async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { self.0.encode(writer).await } @@ -48,10 +46,10 @@ impl Encode for Timestamp { } } impl Decode for Timestamp { - type Error = io::Error; + type Error = fusio::Error; async fn decode(reader: &mut R) -> Result where - R: AsyncRead + Unpin, + R: Read + Unpin, { u32::decode(reader).await.map(Timestamp) } diff --git a/src/timestamp/timestamped.rs b/src/timestamp/timestamped.rs index bed001f9..9ef18c08 100644 --- a/src/timestamp/timestamped.rs +++ b/src/timestamp/timestamped.rs @@ -1,6 +1,6 @@ use std::{borrow::Borrow, cmp::Ordering, marker::PhantomData, mem::size_of, ptr}; -use tokio::io::{AsyncRead, AsyncWrite}; +use fusio::{Read, Write}; use crate::{ serdes::{Decode, Encode}, @@ -150,7 +150,7 @@ where async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write 
+ Unpin + Send, { self.ts.encode(writer).await?; self.value.encode(writer).await @@ -169,7 +169,7 @@ where async fn decode(reader: &mut R) -> Result where - R: AsyncRead + Unpin, + R: Read + Unpin, { let ts = Timestamp::decode(reader).await?; let value = V::decode(reader).await?; diff --git a/src/transaction.rs b/src/transaction.rs index 1d7cb28a..d0c8f3c0 100644 --- a/src/transaction.rs +++ b/src/transaction.rs @@ -5,6 +5,7 @@ use std::{ }, io, mem::transmute, + sync::Arc, }; use async_lock::RwLockReadGuard; @@ -15,7 +16,7 @@ use thiserror::Error; use crate::{ compaction::CompactTask, - fs::FileProvider, + fs::manager::StoreManager, record::{Key, KeyRef}, stream, stream::mem_projection::MemProjectionStream, @@ -44,27 +45,27 @@ where } /// optimistic ACID transaction, open with /// [`DB::transaction`](crate::DB::transaction) method -pub struct Transaction<'txn, R, FP> +pub struct Transaction<'txn, R> where R: Record, - FP: FileProvider, { ts: Timestamp, local: BTreeMap>, - share: RwLockReadGuard<'txn, Schema>, - version: VersionRef, + share: RwLockReadGuard<'txn, Schema>, + version: VersionRef, lock_map: LockMap, + manager: Arc, } -impl<'txn, R, FP> Transaction<'txn, R, FP> +impl<'txn, R> Transaction<'txn, R> where R: Record + Send, - FP: FileProvider, { pub(crate) fn new( - version: VersionRef, - share: RwLockReadGuard<'txn, Schema>, + version: VersionRef, + share: RwLockReadGuard<'txn, Schema>, lock_map: LockMap, + manager: Arc, ) -> Self { Self { ts: version.load_ts(), @@ -72,6 +73,7 @@ where share, version, lock_map, + manager, } } @@ -86,7 +88,7 @@ where Some(v) => Some(TransactionEntry::Local(v.as_record_ref())), None => self .share - .get(&self.version, key, self.ts, projection) + .get(&self.version, &self.manager, key, self.ts, projection) .await? 
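// Caller's-eye sketch of the optimistic API; `Music` is a hypothetical
// #[derive(Record)] type with a `u64` primary key, and `db` a DB built with a
// StoreManager as in the tests below. Local writes shadow the snapshot until
// commit, which is where a CommitError::WriteConflict can come back.
async fn txn_sketch(db: &DB<Music, TokioExecutor>) {
    let mut txn = db.transaction().await;
    txn.insert(Music { id: 1, name: "tonbo".into() });
    if let Some(entry) = txn.get(&1, Projection::All).await.unwrap() {
        let _ = entry.get(); // projected view of the row
    }
    // conflict detection runs at commit time, not at write time
    txn.commit().await.unwrap();
}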
.and_then(|entry| { if entry.value().is_none() { @@ -102,9 +104,10 @@ where pub fn scan<'scan>( &'scan self, range: (Bound<&'scan R::Key>, Bound<&'scan R::Key>), - ) -> Scan<'scan, R, FP> { + ) -> Scan<'scan, R> { Scan::new( &self.share, + &self.manager, range, self.ts, &self.version, @@ -191,7 +194,7 @@ where } async fn append( - schema: &Schema, + schema: &Schema, log_ty: LogType, key: ::Key, record: Option, @@ -250,12 +253,14 @@ where mod tests { use std::{collections::Bound, sync::Arc}; + use fusio::{local::TokioFs, path::Path}; use futures_util::StreamExt; use tempfile::TempDir; use crate::{ compaction::tests::build_version, executor::tokio::TokioExecutor, + fs::manager::StoreManager, tests::{build_db, build_schema, Test}, transaction::CommitError, version::TransactionTs, @@ -265,11 +270,15 @@ mod tests { #[tokio::test] async fn transaction_read_write() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); - let db = - DB::::new(DbOption::from(temp_dir.path()), TokioExecutor::new()) - .await - .unwrap(); + let db = DB::::new( + DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()), + TokioExecutor::new(), + manager, + ) + .await + .unwrap(); { let mut txn1 = db.transaction().await; txn1.insert("foo".to_string()); @@ -299,13 +308,34 @@ mod tests { #[tokio::test] async fn transaction_get() { let temp_dir = TempDir::new().unwrap(); - let option = Arc::new(DbOption::from(temp_dir.path())); + let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); - let (_, version) = build_version(&option).await; - let (schema, compaction_rx) = build_schema(option.clone()).await.unwrap(); - let db = build_db(option, compaction_rx, TokioExecutor::new(), schema, version) + manager + .create_dir_all(&option.version_log_dir_path()) .await .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); + + let (_, version) = build_version(&option, &manager).await; + let (schema, compaction_rx) = build_schema(option.clone(), manager.base_fs()) + .await + .unwrap(); + let db = build_db( + option, + compaction_rx, + TokioExecutor::new(), + schema, + version, + manager, + ) + .await + .unwrap(); { let _ = db.version_set.increase_ts(); @@ -367,11 +397,12 @@ mod tests { #[tokio::test] async fn write_conflicts() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); - let db = - DB::::new(DbOption::from(temp_dir.path()), TokioExecutor::new()) - .await - .unwrap(); + let db = DB::::new(option, TokioExecutor::new(), manager) + .await + .unwrap(); let mut txn = db.transaction().await; txn.insert(0.to_string()); @@ -400,11 +431,12 @@ mod tests { #[tokio::test] async fn transaction_projection() { let temp_dir = TempDir::new().unwrap(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); - let db = - DB::::new(DbOption::from(temp_dir.path()), TokioExecutor::new()) - .await - .unwrap(); + let db = DB::::new(option, TokioExecutor::new(), manager) + .await + .unwrap(); let mut txn1 = db.transaction().await; txn1.insert(Test { @@ -427,13 +459,34 @@ mod tests { #[tokio::test] async fn transaction_scan() { let temp_dir = TempDir::new().unwrap(); - let option = Arc::new(DbOption::from(temp_dir.path())); + 
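// The setup these tests repeat, as one sketch with the turbofish spelled out:
// a StoreManager now travels with every DbOption, and DB::new takes both.
// let temp_dir = tempfile::tempdir().unwrap();
// let manager = StoreManager::new(Arc::new(TokioFs), vec![]);
// let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap());
// let db = DB::<Test, TokioExecutor>::new(option, TokioExecutor::new(), manager)
//     .await
//     .unwrap();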
let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); + + manager + .create_dir_all(&option.version_log_dir_path()) + .await + .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); - let (_, version) = build_version(&option).await; - let (schema, compaction_rx) = build_schema(option.clone()).await.unwrap(); - let db = build_db(option, compaction_rx, TokioExecutor::new(), schema, version) + let (_, version) = build_version(&option, &manager).await; + let (schema, compaction_rx) = build_schema(option.clone(), manager.base_fs()) .await .unwrap(); + let db = build_db( + option, + compaction_rx, + TokioExecutor::new(), + schema, + version, + manager, + ) + .await + .unwrap(); { // to increase timestamps to 1 because the data ts built in advance is 1 @@ -499,13 +552,34 @@ mod tests { #[tokio::test] async fn test_transaction_scan_bound() { let temp_dir = TempDir::new().unwrap(); - let option = Arc::new(DbOption::from(temp_dir.path())); + let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); + + manager + .create_dir_all(&option.version_log_dir_path()) + .await + .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); - let (_, version) = build_version(&option).await; - let (schema, compaction_rx) = build_schema(option.clone()).await.unwrap(); - let db = build_db(option, compaction_rx, TokioExecutor::new(), schema, version) + let (_, version) = build_version(&option, &manager).await; + let (schema, compaction_rx) = build_schema(option.clone(), manager.base_fs()) .await .unwrap(); + let db = build_db( + option, + compaction_rx, + TokioExecutor::new(), + schema, + version, + manager, + ) + .await + .unwrap(); { // to increase timestamps to 1 because the data ts built in advance is 1 db.version_set.increase_ts(); @@ -652,13 +726,34 @@ mod tests { #[tokio::test] async fn test_transaction_scan_limit() { let temp_dir = TempDir::new().unwrap(); - let option = Arc::new(DbOption::from(temp_dir.path())); + let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); - let (_, version) = build_version(&option).await; - let (schema, compaction_rx) = build_schema(option.clone()).await.unwrap(); - let db = build_db(option, compaction_rx, TokioExecutor::new(), schema, version) + manager + .create_dir_all(&option.version_log_dir_path()) .await .unwrap(); + manager + .create_dir_all(&option.wal_dir_path()) + .await + .unwrap(); + + let (_, version) = build_version(&option, &manager).await; + let (schema, compaction_rx) = build_schema(option.clone(), manager.base_fs()) + .await + .unwrap(); + let db = build_db( + option, + compaction_rx, + TokioExecutor::new(), + schema, + version, + manager, + ) + .await + .unwrap(); let txn = db.transaction().await; txn.commit().await.unwrap(); diff --git a/src/version/cleaner.rs b/src/version/cleaner.rs index fc84d5ed..5fa157e0 100644 --- a/src/version/cleaner.rs +++ b/src/version/cleaner.rs @@ -1,37 +1,45 @@ -use std::{collections::BTreeMap, io, marker::PhantomData, sync::Arc}; +use std::{collections::BTreeMap, sync::Arc}; use flume::{Receiver, Sender}; use crate::{ - fs::{FileId, FileProvider}, + fs::{manager::StoreManager, FileId}, record::Record, timestamp::Timestamp, - 
DbOption,
+    DbError, DbOption,
 };
 
 pub enum CleanTag {
-    Add { ts: Timestamp, gens: Vec<FileId> },
-    Clean { ts: Timestamp },
-    RecoverClean { gen: FileId },
+    Add {
+        ts: Timestamp,
+        gens: Vec<(FileId, usize)>,
+    },
+    Clean {
+        ts: Timestamp,
+    },
+    RecoverClean {
+        wal_id: FileId,
+    },
 }
 
-pub(crate) struct Cleaner<R, FP>
+pub(crate) struct Cleaner<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     tag_recv: Receiver<CleanTag>,
-    gens_map: BTreeMap<Timestamp, (Vec<FileId>, bool)>,
+    gens_map: BTreeMap<Timestamp, (Vec<(FileId, usize)>, bool)>,
     option: Arc<DbOption<R>>,
-    _p: PhantomData<FP>,
+    manager: Arc<StoreManager>,
 }
 
-impl<R, FP> Cleaner<R, FP>
+impl<R> Cleaner<R>
 where
     R: Record,
-    FP: FileProvider,
 {
-    pub(crate) fn new(option: Arc<DbOption<R>>) -> (Self, Sender<CleanTag>) {
+    pub(crate) fn new(
+        option: Arc<DbOption<R>>,
+        manager: Arc<StoreManager>,
+    ) -> (Self, Sender<CleanTag>) {
         let (tag_send, tag_recv) = flume::bounded(option.clean_channel_buffer);
 
         (
@@ -39,13 +47,13 @@ where
                 tag_recv,
                 gens_map: Default::default(),
                 option,
-                _p: Default::default(),
+                manager,
             },
             tag_send,
         )
     }
 
-    pub(crate) async fn listen(&mut self) -> Result<(), io::Error> {
+    pub(crate) async fn listen(&mut self) -> Result<(), DbError> {
         while let Ok(tag) = self.tag_recv.recv_async().await {
             match tag {
                 CleanTag::Add { ts, gens } => {
@@ -60,13 +68,23 @@ where
                         let _ = self.gens_map.insert(first_version, (gens, false));
                         break;
                     }
-                    for gen in gens {
-                        FP::remove(self.option.table_path(&gen)).await?;
+                    for (gen, level) in gens {
+                        let fs = self
+                            .option
+                            .level_fs_path(level)
+                            .map(|path| self.manager.get_fs(path))
+                            .unwrap_or(self.manager.base_fs());
+                        fs.remove(&self.option.table_path(&gen)).await?;
                     }
                 }
             }
-                CleanTag::RecoverClean { gen } => {
-                    FP::remove(self.option.table_path(&gen)).await?;
+                CleanTag::RecoverClean { wal_id: gen } => {
+                    let fs = self
+                        .option
+                        .level_fs_path(0)
+                        .map(|path| self.manager.get_fs(path))
+                        .unwrap_or(self.manager.base_fs());
+                    fs.remove(&self.option.table_path(&gen)).await?;
                }
             }
         }
@@ -79,13 +97,17 @@
 pub(crate) mod tests {
     use std::{sync::Arc, time::Duration};
 
+    use fusio::{
+        local::TokioFs,
+        path::{path_to_local, Path},
+    };
     use tempfile::TempDir;
     use tokio::time::sleep;
     use tracing::error;
 
     use crate::{
         executor::{tokio::TokioExecutor, Executor},
-        fs::{FileId, FileProvider},
+        fs::{default_open_options, manager::StoreManager, FileId},
         tests::Test,
         version::cleaner::{CleanTag, Cleaner},
         DbOption,
@@ -94,28 +116,35 @@ pub(crate) mod tests {
     #[tokio::test]
     async fn test_cleaner() {
         let temp_dir = TempDir::new().unwrap();
-        let option = Arc::new(DbOption::from(temp_dir.path()));
+        let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![]));
+        let option = Arc::new(DbOption::from(
+            Path::from_filesystem_path(temp_dir.path()).unwrap(),
+        ));
 
         let gen_0 = FileId::new();
         let gen_1 = FileId::new();
         let gen_2 = FileId::new();
         let gen_3 = FileId::new();
+        let fs = option
+            .level_fs_path(0)
+            .map(|path| manager.get_fs(path))
+            .unwrap_or(manager.base_fs());
         {
-            TokioExecutor::open(option.table_path(&gen_0))
+            fs.open_options(&option.table_path(&gen_0), default_open_options())
                 .await
                 .unwrap();
-            TokioExecutor::open(option.table_path(&gen_1))
+            fs.open_options(&option.table_path(&gen_1), default_open_options())
                 .await
                 .unwrap();
-            TokioExecutor::open(option.table_path(&gen_2))
+            fs.open_options(&option.table_path(&gen_2), default_open_options())
                 .await
                 .unwrap();
-            TokioExecutor::open(option.table_path(&gen_3))
+            fs.open_options(&option.table_path(&gen_3), default_open_options())
                 .await
                 .unwrap();
         }
 
-        let (mut cleaner, tx) = Cleaner::<Test, TokioExecutor>::new(option.clone());
+        let (mut cleaner, tx) = Cleaner::<Test>::new(option.clone(), manager.clone());
 
         let executor =
TokioExecutor::new(); @@ -127,19 +156,19 @@ pub(crate) mod tests { tx.send_async(CleanTag::Add { ts: 1.into(), - gens: vec![gen_1], + gens: vec![(gen_1, 0)], }) .await .unwrap(); tx.send_async(CleanTag::Add { ts: 0.into(), - gens: vec![gen_0], + gens: vec![(gen_0, 0)], }) .await .unwrap(); tx.send_async(CleanTag::Add { ts: 2.into(), - gens: vec![gen_2], + gens: vec![(gen_2, 0)], }) .await .unwrap(); @@ -147,56 +176,34 @@ pub(crate) mod tests { tx.send_async(CleanTag::Clean { ts: 2.into() }) .await .unwrap(); - assert!(TokioExecutor::file_exist(option.table_path(&gen_0)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_1)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_2)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_3)) - .await - .unwrap()); + + // FIXME + assert!(path_to_local(&option.table_path(&gen_0)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_1)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_2)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_3)).unwrap().exists()); tx.send_async(CleanTag::Clean { ts: 0.into() }) .await .unwrap(); - sleep(Duration::from_millis(1)).await; - assert!(!TokioExecutor::file_exist(option.table_path(&gen_0)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_1)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_2)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_3)) - .await - .unwrap()); + sleep(Duration::from_millis(10)).await; + assert!(!path_to_local(&option.table_path(&gen_0)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_1)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_2)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_3)).unwrap().exists()); tx.send_async(CleanTag::Clean { ts: 1.into() }) .await .unwrap(); - sleep(Duration::from_millis(1)).await; - assert!(!TokioExecutor::file_exist(option.table_path(&gen_1)) - .await - .unwrap()); - assert!(!TokioExecutor::file_exist(option.table_path(&gen_2)) - .await - .unwrap()); - assert!(TokioExecutor::file_exist(option.table_path(&gen_3)) - .await - .unwrap()); + sleep(Duration::from_millis(10)).await; + assert!(!path_to_local(&option.table_path(&gen_1)).unwrap().exists()); + assert!(!path_to_local(&option.table_path(&gen_2)).unwrap().exists()); + assert!(path_to_local(&option.table_path(&gen_3)).unwrap().exists()); - tx.send_async(CleanTag::RecoverClean { gen: gen_3 }) + tx.send_async(CleanTag::RecoverClean { wal_id: gen_3 }) .await .unwrap(); - sleep(Duration::from_millis(1)).await; - assert!(!TokioExecutor::file_exist(option.table_path(&gen_3)) - .await - .unwrap()); + sleep(Duration::from_millis(10)).await; + assert!(!path_to_local(&option.table_path(&gen_3)).unwrap().exists()); } } diff --git a/src/version/edit.rs b/src/version/edit.rs index 93cce830..d591eb1f 100644 --- a/src/version/edit.rs +++ b/src/version/edit.rs @@ -1,6 +1,6 @@ use std::mem::size_of; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use fusio::{Read, Write}; use crate::{ fs::FileId, @@ -21,7 +21,7 @@ impl VersionEdit where K: Decode, { - pub(crate) async fn recover(reader: &mut R) -> Vec> { + pub(crate) async fn recover(reader: &mut R) -> Vec> { let mut edits = Vec::new(); while let Ok(edit) = VersionEdit::decode(reader).await { @@ -39,25 +39,26 @@ where async fn encode(&self, writer: 
&mut W) -> Result<(), Self::Error>
     where
-        W: AsyncWrite + Unpin + Send,
+        W: Write + Unpin + Send,
     {
         match self {
             VersionEdit::Add { scope, level } => {
-                writer.write_all(&0u8.to_le_bytes()).await?;
-                writer.write_all(&level.to_le_bytes()).await?;
+                0u8.encode(writer).await?;
+                level.encode(writer).await?;
                 scope.encode(writer).await?;
             }
             VersionEdit::Remove { gen, level } => {
-                writer.write_all(&1u8.to_le_bytes()).await?;
-                writer.write_all(&level.to_le_bytes()).await?;
-                writer.write_all(&gen.to_bytes()).await?;
+                1u8.encode(writer).await?;
+                level.encode(writer).await?;
+                let (result, _) = writer.write_all(&gen.to_bytes()[..]).await;
+                result?;
             }
             VersionEdit::LatestTimeStamp { ts } => {
-                writer.write_all(&2u8.to_le_bytes()).await?;
+                2u8.encode(writer).await?;
                 ts.encode(writer).await?;
             }
             VersionEdit::NewLogLength { len } => {
-                writer.write_all(&3u8.to_le_bytes()).await?;
+                3u8.encode(writer).await?;
                 len.encode(writer).await?;
             }
         }
@@ -83,34 +84,22 @@ where
 {
     type Error = <K as Decode>::Error;
 
-    async fn decode<R: AsyncRead + Unpin>(reader: &mut R) -> Result<Self, Self::Error> {
-        let edit_type = {
-            let mut len = [0; size_of::<u8>()];
-            reader.read_exact(&mut len).await?;
-            u8::from_le_bytes(len) as usize
-        };
+    async fn decode<R: Read + Unpin>(reader: &mut R) -> Result<Self, Self::Error> {
+        let edit_type = u8::decode(reader).await?;
 
         Ok(match edit_type {
             0 => {
-                let level = {
-                    let mut level = [0; size_of::<u8>()];
-                    reader.read_exact(&mut level).await?;
-                    u8::from_le_bytes(level)
-                };
+                let level = u8::decode(reader).await?;
                 let scope = Scope::<K>::decode(reader).await?;
 
                 VersionEdit::Add { level, scope }
             }
             1 => {
-                let level = {
-                    let mut level = [0; size_of::<u8>()];
-                    reader.read_exact(&mut level).await?;
-                    u8::from_le_bytes(level)
-                };
+                let level = u8::decode(reader).await?;
                 let gen = {
-                    let mut slice = [0; 16];
-                    reader.read_exact(&mut slice).await?;
-                    FileId::from_bytes(slice)
+                    let buf = reader.read_exact(vec![0u8; 16]).await?;
+                    // SAFETY: `read_exact` fills the 16-byte buffer completely,
+                    // so the conversion into `[u8; 16]` cannot fail.
+                    FileId::from_bytes(buf.as_slice().try_into().unwrap())
                 };
 
                 VersionEdit::Remove { level, gen }
             }
@@ -131,6 +120,8 @@
 mod tests {
     use std::io::Cursor;
 
+    use fusio::Seek;
+
     use crate::{fs::FileId, scope::Scope, serdes::Encode, version::edit::VersionEdit};
 
     #[tokio::test]
             VersionEdit::NewLogLength { len: 233 },
         ];
 
-        let bytes = {
-            let mut cursor = Cursor::new(vec![]);
+        let mut buf = Vec::new();
+        let mut cursor = Cursor::new(&mut buf);
 
-            for edit in edits.clone() {
-                edit.encode(&mut cursor).await.unwrap();
-            }
-            cursor.into_inner()
-        };
+        for edit in edits.clone() {
+            edit.encode(&mut cursor).await.unwrap();
+        }
 
         let decode_edits = {
-            let mut cursor = Cursor::new(bytes);
+            cursor.seek(0).await.unwrap();
 
             VersionEdit::<String>::recover(&mut cursor).await
         };
diff --git a/src/version/mod.rs b/src/version/mod.rs
index fdc2c94d..95d319b2 100644
--- a/src/version/mod.rs
+++ b/src/version/mod.rs
@@ -3,7 +3,6 @@ pub(crate) mod edit;
 pub(crate) mod set;
 
 use std::{
-    marker::PhantomData,
     ops::Bound,
     sync::{
         atomic::{AtomicU32, Ordering},
@@ -12,12 +11,13 @@ use std::{
 };
 
 use flume::{SendError, Sender};
+use fusio::DynFs;
 use parquet::arrow::ProjectionMask;
 use thiserror::Error;
 use tracing::error;
 
 use crate::{
-    fs::{FileId, FileProvider},
+    fs::{default_open_options, manager::StoreManager, FileId},
     ondisk::sstable::SsTable,
     record::Record,
     scope::Scope,
@@ -30,7 +30,7 @@ use crate::{
 
 pub(crate) const MAX_LEVEL: usize = 7;
 
-pub(crate) type VersionRef<R, FP> = Arc<Version<R, FP>>;
+pub(crate) type VersionRef<R> = Arc<Version<R>>;
 
 pub(crate) trait TransactionTs {
     fn load_ts(&self) -> Timestamp;
 
     fn increase_ts(&self) -> Timestamp;
 }
 
 #[derive(Debug)]
-pub(crate) struct Version<R, FP>
+pub(crate) struct Version<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     ts: Timestamp,
     pub(crate) level_slice: [Vec<Scope<R::Key>>; MAX_LEVEL],
     clean_sender: Sender<CleanTag>,
     option: Arc<DbOption<R>>,
     timestamp: Arc<AtomicU32>,
     log_length: u32,
-    _p: PhantomData<FP>,
 }
 
-impl<R, FP> Version<R, FP>
+impl<R> Version<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     #[cfg(test)]
     pub(crate) fn new(
@@ -71,7 +68,6 @@ where
             option: option.clone(),
             timestamp,
             log_length: 0,
-            _p: Default::default(),
         }
     }
 
@@ -80,10 +76,9 @@
     }
 }
 
-impl<R, FP> TransactionTs for Version<R, FP>
+impl<R> TransactionTs for Version<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     fn load_ts(&self) -> Timestamp {
         self.timestamp.load(Ordering::Acquire).into()
@@ -94,10 +89,9 @@ where
     }
 }
 
-impl<R, FP> Clone for Version<R, FP>
+impl<R> Clone for Version<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     fn clone(&self) -> Self {
         let mut level_slice = [const { Vec::new() }; MAX_LEVEL];
@@ -113,42 +107,56 @@ where
             option: self.option.clone(),
             timestamp: self.timestamp.clone(),
             log_length: self.log_length,
-            _p: Default::default(),
         }
     }
 }
 
-impl<R, FP> Version<R, FP>
+impl<R> Version<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     pub(crate) async fn query(
         &self,
+        manager: &StoreManager,
         key: &TimestampedRef<R::Key>,
         projection_mask: ProjectionMask,
     ) -> Result<Option<RecordBatchEntry<R>>, VersionError<R>> {
+        let level_0_path = self
+            .option
+            .level_fs_path(0)
+            .unwrap_or(&self.option.base_path);
+        let level_0_fs = manager.get_fs(level_0_path);
         for scope in self.level_slice[0].iter().rev() {
             if !scope.contains(key.value()) {
                 continue;
             }
             if let Some(entry) = self
-                .table_query(key, &scope.gen, projection_mask.clone())
+                .table_query(level_0_fs, key, &scope.gen, projection_mask.clone())
                 .await?
             {
                 return Ok(Some(entry));
             }
         }
-        for level in self.level_slice[1..6].iter() {
-            if level.is_empty() {
+        for (i, sort_runs) in self.level_slice[1..MAX_LEVEL].iter().enumerate() {
+            let level_path = self
+                .option
+                .level_fs_path(i + 1)
+                .unwrap_or(&self.option.base_path);
+            let level_fs = manager.get_fs(level_path);
+            if sort_runs.is_empty() {
                 continue;
             }
-            let index = Self::scope_search(key.value(), level);
-            if !level[index].contains(key.value()) {
+            let index = Self::scope_search(key.value(), sort_runs);
+            if !sort_runs[index].contains(key.value()) {
                 continue;
             }
             if let Some(entry) = self
-                .table_query(key, &level[index].gen, projection_mask.clone())
+                .table_query(
+                    level_fs,
+                    key,
+                    &sort_runs[index].gen,
+                    projection_mask.clone(),
+                )
                .await?
             {
                 return Ok(Some(entry));
@@ -160,14 +168,17 @@
 
     async fn table_query(
         &self,
+        store: &Arc<dyn DynFs>,
         key: &TimestampedRef<<R as Record>::Key>,
         gen: &FileId,
         projection_mask: ProjectionMask,
-    ) -> Result<Option<RecordBatchEntry<R>>, VersionError<R>> {
-        let file = FP::open(self.option.table_path(gen))
+    ) -> Result<Option<RecordBatchEntry<R>>, VersionError<R>> {
+        let file = store
+            .open_options(&self.option.table_path(gen), default_open_options())
             .await
-            .map_err(VersionError::Io)?;
-        SsTable::<R, FP>::open(file)
+            .map_err(VersionError::Fusio)?;
+        SsTable::<R>::open(file)
+            .await?
.get(key, projection_mask) .await .map_err(VersionError::Parquet) @@ -185,20 +196,27 @@ where pub(crate) async fn streams<'streams>( &self, - streams: &mut Vec>, + manager: &StoreManager, + streams: &mut Vec>, range: (Bound<&'streams R::Key>, Bound<&'streams R::Key>), ts: Timestamp, limit: Option, projection_mask: ProjectionMask, ) -> Result<(), VersionError> { + let level_0_path = self + .option + .level_fs_path(0) + .unwrap_or(&self.option.base_path); + let level_0_fs = manager.get_fs(level_0_path); for scope in self.level_slice[0].iter() { if !scope.meets_range(range) { continue; } - let file = FP::open(self.option.table_path(&scope.gen)) + let file = level_0_fs + .open_options(&self.option.table_path(&scope.gen), default_open_options()) .await - .map_err(VersionError::Io)?; - let table = SsTable::open(file); + .map_err(VersionError::Fusio)?; + let table = SsTable::open(file).await?; streams.push(ScanStream::SsTable { inner: table @@ -211,6 +229,11 @@ where if scopes.is_empty() { continue; } + let level_path = self + .option + .level_fs_path(i + 1) + .unwrap_or(&self.option.base_path); + let level_fs = manager.get_fs(level_path); let (mut start, mut end) = (None, None); @@ -237,6 +260,7 @@ where ts, limit, projection_mask.clone(), + level_fs.clone(), ) .unwrap(), }); @@ -261,10 +285,9 @@ where } } -impl Drop for Version +impl Drop for Version where R: Record, - FP: FileProvider, { fn drop(&mut self) { if let Err(err) = self.clean_sender.send(CleanTag::Clean { ts: self.ts }) { @@ -284,6 +307,10 @@ where Io(#[from] std::io::Error), #[error("version parquet error: {0}")] Parquet(#[from] parquet::errors::ParquetError), + #[error("version fusio error: {0}")] + Fusio(#[from] fusio::Error), + #[error("version ulid decode error: {0}")] + UlidDecode(#[from] ulid::DecodeError), #[error("version send error: {0}")] Send(#[from] SendError), } diff --git a/src/version/set.rs b/src/version/set.rs index 3d5f65af..3c3b2bca 100644 --- a/src/version/set.rs +++ b/src/version/set.rs @@ -1,7 +1,6 @@ use std::{ - io::SeekFrom, + collections::BinaryHeap, mem, - pin::pin, sync::{ atomic::{AtomicU32, Ordering}, Arc, @@ -10,12 +9,12 @@ use std::{ use async_lock::RwLock; use flume::Sender; +use fusio::{dynamic::DynFile, fs::FileMeta, Seek}; use futures_util::StreamExt; -use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use super::{TransactionTs, MAX_LEVEL}; use crate::{ - fs::{FileId, FileProvider, FileType}, + fs::{default_open_options, manager::StoreManager, parse_file_id, FileId, FileType}, record::Record, serdes::Encode, timestamp::Timestamp, @@ -23,30 +22,50 @@ use crate::{ DbOption, }; -pub(crate) struct VersionSetInner +struct CmpMeta(FileMeta); + +impl Eq for CmpMeta {} + +impl PartialEq for CmpMeta { + fn eq(&self, other: &Self) -> bool { + self.0.path.eq(&other.0.path) + } +} + +impl PartialOrd for CmpMeta { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for CmpMeta { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.path.cmp(&other.0.path) + } +} + +pub(crate) struct VersionSetInner where R: Record, - FP: FileProvider, { - current: VersionRef, - log_with_id: (FP::File, FileId), + current: VersionRef, + log_with_id: (Box, FileId), } -pub(crate) struct VersionSet +pub(crate) struct VersionSet where R: Record, - FP: FileProvider, { - inner: Arc>>, + inner: Arc>>, clean_sender: Sender, timestamp: Arc, option: Arc>, + manager: Arc, } -impl Clone for VersionSet +impl Clone for VersionSet where R: Record, - FP: FileProvider, { fn clone(&self) -> Self { 
VersionSet {
@@ -54,14 +73,14 @@ where
             clean_sender: self.clean_sender.clone(),
             timestamp: self.timestamp.clone(),
             option: self.option.clone(),
+            manager: self.manager.clone(),
         }
     }
 }
 
-impl<R, FP> TransactionTs for VersionSet<R, FP>
+impl<R> TransactionTs for VersionSet<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     fn load_ts(&self) -> Timestamp {
         self.timestamp.load(Ordering::Acquire).into()
@@ -72,88 +91,90 @@ where
     }
 }
 
-impl<R, FP> VersionSet<R, FP>
+impl<R> VersionSet<R>
 where
     R: Record,
-    FP: FileProvider,
 {
     pub(crate) async fn new(
         clean_sender: Sender<CleanTag>,
         option: Arc<DbOption<R>>,
+        manager: Arc<StoreManager>,
     ) -> Result<Self, VersionError<R>> {
-        let mut log_stream = pin!(FP::list(
-            option.version_log_dir_path(),
-            FileType::Log,
-            true
-        )?);
-        let mut first_log_id = None;
-        let mut version_log_id = None;
-        let mut version_log = None;
+        let fs = manager.base_fs();
+        let version_dir = option.version_log_dir_path();
+        let mut log_stream = fs.list(&version_dir).await?;
+        let mut log_binary_heap = BinaryHeap::with_capacity(3);
 
         // When there are multiple version logs, a crash interrupted the
         // `version_log_snap_shot` process. The second-newest file has the highest
         // data integrity, so it is used as the version log; the older logs are
        // deleted first so that a crash midway through cleanup simply leaves the
        // second-newest file as the newest one after restart.
-        let mut i = 0;
         while let Some(result) = log_stream.next().await {
-            let (log, log_id) = result?;
+            let file_meta = result?;
 
-            if i <= 1 {
-                version_log = Some(log);
-                first_log_id = mem::replace(&mut version_log_id, Some(log_id));
-            } else {
-                FP::remove(option.version_log_path(&log_id)).await?;
-            }
+            log_binary_heap.push(CmpMeta(file_meta));
 
-            i += 1;
+            if log_binary_heap.len() > 2 {
+                if let Some(old_meta) = log_binary_heap.pop() {
+                    fs.remove(&old_meta.0.path).await?;
+                }
+            }
         }
-        if let Some(log_id) = first_log_id {
-            FP::remove(option.version_log_path(&log_id)).await?;
+
+        let second_log_id = log_binary_heap.pop();
+        let latest_log_id = log_binary_heap.pop();
+
+        if let (Some(log_id), Some(_)) = (&latest_log_id, &second_log_id) {
+            fs.remove(&log_id.0.path).await?;
         }
 
-        let (mut log, log_id) = if let (Some(log), Some(log_id)) = (version_log, version_log_id) {
-            (log, log_id)
-        } else {
-            let log_id = FileId::new();
-            let log = FP::open(option.version_log_path(&log_id)).await?;
-            (log, log_id)
-        };
+        let log_id = second_log_id
+            .or(latest_log_id)
+            .map(|file_meta| parse_file_id(&file_meta.0.path, FileType::Log))
+            .transpose()?
+ .flatten() + .unwrap_or_else(FileId::new); + + let mut log = fs + .open_options(&option.version_log_path(&log_id), default_open_options()) + .await?; + log.seek(0).await.unwrap(); let edits = VersionEdit::recover(&mut log).await; - log.seek(SeekFrom::End(0)).await?; let timestamp = Arc::new(AtomicU32::default()); - let set = VersionSet:: { + drop(log_stream); + let set = VersionSet:: { inner: Arc::new(RwLock::new(VersionSetInner { - current: Arc::new(Version:: { + current: Arc::new(Version:: { ts: Timestamp::from(0), level_slice: [const { Vec::new() }; MAX_LEVEL], clean_sender: clean_sender.clone(), option: option.clone(), timestamp: timestamp.clone(), log_length: 0, - _p: Default::default(), }), log_with_id: (log, log_id), })), clean_sender, timestamp, option, + manager, }; set.apply_edits(edits, None, true).await?; Ok(set) } - pub(crate) async fn current(&self) -> VersionRef { + pub(crate) async fn current(&self) -> VersionRef { self.inner.read().await.current.clone() } pub(crate) async fn apply_edits( &self, mut version_edits: Vec>, - delete_gens: Option>, + delete_gens: Option>, is_recover: bool, ) -> Result<(), VersionError> { let timestamp = &self.timestamp; @@ -178,7 +199,11 @@ where if let Some(wal_ids) = scope.wal_ids.take() { for wal_id in wal_ids { // may have been removed after multiple starts - let _ = FP::remove(option.wal_path(&wal_id)).await; + let _ = self + .manager + .base_fs() + .remove(&option.wal_path(&wal_id)) + .await; } } if level == 0 { @@ -203,7 +228,7 @@ where // issue: https://github.com/tonbo-io/tonbo/issues/123 new_version .clean_sender - .send_async(CleanTag::RecoverClean { gen }) + .send_async(CleanTag::RecoverClean { wal_id: gen }) .await .map_err(VersionError::Send)?; } @@ -229,17 +254,22 @@ where .await .map_err(VersionError::Send)?; } - log.flush().await?; + log.sync_all().await?; if edit_len >= option.version_log_snapshot_threshold { + let fs = self.manager.base_fs(); let old_log_id = mem::replace(log_id, FileId::new()); - let _ = mem::replace(log, FP::open(option.version_log_path(log_id)).await?); + let new_log = fs + .open_options(&option.version_log_path(log_id), default_open_options()) + .await?; + let mut old_log = mem::replace(log, new_log); + old_log.close().await?; new_version.log_length = 0; for new_edit in new_version.to_edits() { new_edit.encode(log).await.map_err(VersionError::Encode)?; } - log.flush().await?; - FP::remove(option.version_log_path(&old_log_id)).await?; + log.sync_all().await?; + fs.remove(&option.version_log_path(&old_log_id)).await?; } guard.current = Arc::new(new_version); Ok(()) @@ -248,17 +278,16 @@ where #[cfg(test)] pub(crate) mod tests { - use std::{io::SeekFrom, pin::pin, sync::Arc}; + use std::sync::Arc; use async_lock::RwLock; use flume::{bounded, Sender}; + use fusio::{local::TokioFs, path::Path}; use futures_util::StreamExt; use tempfile::TempDir; - use tokio::io::AsyncSeekExt; use crate::{ - executor::tokio::TokioExecutor, - fs::{FileId, FileProvider, FileType}, + fs::{default_open_options, manager::StoreManager, FileId}, record::Record, scope::Scope, version::{ @@ -270,21 +299,23 @@ pub(crate) mod tests { DbOption, }; - pub(crate) async fn build_version_set( - version: Version, + pub(crate) async fn build_version_set( + version: Version, clean_sender: Sender, option: Arc>, - ) -> Result, VersionError> + manager: Arc, + ) -> Result, VersionError> where R: Record, - FP: FileProvider, { let log_id = FileId::new(); - let mut log = FP::open(option.version_log_path(&log_id)).await?; - 
log.seek(SeekFrom::End(0)).await?; - + let log = manager + .base_fs() + .open_options(&option.version_log_path(&log_id), default_open_options()) + .await?; let timestamp = version.timestamp.clone(); - Ok(VersionSet:: { + + Ok(VersionSet:: { inner: Arc::new(RwLock::new(VersionSetInner { current: Arc::new(version), log_with_id: (log, log_id), @@ -292,20 +323,26 @@ pub(crate) mod tests { clean_sender, timestamp, option, + manager, }) } #[tokio::test] async fn timestamp_persistence() { let temp_dir = TempDir::new().unwrap(); + let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); let (sender, _) = bounded(1); - let option = Arc::new(DbOption::from(temp_dir.path())); - TokioExecutor::create_dir_all(&option.version_log_dir_path()) + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); + manager + .base_fs() + .create_dir_all(&option.version_log_dir_path()) .await .unwrap(); - let version_set: VersionSet = - VersionSet::new(sender.clone(), option.clone()) + let version_set: VersionSet = + VersionSet::new(sender.clone(), option.clone(), manager.clone()) .await .unwrap(); @@ -320,8 +357,8 @@ pub(crate) mod tests { drop(version_set); - let version_set: VersionSet = - VersionSet::new(sender.clone(), option.clone()) + let version_set: VersionSet = + VersionSet::new(sender.clone(), option.clone(), manager) .await .unwrap(); assert_eq!(version_set.load_ts(), 20_u32.into()); @@ -330,17 +367,19 @@ pub(crate) mod tests { #[tokio::test] async fn version_log_snap_shot() { let temp_dir = TempDir::new().unwrap(); + let manager = Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); let (sender, _) = bounded(1); - let mut option = DbOption::from(temp_dir.path()); + let mut option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); option.version_log_snapshot_threshold = 4; let option = Arc::new(option); - TokioExecutor::create_dir_all(&option.version_log_dir_path()) + manager + .create_dir_all(&option.version_log_dir_path()) .await .unwrap(); - let version_set: VersionSet = - VersionSet::new(sender.clone(), option.clone()) + let version_set: VersionSet = + VersionSet::new(sender.clone(), option.clone(), manager.clone()) .await .unwrap(); let gen_0 = FileId::new(); @@ -395,7 +434,7 @@ pub(crate) mod tests { let mut guard = version_set.inner.write().await; let log = &mut guard.log_with_id.0; - log.seek(SeekFrom::Start(0)).await.unwrap(); + log.seek(0).await.unwrap(); let edits = VersionEdit::::recover(log).await; assert_eq!(edits.len(), 3); @@ -418,12 +457,20 @@ pub(crate) mod tests { drop(guard); drop(version_set); - let (mut log, _) = - pin!(TokioExecutor::list(option.version_log_dir_path(), FileType::Log, true).unwrap()) - .next() - .await - .unwrap() - .unwrap(); + let version_dir_path = option.version_log_dir_path(); + let mut stream = manager.base_fs().list(&version_dir_path).await.unwrap(); + let mut logs = Vec::new(); + + while let Some(log) = stream.next().await { + logs.push(log.unwrap()); + } + logs.sort_by(|meta_a, meta_b| meta_a.path.cmp(&meta_b.path)); + + let mut log = manager + .base_fs() + .open_options(&logs.pop().unwrap().path, default_open_options()) + .await + .unwrap(); let edits = VersionEdit::::recover(&mut log).await; assert_eq!(edits.len(), 3); @@ -448,16 +495,20 @@ pub(crate) mod tests { #[tokio::test] async fn version_level_sort() { let temp_dir = TempDir::new().unwrap(); - let option = DbOption::from(temp_dir.path()); - let option = Arc::new(option); + let manager = 
Arc::new(StoreManager::new(Arc::new(TokioFs), vec![])); + let option = Arc::new(DbOption::from( + Path::from_filesystem_path(temp_dir.path()).unwrap(), + )); let (sender, _) = bounded(1); - TokioExecutor::create_dir_all(&option.version_log_dir_path()) + manager + .base_fs() + .create_dir_all(&option.version_log_dir_path()) .await .unwrap(); - let version_set: VersionSet = - VersionSet::new(sender.clone(), option.clone()) + let version_set: VersionSet = + VersionSet::new(sender.clone(), option.clone(), manager) .await .unwrap(); let gen_0 = FileId::new(); diff --git a/src/wal/checksum.rs b/src/wal/checksum.rs index c3a1191f..2dabbd8e 100644 --- a/src/wal/checksum.rs +++ b/src/wal/checksum.rs @@ -1,24 +1,15 @@ -use std::{ - hash::Hasher, - io, - io::Error, - pin::Pin, - task::{Context, Poll}, -}; - -use futures_core::ready; -use pin_project_lite::pin_project; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt, ReadBuf}; - -pin_project! { - pub(crate) struct HashWriter { - hasher: crc32fast::Hasher, - #[pin] - writer: W, - } +use std::{future::Future, hash::Hasher}; + +use fusio::{Error, IoBuf, IoBufMut, MaybeSend, Read, Write}; + +use crate::serdes::{Decode, Encode}; + +pub(crate) struct HashWriter { + hasher: crc32fast::Hasher, + writer: W, } -impl HashWriter { +impl HashWriter { pub(crate) fn new(writer: W) -> Self { Self { hasher: crc32fast::Hasher::new(), @@ -26,46 +17,39 @@ impl HashWriter { } } - pub(crate) async fn eol(mut self) -> io::Result { - self.writer.write(&self.hasher.finish().to_le_bytes()).await + pub(crate) async fn eol(mut self) -> Result<(), fusio::Error> { + let i = self.hasher.finish(); + i.encode(&mut self.writer).await } } -impl AsyncWrite for HashWriter { - fn poll_write( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - buf: &[u8], - ) -> Poll> { - let this = self.project(); - - Poll::Ready(match ready!(this.writer.poll_write(cx, buf)) { - Ok(n) => { - this.hasher.write(&buf[..n]); - Ok(n) - } - e => e, - }) +impl Write for HashWriter { + async fn write_all(&mut self, buf: B) -> (Result<(), Error>, B) { + let (result, buf) = self.writer.write_all(buf).await; + self.hasher.write(buf.as_slice()); + + (result, buf) } - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.project().writer.poll_flush(cx) + fn sync_data(&self) -> impl Future> + MaybeSend { + self.writer.sync_data() } - fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { - self.project().writer.poll_shutdown(cx) + fn sync_all(&self) -> impl Future> + MaybeSend { + self.writer.sync_all() } -} -pin_project! 
{
-    pub(crate) struct HashReader<R> {
-        hasher: crc32fast::Hasher,
-        #[pin]
-        reader: R,
+    fn close(&mut self) -> impl Future<Output = Result<(), Error>> + MaybeSend {
+        self.writer.close()
     }
 }
 
-impl<R: AsyncRead + Unpin> HashReader<R> {
+pub(crate) struct HashReader<R> {
+    hasher: crc32fast::Hasher,
+    reader: R,
+}
+
+impl<R: Read> HashReader<R> {
     pub(crate) fn new(reader: R) -> Self {
         Self {
             hasher: crc32fast::Hasher::new(),
@@ -73,28 +57,55 @@ impl<R: AsyncRead + Unpin> HashReader<R> {
         }
     }
 
-    pub(crate) async fn checksum(mut self) -> io::Result<bool> {
-        let mut hash = [0; 8];
-        self.reader.read_exact(&mut hash).await?;
-        let checksum = u64::from_le_bytes(hash);
+    pub(crate) async fn checksum(mut self) -> Result<bool, fusio::Error> {
+        let checksum = u64::decode(&mut self.reader).await?;
 
         Ok(self.hasher.finish() == checksum)
     }
 }
 
-impl<R: AsyncRead> AsyncRead for HashReader<R> {
-    fn poll_read(
-        self: Pin<&mut Self>,
-        cx: &mut Context<'_>,
-        buf: &mut ReadBuf<'_>,
-    ) -> Poll<io::Result<()>> {
-        let this = self.project();
-        Poll::Ready(match ready!(this.reader.poll_read(cx, buf)) {
-            Ok(()) => {
-                this.hasher.write(buf.filled());
-                Ok(())
-            }
-            e => e,
-        })
+impl<R: Read> Read for HashReader<R> {
+    async fn read_exact<B: IoBufMut>(&mut self, buf: B) -> Result<B, Error> {
+        let bytes = self.reader.read_exact(buf).await?;
+        self.hasher.write(bytes.as_slice());
+
+        Ok(bytes)
+    }
+
+    async fn size(&self) -> Result<u64, Error> {
+        self.reader.size().await
+    }
+}
+
+#[cfg(test)]
+pub(crate) mod tests {
+    use std::io::Cursor;
+
+    use fusio::Seek;
+
+    use crate::{
+        serdes::{Decode, Encode},
+        wal::checksum::{HashReader, HashWriter},
+    };
+
+    #[tokio::test]
+    async fn test_encode_decode() {
+        let mut bytes = Vec::new();
+        let mut cursor = Cursor::new(&mut bytes);
+
+        let mut writer = HashWriter::new(&mut cursor);
+        4_u64.encode(&mut writer).await.unwrap();
+        3_u32.encode(&mut writer).await.unwrap();
+        2_u16.encode(&mut writer).await.unwrap();
+        1_u8.encode(&mut writer).await.unwrap();
+        writer.eol().await.unwrap();
+
+        cursor.seek(0).await.unwrap();
+        let mut reader = HashReader::new(&mut cursor);
+        assert_eq!(u64::decode(&mut reader).await.unwrap(), 4);
+        assert_eq!(u32::decode(&mut reader).await.unwrap(), 3);
+        assert_eq!(u16::decode(&mut reader).await.unwrap(), 2);
+        assert_eq!(u8::decode(&mut reader).await.unwrap(), 1);
+        assert!(reader.checksum().await.unwrap());
     }
 }
diff --git a/src/wal/log.rs b/src/wal/log.rs
index c27285c6..10a816b9 100644
--- a/src/wal/log.rs
+++ b/src/wal/log.rs
@@ -1,6 +1,6 @@
 use std::mem::size_of;
 
-use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt};
+use fusio::{Read, Write};
 
 use crate::serdes::{Decode, Encode};
 
@@ -45,9 +45,9 @@ where
 
     async fn encode<W>(&self, writer: &mut W) -> Result<(), Self::Error>
     where
-        W: AsyncWrite + Unpin + Send,
+        W: Write + Unpin + Send,
     {
-        writer.write_all(&[self.log_type as u8]).await?;
+        (self.log_type as u8).encode(writer).await?;
         self.record.encode(writer).await
     }
 
@@ -64,12 +64,9 @@ where
 
     async fn decode<R>(reader: &mut R) -> Result<Self, Self::Error>
     where
-        R: AsyncRead + Unpin,
+        R: Read + Unpin,
     {
-        let mut log_type = [0];
-        reader.read_exact(&mut log_type).await?;
-        let log_type = LogType::from(log_type[0]);
-
+        let log_type = LogType::from(u8::decode(reader).await?);
         let log = Re::decode(reader).await?;
 
         Ok(Self {
diff --git a/src/wal/mod.rs b/src/wal/mod.rs
index 0c59d256..fd5dcf6d 100644
--- a/src/wal/mod.rs
+++ b/src/wal/mod.rs
@@ -2,14 +2,14 @@ mod checksum;
 pub(crate) mod log;
 pub(crate) mod record_entry;
 
-use std::{io, marker::PhantomData};
+use std::marker::PhantomData;
 
 use async_stream::stream;
 use checksum::{HashReader, HashWriter};
+use fusio::{Read, Write};
 use futures_core::Stream;
 use log::Log;
 use thiserror::Error;
-use tokio::io::{AsyncBufReadExt, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader}; use crate::{ fs::FileId, @@ -42,7 +42,7 @@ impl WalFile { impl WalFile where - F: AsyncWrite + Unpin + Send, + F: Write + Unpin + Send, R: Record, { pub(crate) async fn write<'r>( @@ -59,14 +59,14 @@ where Ok(()) } - pub(crate) async fn flush(&mut self) -> io::Result<()> { - self.file.flush().await + pub(crate) async fn flush(&mut self) -> Result<(), fusio::Error> { + self.file.close().await } } impl WalFile where - F: AsyncRead + Unpin, + F: Read + Unpin, R: Record, { pub(crate) fn recover( @@ -78,17 +78,13 @@ where >, > + '_ { stream! { - let mut file = BufReader::new(&mut self.file); - loop { - if file.buffer().is_empty() && file.fill_buf().await?.is_empty() { - return; - } - - let mut reader = HashReader::new(&mut file); - - let record = Log::>::decode(&mut reader).await.map_err(RecoverError::Io)?; + let mut reader = HashReader::new(&mut self.file); + let record = match Log::>::decode(&mut reader).await { + Ok(record) => record, + Err(_) => return, + }; if !reader.checksum().await? { yield Err(RecoverError::Checksum); return; @@ -111,12 +107,15 @@ pub enum RecoverError { Checksum, #[error("wal recover io error")] Io(#[from] std::io::Error), + #[error("wal recover fusio error")] + Fusio(#[from] fusio::Error), } #[cfg(test)] mod tests { use std::{io::Cursor, pin::pin}; + use fusio::Seek; use futures_util::StreamExt; use super::{log::LogType, FileId, WalFile}; @@ -124,9 +123,10 @@ mod tests { #[tokio::test] async fn write_and_recover() { - let mut file = Vec::new(); + let mut bytes = Vec::new(); + let mut file = Cursor::new(&mut bytes); { - let mut wal = WalFile::<_, String>::new(Cursor::new(&mut file), FileId::new()); + let mut wal = WalFile::<_, String>::new(&mut file, FileId::new()); wal.write( LogType::Full, Timestamped::new("hello", 0.into()), @@ -137,7 +137,8 @@ mod tests { wal.flush().await.unwrap(); } { - let mut wal = WalFile::<_, String>::new(Cursor::new(&mut file), FileId::new()); + file.seek(0).await.unwrap(); + let mut wal = WalFile::<_, String>::new(&mut file, FileId::new()); { let mut stream = pin!(wal.recover()); @@ -146,6 +147,8 @@ mod tests { assert_eq!(value, Some("hello".to_string())); } + let mut wal = WalFile::<_, String>::new(&mut file, FileId::new()); + wal.write( LogType::Full, Timestamped::new("world", 1.into()), @@ -156,7 +159,8 @@ mod tests { } { - let mut wal = WalFile::<_, String>::new(Cursor::new(&mut file), FileId::new()); + file.seek(0).await.unwrap(); + let mut wal = WalFile::<_, String>::new(&mut file, FileId::new()); { let mut stream = pin!(wal.recover()); diff --git a/src/wal/record_entry.rs b/src/wal/record_entry.rs index 47517c45..1bbf1429 100644 --- a/src/wal/record_entry.rs +++ b/src/wal/record_entry.rs @@ -1,6 +1,4 @@ -use std::io; - -use tokio::io::{AsyncRead, AsyncWrite}; +use fusio::{Read, Write}; use crate::{ record::{Key, Record}, @@ -20,11 +18,11 @@ impl Encode for RecordEntry<'_, R> where R: Record, { - type Error = io::Error; + type Error = fusio::Error; async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: AsyncWrite + Unpin + Send, + W: Write + Unpin + Send, { if let RecordEntry::Encode((key, recode_ref)) = self { key.encode(writer).await.unwrap(); @@ -47,11 +45,11 @@ impl Decode for RecordEntry<'_, Re> where Re: Record, { - type Error = io::Error; + type Error = fusio::Error; async fn decode(reader: &mut R) -> Result where - R: AsyncRead + Unpin, + R: Read + Unpin, { let key = Timestamped::::decode(reader).await.unwrap(); let 
record = Option::::decode(reader).await.unwrap(); @@ -64,6 +62,8 @@ where mod tests { use std::io::Cursor; + use fusio::Seek; + use crate::{ serdes::{Decode, Encode}, timestamp::Timestamped, @@ -74,15 +74,12 @@ mod tests { async fn encode_and_decode() { let entry: RecordEntry<'static, String> = RecordEntry::Encode((Timestamped::new("hello", 0.into()), Some("hello"))); - let bytes = { - let mut cursor = Cursor::new(vec![]); - - entry.encode(&mut cursor).await.unwrap(); - cursor.into_inner() - }; + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); + entry.encode(&mut cursor).await.unwrap(); let decode_entry = { - let mut cursor = Cursor::new(bytes); + cursor.seek(0).await.unwrap(); RecordEntry::<'static, String>::decode(&mut cursor) .await diff --git a/tests/data_integrity.rs b/tests/data_integrity.rs index 4343c22a..2e4449f0 100644 --- a/tests/data_integrity.rs +++ b/tests/data_integrity.rs @@ -1,10 +1,11 @@ #[cfg(test)] mod tests { - use std::{hash::Hasher, ops::Bound}; + use std::{hash::Hasher, ops::Bound, sync::Arc}; + use fusio::{local::TokioFs, path::Path}; use futures_util::StreamExt; use tempfile::TempDir; - use tonbo::{executor::tokio::TokioExecutor, DbOption, Record, DB}; + use tonbo::{executor::tokio::TokioExecutor, fs::manager::StoreManager, DbOption, Record, DB}; const WRITE_TIMES: usize = 500_000; const STRING_SIZE: usize = 50; @@ -68,10 +69,13 @@ mod tests { let mut primary_key_count = 0; let mut write_hasher = crc32fast::Hasher::new(); + let manager = StoreManager::new(Arc::new(TokioFs), vec![]); let temp_dir = TempDir::new().unwrap(); - let option = DbOption::from(temp_dir.path()); + let option = DbOption::from(Path::from_filesystem_path(temp_dir.path()).unwrap()); - let db: DB = DB::new(option, TokioExecutor::new()).await.unwrap(); + let db: DB = DB::new(option, TokioExecutor::new(), manager) + .await + .unwrap(); for _ in 0..WRITE_TIMES { let customer = gen_record(&mut rng, &mut primary_key_count); diff --git a/tests/macros_correctness.rs b/tests/macros_correctness.rs index 135ab550..d0310f78 100644 --- a/tests/macros_correctness.rs +++ b/tests/macros_correctness.rs @@ -10,9 +10,10 @@ pub struct User { #[cfg(test)] mod tests { - use std::sync::Arc; + use std::{io::Cursor, sync::Arc}; use arrow::array::{BooleanArray, RecordBatch, StringArray, UInt32Array, UInt8Array}; + use fusio::Seek; use parquet::{ arrow::{arrow_to_parquet_schema, ProjectionMask}, format::SortingColumn, @@ -184,14 +185,14 @@ mod tests { age: 32, }; let original_ref = original.as_record_ref(); - let mut buffer = Vec::new(); + let mut bytes = Vec::new(); + let mut cursor = Cursor::new(&mut bytes); assert_eq!(original_ref.size(), 26); - original_ref.encode(&mut buffer).await.unwrap(); + original_ref.encode(&mut cursor).await.unwrap(); - let mut cursor = std::io::Cursor::new(buffer); + cursor.seek(0).await.unwrap(); let decoded = User::decode(&mut cursor).await.unwrap(); - assert_eq!(original, decoded); } diff --git a/tonbo_macros/src/record.rs b/tonbo_macros/src/record.rs index 8b0e0249..5f562df7 100644 --- a/tonbo_macros/src/record.rs +++ b/tonbo_macros/src/record.rs @@ -310,7 +310,7 @@ fn trait_decode_codegen(struct_name: &Ident, fields: &[RecordStructFieldOpt]) -> async fn decode(reader: &mut R) -> Result where - R: ::tokio::io::AsyncRead + Unpin, + R: ::fusio::Read + Unpin, { #(#decode_method_fields)* @@ -498,7 +498,7 @@ fn trait_encode_codegen(struct_name: &Ident, fields: &[RecordStructFieldOpt]) -> async fn encode(&self, writer: &mut W) -> Result<(), Self::Error> where - W: 
::tokio::io::AsyncWrite + Unpin + Send, + W: ::fusio::Write + Unpin + Send, { #(#encode_method_fields)* @@ -691,15 +691,21 @@ fn struct_builder_codegen( self.#field_name.append_null(); }); } else { - builder_push_some_fields.push(quote! { - self.#field_name.append_value(row.#field_name.unwrap()); - }); - builder_push_none_fields.push(if is_string { - quote!(self.#field_name.append_value("");) + let append_default = if is_string { + quote!(self.#field_name.append_value("")) } else if is_bytes { - quote!(self.#field_name.append_value(&[]);) + quote!(self.#field_name.append_value(&[])) } else { - quote!(self.#field_name.append_value(Default::default());) + quote!(self.#field_name.append_value(Default::default())) + }; + builder_push_some_fields.push(quote! { + match row.#field_name { + Some(#field_name) => self.#field_name.append_value(#field_name), + None => #append_default, + } + }); + builder_push_none_fields.push(quote! { + #append_default; }); } }