From 7921bfa5022ba134df206b6ba94580c1818caed6 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 23 Mar 2023 10:15:24 +0800 Subject: [PATCH 1/6] Status get_msg bugfix --- src/db/version_edit_test.rs | 17 +++++++++++++---- src/util/status.rs | 7 +++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 2c2efec..54f7836 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -26,13 +26,22 @@ mod test { } #[test] - fn test_version_edit_decode_from() { + fn test_version_edit_decode_from_default() { let source = Slice::from("a"); let mut version_edit = VersionEdit::new(); let status = version_edit.decode_from(&source); - println!("status: {}.", status.get_error()); - // todo - // assert_eq!(target.len(), 2); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); + } + + #[test] + fn test_version_edit_decode_from() { + let source = Slice::from("a"); + + let mut version_edit = VersionEdit::new_with_log_number(6); + let status = version_edit.decode_from(&source); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); } } \ No newline at end of file diff --git a/src/util/status.rs b/src/util/status.rs index eedb41f..97b33d6 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -103,10 +103,13 @@ impl Status { self.err.is_invalid_argument() } - pub fn get_error_string(&self) -> String { - self.err.to_string() + pub fn get_msg(&self) -> String { + let msg = &self.msg; + + String::from(msg.as_str()) } + /// 得到 LevelError /// 请注意, err 的所有权会发生转移!!! pub fn get_error(self) -> LevelError { self.err -- Gitee From 772d41240d2948113aef0d4ded3232e8d1373f3e Mon Sep 17 00:00:00 2001 From: fengyang Date: Wed, 29 Mar 2023 18:23:59 +0800 Subject: [PATCH 2/6] Status get_msg bugfix --- src/db/version_edit.rs | 44 ++++++++++++++++++++++++++----------- src/db/version_edit_test.rs | 7 ++++++ 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index c779677..792d00a 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -276,6 +276,8 @@ impl VersionEdit { let version_edit = VersionEdit::new(); let msg : Option = Option::None; + + // todo Coding::get_varint32 存在问题。开发暂停 while msg.is_none() && Coding::get_varint32(source) != 0_u32 { let tag_value = Coding::get_varint32(source); let tag = Tag::from_value(tag_value); @@ -284,31 +286,47 @@ impl VersionEdit { return LevelError::corruption_string("VersionEdit", "unknown tag"); } - // match tag { - // Tag::k_comparator => 1, - // Tag::kLogNumber => 2, - // Tag::kNextFileNumber => 3, - // Tag::kLastSequence => 4, - // Tag::kCompactPointer => 5, - // Tag::kDeletedFile => 6, - // Tag::kNewFile => 7, - // Tag::kPrevLogNumber => 9, - // _ => 0 - // }; } todo!() } /// VersionEdit 输出调试信息 pub fn debug_string(&self) -> Slice { - todo!() + let debug_str = String::from("VersionEdit {"); + + let mut has_comparator_str = String::default(); + if(self.has_comparator_){ + has_comparator_str.push_str(format!("\n Comparator: {}", self.comparator_.as_str()).as_str()); + } + + let mut has_log_number__str = String::default(); + // if(self.has_log_number_){ + // todo + // // let append_log_number = logging.AppendNumberTo(&r, self.log_number_); + // let append_log_number = self.log_number_ + "".as_ref(); + // has_log_number__str.push_str(format!("\n LogNumber: {}", append_log_number).as_str()); + // } + + let rs = format!("{}{}{}", debug_str, has_log_number__str, "\n}\n"); + + Slice::from(rs) } } /// 静态方法 impl<'a> VersionEdit { - pub fn get_internal_key(inout: Slice) -> Result { + pub fn get_internal_key(input: Slice) -> Result { + let key= InternalKey::default(); + todo!() + + // Slice str; + // if (GetLengthPrefixedSlice(input, &str)) { + // dst->DecodeFrom(str); + // return true; + // } else { + // return false; + // } } /// 从 Slice 中解出 level 值 diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 54f7836..b5c06d8 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -44,4 +44,11 @@ mod test { assert!(&status.is_corruption()); assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); } + + #[test] + fn test_version_edit_debug_string() { + let mut version_edit = VersionEdit::new_with_log_number(6); + let debug_str = version_edit.debug_string(); + println!("debug_str: \n {}", debug_str); + } } \ No newline at end of file -- Gitee From fbd7bad138c31d0253a8adb50eec9d2dbe3eff27 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 30 Mar 2023 13:03:02 +0800 Subject: [PATCH 3/6] =?UTF-8?q?FilterBlock=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 +- src/db/version_set.rs | 4 +- src/table/filter_block.rs | 181 ++++++++++++++++++++++++++++-- src/table/filter_block_test.rs | 50 +++++++++ src/table/mod.rs | 1 + src/traits/filter_policy_trait.rs | 2 + src/util/filter_policy.rs | 2 +- src/util/options.rs | 8 +- 8 files changed, 234 insertions(+), 18 deletions(-) create mode 100644 src/table/filter_block_test.rs diff --git a/README.md b/README.md index 97f5fa6..52b2659 100644 --- a/README.md +++ b/README.md @@ -76,8 +76,8 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | | util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | | util.Logger/Log日志库 | peach | 50% | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | 0% | -| FilterBlock, FilterBlockReader | colagy | 0% | +| table.Block, BlockBuilder, FilterBlockBuilder | colagy,fengyang | 20% | +| FilterBlock, FilterBlockReader | colagy,fengyang | 80% | | table.format(Footer, BlockHandle) | 半支烟 | 20% | | db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | 100% | diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 831183d..0a58d3a 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -93,7 +93,9 @@ pub struct Compaction { // size_t level_ptrs_[config::kNumLevels]; } -// .h line 68 - 71 +/// Lookup the value for key. If found, store it in *val and +/// return OK. Else return a non-OK status. Fills *stats. +/// REQUIRES: lock is not held struct GetStats { seek_file: Rc, seek_file_level: i32 diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 524c324..b6fb16c 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,10 +1,35 @@ +use std::sync::Arc; +use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::slice::Slice; use crate::util::Result; -pub struct FilterBlockBuilder {} +/// +/// meta block 构建器 +/// +pub trait FilterBlock { + + /// + /// 构造一个 FilterBlockBuilder + /// + /// # Arguments + /// + /// * `policy`: + /// + /// returns: Self + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use level_db_rust::util::filter_policy::BloomFilterPolicy; + /// + /// let policy = Arc::new(BloomFilterPolicy::new(2)); + /// let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + /// ``` + #[inline] + fn new_with_policy(policy: Arc) -> Self; -impl FilterBlockBuilder { /// 设置block的起始位置 /// /// # Arguments @@ -18,9 +43,8 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.start_block(1024_u64); /// ``` - pub fn start_block(&mut self, _block_offset: u64) { - todo!() - } + #[inline] + fn start_block(&mut self, block_offset: u64); /// 添加key到builder /// @@ -35,9 +59,8 @@ impl FilterBlockBuilder { /// ``` /// /// ``` - pub fn add_key(&mut self, _key: &Slice) { - todo!() - } + fn add_key(&mut self, key: &Slice); + /// 构造filterBlock /// /// # Examples @@ -45,15 +68,149 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.finish(); /// ``` - pub fn finish(&mut self) -> Result { + fn finish(&mut self) -> Result; + + fn get_policy(&self) -> Box<&FP>; + + fn get_key(&self) -> &str; + + fn get_start(&self) -> Vec; + + fn get_result(&self) -> &str; + + fn get_tmp_keys(&self) -> Vec; + + fn get_tmp_filter_offsets(&self) -> Vec; +} + +/// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 +pub struct FilterBlockBuilder { + policy: Arc, + // Flattened key contents + key: String, + // Starting index in keys_ of each key + start: Vec, + // Filter data computed so far + result: String, + // policy_->CreateFilter() argument + tmp_keys: Vec, + filter_offsets: Vec, +} + +pub struct FilterBlockReader { + policy: Arc, + // Pointer to filter data (at block-start) + data: String, + // Pointer to beginning of offset array (at block-end) + offset: String, + // Number of entries in offset array + num: usize, + // Encoding parameter (see kFilterBaseLg in .cc file) + base_lg: usize +} + +impl FilterBlock for FilterBlockBuilder { + fn new_with_policy(policy: Arc) -> Self { + let key = String::new(); + let start:Vec = vec![]; + let result = String::new(); + let tmp_keys:Vec = vec![]; + let filter_offsets:Vec = vec![]; + + Self { + policy, + key, + start, + result, + tmp_keys, + filter_offsets + } + } + + fn start_block(&mut self, block_offset: u64) { + self.generate_filter(); + + todo!() + } + + fn add_key(&mut self, key: &Slice) { + todo!() + } + + fn finish(&mut self) -> Result { + self.generate_filter(); + + todo!() + } + + fn get_policy(&self) -> Box<&FP> { + Box::new(self.policy.as_ref()) + } + + fn get_key(&self) -> &str { + self.key.as_str() + } + + fn get_start(&self) -> Vec { + self.start.to_vec() + } + + fn get_result(&self) -> &str { + self.result.as_str() + } + + fn get_tmp_keys(&self) -> Vec { + self.tmp_keys.to_vec() + } + + fn get_tmp_filter_offsets(&self) -> Vec { + self.filter_offsets.to_vec() + } +} + +impl FilterBlockBuilder { + fn generate_filter(&mut self) { todo!() } } -pub struct FilterBlockReader {} +impl FilterBlockReader { + pub fn new_with_policy(policy: Arc, contents: Slice) -> Self { + let data = String::new(); + let offset = String::new(); -impl FilterBlockReader { - pub fn key_may_match(&self, _block_offset: u64, _key: &Slice) -> bool { + let contents_len = contents.len(); + + // 1 byte for base_lg_ and 4 for start of offset array + if contents_len < 5 { + return Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + }; + + Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + } + + pub fn key_may_match(&self, block_offset: u64, key: &Slice) -> bool { todo!() } + + pub fn get_policy(&self) -> Box<&FP> { + Box::new(self.policy.as_ref()) + } + + // data, + // offset, + // num: 0, + // base_lg: 0 } \ No newline at end of file diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs new file mode 100644 index 0000000..74e87ed --- /dev/null +++ b/src/table/filter_block_test.rs @@ -0,0 +1,50 @@ + +mod test { + use std::sync::Arc; + use crate::table::filter_block; + use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; + use crate::traits::filter_policy_trait::FilterPolicy; + use crate::util::filter_policy::BloomFilterPolicy; + use crate::util::slice::Slice; + + #[test] + fn test_filter_block_new_with_policy() { + let policy = Arc::new(BloomFilterPolicy::new(2)); + + let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + + let fp = filter_block.get_policy(); + let filter_policy_name = fp.name(); + assert_eq!(filter_policy_name, "leveldb.BuiltinBloomFilter"); + assert_eq!(filter_block.get_key(), ""); + assert_eq!(filter_block.get_result(), ""); + assert_eq!(filter_block.get_start().len(), 0); + assert_eq!(filter_block.get_tmp_keys().len(), 0); + assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); + } + + #[test] + fn test_filter_block_reader_new_with_policy_empty_content() { + let policy = Arc::new(BloomFilterPolicy::new(2)); + let contents = Slice::default(); + + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + + let fp_reader = filter_block_reader.get_policy(); + let _reader_filter_policy_name = fp_reader.name(); + assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + } + + // #[test] + // fn test_filter_block_reader_new_with_policy_with_content() { + // let policy = Arc::new(BloomFilterPolicy::new(2)); + // let contents = Slice::default(""); + // + // let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + // + // let fp_reader = filter_block_reader.get_policy(); + // let _reader_filter_policy_name = fp_reader.name(); + // assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + // // assert_eq!(filter_block_reader.get_key(), ""); + // } +} \ No newline at end of file diff --git a/src/table/mod.rs b/src/table/mod.rs index ade478f..f928426 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,6 +1,7 @@ pub mod block; pub mod block_builder; pub mod filter_block; +mod filter_block_test; pub mod format; mod format_test; pub(crate) mod ss_table; diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index f3e4ad0..6aa0f06 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -23,6 +23,8 @@ pub trait FilterPolicy { /// # Examples /// /// ``` + /// use crate::util::slice::Slice; + /// /// let mut keys : Vec = Vec::new(); /// keys.push(Slice::try_from(String::from("hello")).unwrap()); /// keys.push(Slice::try_from(String::from("world")).unwrap()); diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index f493364..83127d2 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -79,7 +79,7 @@ impl FromPolicy for BloomFilterPolicy { impl FilterPolicy for BloomFilterPolicy { fn name(&self) -> String { - String::from("leveldb.BuiltinBloomFilter2") + String::from("leveldb.BuiltinBloomFilter") } fn create_filter(&self, keys: Vec) -> Slice { diff --git a/src/util/options.rs b/src/util/options.rs index 9028980..fffba75 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,6 +1,8 @@ +use std::sync::Arc; use crate::db::db::Snapshot; use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; +use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::comparator::BytewiseComparatorImpl; pub enum CompressionType { @@ -13,7 +15,9 @@ pub struct Env {} pub struct Cache {} -pub struct FilterPolicy {} +// 使用如下定义(后续路径会重构) +// use crate::traits::filter_policy_trait::FilterPolicy; +// pub struct FilterPolicy {} pub struct Options { @@ -96,7 +100,7 @@ pub struct Options { /// If non-null, use the specified filter policy to reduce disk reads. /// Many applications will benefit from passing the result of /// NewBloomFilterPolicy() here. - pub filter_policy: Option, + pub filter_policy: Option>, } /// Options that control read operations pub struct ReadOptions { -- Gitee From b9b272a550057cab7721c570073fe035617278a3 Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 30 Mar 2023 17:50:15 +0800 Subject: [PATCH 4/6] =?UTF-8?q?FilterBlock=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/filter_block.rs | 89 ++++++++++++++++++----- src/table/filter_block_test.rs | 127 ++++++++++++++++++++++++++++++++- src/util/mod.rs | 4 +- 3 files changed, 198 insertions(+), 22 deletions(-) diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index b6fb16c..38e1d16 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,9 +1,15 @@ +use std::io::Write; use std::sync::Arc; use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::coding::Coding; use crate::util::slice::Slice; use crate::util::Result; +// Generate new filter every 2KB of data +const FILTER_BASE_LG: usize = 11; +const FILTER_BASE: usize = 1 << FILTER_BASE_LG; + /// /// meta block 构建器 /// @@ -46,6 +52,8 @@ pub trait FilterBlock { #[inline] fn start_block(&mut self, block_offset: u64); + fn add_key_from_str(&mut self, key: &str); + /// 添加key到builder /// /// # Arguments @@ -72,7 +80,7 @@ pub trait FilterBlock { fn get_policy(&self) -> Box<&FP>; - fn get_key(&self) -> &str; + fn get_keys(&self) -> &str; fn get_start(&self) -> Vec; @@ -87,7 +95,7 @@ pub trait FilterBlock { pub struct FilterBlockBuilder { policy: Arc, // Flattened key contents - key: String, + keys: String, // Starting index in keys_ of each key start: Vec, // Filter data computed so far @@ -100,9 +108,9 @@ pub struct FilterBlockBuilder { pub struct FilterBlockReader { policy: Arc, // Pointer to filter data (at block-start) - data: String, + data: Vec, // Pointer to beginning of offset array (at block-end) - offset: String, + offset: Vec, // Number of entries in offset array num: usize, // Encoding parameter (see kFilterBaseLg in .cc file) @@ -111,7 +119,7 @@ pub struct FilterBlockReader { impl FilterBlock for FilterBlockBuilder { fn new_with_policy(policy: Arc) -> Self { - let key = String::new(); + let keys = String::new(); let start:Vec = vec![]; let result = String::new(); let tmp_keys:Vec = vec![]; @@ -119,7 +127,7 @@ impl FilterBlock for FilterBlockBuilder { Self { policy, - key, + keys, start, result, tmp_keys, @@ -128,9 +136,16 @@ impl FilterBlock for FilterBlockBuilder { } fn start_block(&mut self, block_offset: u64) { - self.generate_filter(); + let filter_index = block_offset / (FILTER_BASE as u64); + assert!(filter_index >= self.filter_offsets.len() as u64); - todo!() + while filter_index > self.filter_offsets.len() as u64 { + self.generate_filter(); + } + } + + fn add_key_from_str(&mut self, key: &str) { + self.add_key(&Slice::from(key)) } fn add_key(&mut self, key: &Slice) { @@ -147,8 +162,8 @@ impl FilterBlock for FilterBlockBuilder { Box::new(self.policy.as_ref()) } - fn get_key(&self) -> &str { - self.key.as_str() + fn get_keys(&self) -> &str { + self.keys.as_str() } fn get_start(&self) -> Vec { @@ -170,14 +185,38 @@ impl FilterBlock for FilterBlockBuilder { impl FilterBlockBuilder { fn generate_filter(&mut self) { - todo!() + let num_keys = self.start.len(); + + if num_keys == 0 { + // Fast path if there are no keys for this filter + self.filter_offsets.push(self.result.len() as u32); + return; + } + + /* Make list of keys from flattened key structure */ + // Simplify length computation + self.start.push(self.keys.len()); + // 如果 new_len 大于 len ,则 Vec 由差异扩展,每个额外的插槽都用 value 填充。如果 new_len 小于 len ,则 Vec 将被截断。 + self.tmp_keys.resize(num_keys, Slice::default()); + + for i in 0..num_keys { + let base = &self.keys.as_bytes()[self.start[i]..]; + let length = self.start[i+1] - self.start[i]; + + let mut tmp_key = Vec::with_capacity(length); + tmp_key.write(&base); + self.tmp_keys[i] = Slice::from_vec(tmp_key); + } + + // Generate filter for current set of keys and append to result_. + } } impl FilterBlockReader { pub fn new_with_policy(policy: Arc, contents: Slice) -> Self { - let data = String::new(); - let offset = String::new(); + let data = Vec::new(); + let offset = Vec::new(); let contents_len = contents.len(); @@ -192,6 +231,11 @@ impl FilterBlockReader { } }; + // let buf = contents.as_ref()[contents_len-5..]; + + // let base_lg_ = contentsVe[contents_len-1]; + + // let last_word: u32 = Coding::decode_fixed32(buf)); Self { policy, data, @@ -209,8 +253,19 @@ impl FilterBlockReader { Box::new(self.policy.as_ref()) } - // data, - // offset, - // num: 0, - // base_lg: 0 + pub fn get_data(&self) -> Vec { + self.data.to_vec() + } + + pub fn get_offset(&self) -> Vec { + self.offset.to_vec() + } + + pub fn get_num(&self) -> usize { + self.num + } + + pub fn get_base_lg(&self) -> usize { + self.base_lg + } } \ No newline at end of file diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 74e87ed..4deb8b8 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -1,11 +1,107 @@ mod test { + use std::borrow::BorrowMut; use std::sync::Arc; use crate::table::filter_block; use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; + use crate::traits::coding_trait::CodingTrait; use crate::traits::filter_policy_trait::FilterPolicy; + use crate::util::coding::Coding; use crate::util::filter_policy::BloomFilterPolicy; use crate::util::slice::Slice; + use crate::util::hash::{Hash, ToHash}; + + use crate::util::Result; + + pub struct TestHashFilter { + //. + } + + impl TestHashFilter { + fn new() -> Self { + Self { + + } + } + } + + impl FilterPolicy for TestHashFilter { + fn name(&self) -> String { + String::from("TestHashFilter") + } + + fn create_filter(&self, keys: Vec) -> Slice { + let mut n: usize = 0; + for i in 0..keys.len() { + n += keys[i].len(); + } + + let mut dst_chars = vec![0; n]; + let dst_chars_u8 = dst_chars.borrow_mut(); + + let mut offset: usize = 0; + for i in 0..keys.len() { + let h = Hash::hash_code(keys[i].as_ref(), 1); + let of = Coding::put_fixed32(dst_chars_u8, offset, h); + offset += of; + } + + Slice::from_buf(dst_chars_u8) + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let h = Hash::hash_code(key.to_vec().as_slice(), 1); + + let mut pos = 0; + while pos <= bloom_filter.size() { + let buf = &bloom_filter.as_ref()[pos..]; + + if h == Coding::decode_fixed32(buf) { + return true + } + + pos += 4; + } + + false + } + } + + #[test] + fn test_create_filter() { + let policy = TestHashFilter::new(); + + let mut keys : Vec = Vec::new(); + keys.push(Slice::try_from(String::from("hello")).unwrap()); + keys.push(Slice::try_from(String::from("world")).unwrap()); + + let bloom_filter: Slice = policy.create_filter(keys); + + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + } #[test] fn test_filter_block_new_with_policy() { @@ -16,13 +112,31 @@ mod test { let fp = filter_block.get_policy(); let filter_policy_name = fp.name(); assert_eq!(filter_policy_name, "leveldb.BuiltinBloomFilter"); - assert_eq!(filter_block.get_key(), ""); + assert_eq!(filter_block.get_keys(), ""); assert_eq!(filter_block.get_result(), ""); assert_eq!(filter_block.get_start().len(), 0); assert_eq!(filter_block.get_tmp_keys().len(), 0); assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); } + #[test] + fn test_filter_block_new_with_policy_and_addkey() { + let policy = Arc::new(BloomFilterPolicy::new(2)); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + + filter_block_builder.start_block(100); + filter_block_builder.add_key_from_str("foo"); + filter_block_builder.add_key_from_str("bar"); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(200); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(300); + filter_block_builder.add_key_from_str("hello"); + + let sliceRs: Result = filter_block_builder.finish(); + + } + #[test] fn test_filter_block_reader_new_with_policy_empty_content() { let policy = Arc::new(BloomFilterPolicy::new(2)); @@ -33,18 +147,25 @@ mod test { let fp_reader = filter_block_reader.get_policy(); let _reader_filter_policy_name = fp_reader.name(); assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + assert_eq!(filter_block_reader.get_data().len(), 0); + assert_eq!(filter_block_reader.get_offset().len(), 0); + assert_eq!(filter_block_reader.get_num(), 0); + assert_eq!(filter_block_reader.get_base_lg(), 0); } // #[test] // fn test_filter_block_reader_new_with_policy_with_content() { // let policy = Arc::new(BloomFilterPolicy::new(2)); - // let contents = Slice::default(""); + // let contents = Slice::from("\000"); // // let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); // // let fp_reader = filter_block_reader.get_policy(); // let _reader_filter_policy_name = fp_reader.name(); // assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); - // // assert_eq!(filter_block_reader.get_key(), ""); + // assert_eq!(filter_block_reader.get_data().len(), 0); + // assert_eq!(filter_block_reader.get_offset().len(), 0); + // assert_eq!(filter_block_reader.get_num(), 0); + // assert_eq!(filter_block_reader.get_base_lg(), 0); // } } \ No newline at end of file diff --git a/src/util/mod.rs b/src/util/mod.rs index ade5ddc..b24bec5 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -30,9 +30,9 @@ mod filter_policy_test; pub mod histogram; mod histogram_test; -mod hash; +pub mod hash; mod hash_test; -mod mutex_lock; +pub mod mutex_lock; mod mutex_lock_test; pub mod random; mod random_test; -- Gitee From f4900f46d9e0fb2a4195deccba8efc445f9438db Mon Sep 17 00:00:00 2001 From: fengyang Date: Thu, 30 Mar 2023 21:09:18 +0800 Subject: [PATCH 5/6] =?UTF-8?q?FilterBlock=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/table/filter_block.rs | 75 +++++++++++++---- src/table/filter_block_test.rs | 129 ++++++++++++++++-------------- src/traits/filter_policy_trait.rs | 4 +- src/util/filter_policy.rs | 16 +++- src/util/filter_policy_test.rs | 9 ++- 5 files changed, 148 insertions(+), 85 deletions(-) diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 38e1d16..183c678 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,5 +1,6 @@ use std::io::Write; use std::sync::Arc; +use crate::traits::coding_trait::CodingTrait; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::coding::Coding; use crate::util::slice::Slice; @@ -21,6 +22,7 @@ pub trait FilterBlock { /// # Arguments /// /// * `policy`: + /// * `capacity`: 初始化容量 /// /// returns: Self /// @@ -34,7 +36,7 @@ pub trait FilterBlock { /// let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); /// ``` #[inline] - fn new_with_policy(policy: Arc) -> Self; + fn new_with_policy(policy: Arc, capacity: usize) -> Self; /// 设置block的起始位置 /// @@ -80,11 +82,11 @@ pub trait FilterBlock { fn get_policy(&self) -> Box<&FP>; - fn get_keys(&self) -> &str; + fn get_keys(&self) -> Vec; fn get_start(&self) -> Vec; - fn get_result(&self) -> &str; + fn get_result(&self) -> Vec; fn get_tmp_keys(&self) -> Vec; @@ -95,11 +97,11 @@ pub trait FilterBlock { pub struct FilterBlockBuilder { policy: Arc, // Flattened key contents - keys: String, + keys: Vec, // Starting index in keys_ of each key start: Vec, // Filter data computed so far - result: String, + result: Vec, // policy_->CreateFilter() argument tmp_keys: Vec, filter_offsets: Vec, @@ -118,10 +120,10 @@ pub struct FilterBlockReader { } impl FilterBlock for FilterBlockBuilder { - fn new_with_policy(policy: Arc) -> Self { - let keys = String::new(); - let start:Vec = vec![]; - let result = String::new(); + fn new_with_policy(policy: Arc, capacity: usize) -> Self { + let keys:Vec = Vec::with_capacity(capacity); + let start:Vec = Vec::with_capacity(capacity); + let result:Vec = Vec::with_capacity(capacity); let tmp_keys:Vec = vec![]; let filter_offsets:Vec = vec![]; @@ -149,29 +151,54 @@ impl FilterBlock for FilterBlockBuilder { } fn add_key(&mut self, key: &Slice) { - todo!() + self.start.push(key.len()); + self.keys.write(key.as_str().as_bytes()).expect("add_key error!"); } fn finish(&mut self) -> Result { - self.generate_filter(); + if self.start.len() != 0 { + self.generate_filter(); + } - todo!() + // Append array of per-filter offsets + let array_offset = self.result.len() as u32; + // 当前需要写入的位置。result 中可能存在数据,因此为 self.result.len() 的位置 + let mut pos: usize = self.result.len(); + + // todo 判断是否需要扩容 + let result_total_capacity = self.result.capacity(); + + let dst_append = self.result.as_mut_slice(); + + for i in 0..self.filter_offsets.len() { + // 判断当前 pos + len 4 + let filter_offset_val = self.filter_offsets[i]; + pos = Coding::put_fixed32(dst_append, pos, filter_offset_val); + } + + pos = Coding::put_fixed32(dst_append, pos, array_offset); + + // Save encoding parameter in result + // todo 判断是否需要扩容 + Coding::put_varint64(self.result.as_mut_slice(), pos, FILTER_BASE_LG as u64); + + Ok(Slice::from_buf(&self.result)) } fn get_policy(&self) -> Box<&FP> { Box::new(self.policy.as_ref()) } - fn get_keys(&self) -> &str { - self.keys.as_str() + fn get_keys(&self) -> Vec { + self.keys.to_vec() } fn get_start(&self) -> Vec { self.start.to_vec() } - fn get_result(&self) -> &str { - self.result.as_str() + fn get_result(&self) -> Vec { + self.result.to_vec() } fn get_tmp_keys(&self) -> Vec { @@ -200,7 +227,7 @@ impl FilterBlockBuilder { self.tmp_keys.resize(num_keys, Slice::default()); for i in 0..num_keys { - let base = &self.keys.as_bytes()[self.start[i]..]; + let base = &self.keys[self.start[i]..]; let length = self.start[i+1] - self.start[i]; let mut tmp_key = Vec::with_capacity(length); @@ -209,7 +236,21 @@ impl FilterBlockBuilder { } // Generate filter for current set of keys and append to result_. + self.filter_offsets.push(self.result.len() as u32); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&self.tmp_keys[0]); + let create_filter:Slice = self.policy.create_filter_with_len(num_keys, keys); + + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + self.result.write(create_filter.as_ref()); + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + self.tmp_keys.clear(); + self.keys.clear(); + self.start.clear(); } } diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs index 4deb8b8..0e57a1a 100644 --- a/src/table/filter_block_test.rs +++ b/src/table/filter_block_test.rs @@ -30,12 +30,18 @@ mod test { String::from("TestHashFilter") } - fn create_filter(&self, keys: Vec) -> Slice { + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { let mut n: usize = 0; for i in 0..keys.len() { n += keys[i].len(); } + self.create_filter_with_len(n, keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let mut n: usize = len; + let mut dst_chars = vec![0; n]; let dst_chars_u8 = dst_chars.borrow_mut(); @@ -67,62 +73,81 @@ mod test { } } - #[test] - fn test_create_filter() { - let policy = TestHashFilter::new(); - - let mut keys : Vec = Vec::new(); - keys.push(Slice::try_from(String::from("hello")).unwrap()); - keys.push(Slice::try_from(String::from("world")).unwrap()); - - let bloom_filter: Slice = policy.create_filter(keys); - - let mut key_may_match = policy.key_may_match( - &Slice::try_from(String::from("hello")).unwrap(), - &bloom_filter); - assert!(key_may_match); - - key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), - &bloom_filter); - assert!(key_may_match); - - let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - - key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), - &bloom_filter); - assert!(!key_not_match); - } + // #[test] + // fn test_create_filter() { + // let policy = TestHashFilter::new(); + // + // let s1 = Slice::try_from(String::from("hello")).unwrap(); + // let s2 = Slice::try_from(String::from("world")).unwrap(); + // let mut keys : Vec<&Slice> = Vec::new(); + // keys.push(&s1); + // keys.push(&s2); + // + // let bloom_filter: Slice = policy.create_filter(keys); + // + // let mut key_may_match = policy.key_may_match( + // &Slice::try_from(String::from("hello")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // } #[test] fn test_filter_block_new_with_policy() { - let policy = Arc::new(BloomFilterPolicy::new(2)); + let policy = Arc::new(TestHashFilter::new()); - let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy, 10); let fp = filter_block.get_policy(); let filter_policy_name = fp.name(); - assert_eq!(filter_policy_name, "leveldb.BuiltinBloomFilter"); - assert_eq!(filter_block.get_keys(), ""); - assert_eq!(filter_block.get_result(), ""); + assert_eq!(filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block.get_keys().len(), 0); + assert_eq!(filter_block.get_result().len(), 0); assert_eq!(filter_block.get_start().len(), 0); assert_eq!(filter_block.get_tmp_keys().len(), 0); assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); } + #[test] + fn test_filter_block_reader_new_with_policy_empty_content() { + let policy = Arc::new(TestHashFilter::new()); + let contents = Slice::default(); + + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + + let fp_reader = filter_block_reader.get_policy(); + let _reader_filter_policy_name = fp_reader.name(); + assert_eq!(_reader_filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block_reader.get_data().len(), 0); + assert_eq!(filter_block_reader.get_offset().len(), 0); + assert_eq!(filter_block_reader.get_num(), 0); + assert_eq!(filter_block_reader.get_base_lg(), 0); + } + #[test] fn test_filter_block_new_with_policy_and_addkey() { - let policy = Arc::new(BloomFilterPolicy::new(2)); - let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + let policy = Arc::new(TestHashFilter::new()); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy( + policy, 10); filter_block_builder.start_block(100); filter_block_builder.add_key_from_str("foo"); @@ -134,23 +159,7 @@ mod test { filter_block_builder.add_key_from_str("hello"); let sliceRs: Result = filter_block_builder.finish(); - - } - - #[test] - fn test_filter_block_reader_new_with_policy_empty_content() { - let policy = Arc::new(BloomFilterPolicy::new(2)); - let contents = Slice::default(); - - let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); - - let fp_reader = filter_block_reader.get_policy(); - let _reader_filter_policy_name = fp_reader.name(); - assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); - assert_eq!(filter_block_reader.get_data().len(), 0); - assert_eq!(filter_block_reader.get_offset().len(), 0); - assert_eq!(filter_block_reader.get_num(), 0); - assert_eq!(filter_block_reader.get_base_lg(), 0); + assert_eq!("a", "leveldb.BuiltinBloomFilter"); } // #[test] diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 6aa0f06..3920604 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -32,7 +32,9 @@ pub trait FilterPolicy { /// let policy = BloomFilterPolicy::new(800); /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Vec) -> Slice; + fn create_filter(&self, keys: Vec<&Slice>) -> Slice; + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice; /// /// diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 83127d2..726e541 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -82,8 +82,12 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter") } - fn create_filter(&self, keys: Vec) -> Slice { - let n: usize = keys.len(); + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let n: usize = len; let mut bits: usize = n * self.bits_per_key; @@ -100,7 +104,7 @@ impl FilterPolicy for BloomFilterPolicy { dst_chars[bytes] = self.k as u8; for i in 0..n { - let slice = keys.get(i).unwrap(); + let slice = keys[i]; let mut h : u32 = slice.bloom_hash(); let delta : u32 = (h >> 17) | (h << 15); @@ -173,7 +177,11 @@ impl FilterPolicy for InternalFilterPolicy { todo!() } - fn create_filter(&self, keys: Vec) -> Slice { + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, // 并把根据这些key创建的filter追加到 dst中。 diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index e84ee12..bea6d2a 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -33,9 +33,12 @@ fn test_new() { fn test_create_filter() { let policy = BloomFilterPolicy::new(800); - let mut keys : Vec = Vec::new(); - keys.push(Slice::try_from(String::from("hello")).unwrap()); - keys.push(Slice::try_from(String::from("world")).unwrap()); + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); let bloom_filter: Slice = policy.create_filter(keys); -- Gitee From db74b32f56222640d8eb9ccb55dbf5eb7e443013 Mon Sep 17 00:00:00 2001 From: fengyang Date: Fri, 31 Mar 2023 18:54:34 +0800 Subject: [PATCH 6/6] =?UTF-8?q?SST=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 16 ++++++-- src/db/builder.rs | 72 +++++++++++++++++++++++++++++++++ src/db/builder_test.rs | 0 src/db/db_format.rs | 33 +++++++-------- src/db/file_meta_data.rs | 4 ++ src/db/filename.rs | 11 +++++ src/db/filename_test.rs | 0 src/db/mod.rs | 4 ++ src/db/skip_list.rs | 4 ++ src/db/version_set.rs | 3 +- src/table/block_builder.rs | 5 +++ src/table/mod.rs | 6 ++- src/table/table_builder.rs | 59 +++++++++++++++++++++++++++ src/table/table_builder_test.rs | 0 src/traits/iterator.rs | 9 +++++ src/util/env.rs | 13 ++++++ src/util/env_test.rs | 0 src/util/mod.rs | 2 + src/util/options.rs | 4 +- 19 files changed, 220 insertions(+), 25 deletions(-) create mode 100644 src/db/builder.rs create mode 100644 src/db/builder_test.rs create mode 100644 src/db/filename.rs create mode 100644 src/db/filename_test.rs create mode 100644 src/table/table_builder.rs create mode 100644 src/table/table_builder_test.rs create mode 100644 src/util/env.rs create mode 100644 src/util/env_test.rs diff --git a/README.md b/README.md index 52b2659..b76735c 100644 --- a/README.md +++ b/README.md @@ -76,14 +76,21 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | | util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | | util.Logger/Log日志库 | peach | 50% | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy,fengyang | 20% | -| FilterBlock, FilterBlockReader | colagy,fengyang | 80% | | table.format(Footer, BlockHandle) | 半支烟 | 20% | | db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | 100% | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | 0% | +| table.Iterator(DBIter、EmptyIterator) | kazeseiriou | 0% | +| table.Iterator(merger.MergingIterator) | kazeseiriou | 0% | +| table.Iterator(TwoLevelIterator) | kazeseiriou | 0% | +| table.Iterator(tabletest.KeyConvertingIterator) | kazeseiriou | 0% | +| table.Iterator(dbtest.ModelIter) | kazeseiriou | 0% | +| table.Iterator(Block::Iter) | fengyang | 0% | | IteratorWrapper | kazeseiriou | 0% | | db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | +| db.Builder | fengyang | 20% | +| table.Block | fengyang | 80% | +| table.BlockBuilder, table.FilterBlockBuilder | | | +| FilterBlock, FilterBlockReader | fengyang | 80% | | SSTable | fengyang | 0% | | table.Table | peach,tzcyujunyong | | | db.leveldb_util | wangboo | 0% | @@ -95,6 +102,9 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | | WriteBatch | tzcyujunyong,wangboo | 50% | +| table.table_builder | | | +| filename | | | +| env | | | | | 半支烟 | 40% | diff --git a/src/db/builder.rs b/src/db/builder.rs new file mode 100644 index 0000000..2793bc0 --- /dev/null +++ b/src/db/builder.rs @@ -0,0 +1,72 @@ +use std::ops::Deref; +use std::sync::Arc; +use crate::db::file_meta_data::FileMetaData; +use crate::db::filename::FileName; +use crate::db::table_cache::TableCache; +use crate::table::table_builder::TableBuilder; +use crate::traits::DataIterator; +use crate::util::env::{Env, WritableFile}; +use crate::util::options::{Options}; +use crate::util::Result; +use crate::util::slice::Slice; +use crate::util::status::{LevelError, Status}; + +pub struct BuildTable {} + +impl BuildTable { + + /// + /// 生成 SSTable + /// + /// Build a Table file from the contents of *iter. + /// The generated file will be named according to meta->number. + /// On success, the rest of meta will be filled with metadata about the generated table. + /// If no data is present in *iter, meta->file_size will be set to zero, and no Table file will be produced. + /// + /// # Arguments + /// + /// * `dbname`: + /// * `env`: + /// * `options`: + /// * `table_cache`: + /// * `iter`: + /// * `meta`: + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn build_table(dbname: &Slice, env: &Env, options: &Options, + table_cache: &TableCache, mut iter: Box, + meta: &mut FileMetaData) -> Result { + meta.set_file_size(0); + iter.seek_to_first(); + + let file_name = FileName::table_file_name(dbname, meta.get_number()); + + if iter.valid() { + let fileRS: Result = env.new_writable_file(&file_name); + if(!fileRS.is_ok()){ + return Err(fileRS.err().unwrap()); + } + + let writableFile = Arc::new(fileRS.unwrap()); + let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writableFile); + + meta.get_smallest().decode_from(&iter.key()); + while iter.valid() && iter.has_next(){ + iter.next(); + + let key = iter.key(); + meta.get_largest().decode_from(&key); + // builder.add(key, iter.value()); + } + } + + + Err(Status::wrapper_str(LevelError::KBadRecord, "a")) + } +} \ No newline at end of file diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/db/db_format.rs b/src/db/db_format.rs index bbe73b1..f318f2c 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -7,6 +7,7 @@ use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; use crate::util::coding::Coding; use crate::util::slice::Slice; +use crate::util::unsafe_slice::UnsafeSlice; pub enum ValueType { /// 0x0 @@ -93,21 +94,21 @@ impl Default for ParsedInternalKey { impl ParsedInternalKey { - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { Slice::default() } /// Return the length of the encoding of "key". - fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { + pub fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { key.user_key.size() + 8 } // 将 self 的数据追加到 result 中 - fn append_internal_key(&self, result: Slice) { + pub fn append_internal_key(&self, result: Slice) { todo!() } - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { Self { user_key, sequence, @@ -118,13 +119,13 @@ impl ParsedInternalKey { /// Attempt to parse an internal key from "internal_key". On success, /// stores the parsed data in "*result", and returns true. /// On error, returns false, leaves "*result" in an undefined state. - fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { + pub fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { // line 173 todo!() } /// Returns the user key portion of an internal key. - fn extract_user_key(internal_key : Slice) -> Slice { + pub fn extract_user_key(internal_key : Slice) -> Slice { todo!() } } @@ -147,7 +148,7 @@ impl PartialEq for InternalKey { } impl InternalKey { - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { // line 145 let result: Slice = Slice::default(); ParsedInternalKey::new(user_key, sequence, value_type) @@ -171,7 +172,7 @@ impl InternalKey { /// ``` /// /// ``` - fn decode_from(&self, input: Slice) { + pub fn decode_from(&self, input: &UnsafeSlice) { todo!() // wangbo @@ -179,7 +180,7 @@ impl InternalKey { } /// 输出 InternalKey 调试信息 - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { // line 164 todo!() } @@ -193,16 +194,16 @@ impl InternalKey { self.rep_.size() } - fn user_key(self) -> Slice { + pub fn user_key(self) -> Slice { ParsedInternalKey::extract_user_key(self.rep_) } - fn set_from(self, p: ParsedInternalKey) { + pub fn set_from(self, p: ParsedInternalKey) { // self.rep_.clear(); p.append_internal_key(self.rep_); } - fn clear(self) { + pub fn clear(self) { // self.rep_.clear(); } } @@ -255,7 +256,7 @@ impl Comparator for InternalKeyComparator { impl LookupKey { /// Initialize *this for looking up user_key at a snapshot with /// the specified sequence number. - fn new(user_key: Slice, sequence: usize) -> Self { + pub fn new(user_key: Slice, sequence: usize) -> Self { let user_key_size = user_key.size(); let need = user_key_size + 13; // A conservative estimate let mut data = Vec::with_capacity(need); @@ -278,12 +279,12 @@ impl LookupKey { } /// Return a key suitable for lookup in a MemTable. - fn mem_table_key(&self) -> Slice { + pub fn mem_table_key(&self) -> Slice { self.data.clone() } /// Return an internal key (suitable for passing to an internal iterator) - fn internal_key(&self) -> Slice { + pub fn internal_key(&self) -> Slice { // line 204 let buf = self.data.as_ref(); let internal_key_buf = &buf[self.user_key_start..]; @@ -291,7 +292,7 @@ impl LookupKey { } /// Return the user key - fn user_key(&self) -> Slice { + pub fn user_key(&self) -> Slice { // line 207 todo!() } diff --git a/src/db/file_meta_data.rs b/src/db/file_meta_data.rs index 7c9e3b6..b04f6d4 100644 --- a/src/db/file_meta_data.rs +++ b/src/db/file_meta_data.rs @@ -77,6 +77,10 @@ impl FileMetaData { self.file_size } + pub fn set_file_size(&mut self, file_size: u64) { + self.file_size = file_size; + } + /// Smallest internal key served by table pub fn get_smallest(&self) -> &InternalKey { &self.smallest diff --git a/src/db/filename.rs b/src/db/filename.rs new file mode 100644 index 0000000..fb1db13 --- /dev/null +++ b/src/db/filename.rs @@ -0,0 +1,11 @@ +use crate::util::slice::Slice; + +pub struct FileName { + +} + +impl FileName { + pub fn table_file_name(dbname: &Slice, number : u64) -> Slice{ + todo!() + } +} \ No newline at end of file diff --git a/src/db/filename_test.rs b/src/db/filename_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/db/mod.rs b/src/db/mod.rs index 24a37d4..0d48432 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -19,6 +19,10 @@ pub mod version_set; mod version_set_test; pub mod version_edit; mod version_edit_test; +pub mod builder; +mod builder_test; +pub mod filename; +mod filename_test; /// 默认调表 pub type DefaultSkipList = SkipList; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index a043adc..ad8b5c3 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -441,6 +441,10 @@ impl DataIterator for Iter { } } + fn has_next(&self) -> bool { + todo!() + } + fn pre(&mut self) { todo!() } diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 0a58d3a..6175203 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -6,7 +6,8 @@ use crate::db::table_cache::TableCache; use crate::db::version_edit::VersionEdit; use crate::traits::comparator_trait::Comparator; use crate::util::cache::Cache; -use crate::util::options::{Env, Options, ReadOptions}; +use crate::util::env::Env; +use crate::util::options::{Options, ReadOptions}; use crate::util::slice::Slice; use crate::util::Result; diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index a58922a..2d6467d 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -1,3 +1,4 @@ +use crate::util::options::Options; use crate::util::slice::Slice; use crate::util::Result; @@ -5,6 +6,10 @@ use crate::util::Result; pub struct BlockBuilder {} impl BlockBuilder { + pub fn new(options: &Options) -> Self { + todo!() + } + /// 添加数据到block /// /// # Arguments diff --git a/src/table/mod.rs b/src/table/mod.rs index f928426..cc22b85 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -4,6 +4,8 @@ pub mod filter_block; mod filter_block_test; pub mod format; mod format_test; -pub(crate) mod ss_table; +pub mod ss_table; mod ss_table_test; -mod iterator_wrapper; \ No newline at end of file +pub mod iterator_wrapper; +pub mod table_builder; +mod table_builder_test; \ No newline at end of file diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs new file mode 100644 index 0000000..bd74361 --- /dev/null +++ b/src/table/table_builder.rs @@ -0,0 +1,59 @@ +use std::borrow::Borrow; +use std::sync::Arc; +use crate::table::block_builder::BlockBuilder; +use crate::table::filter_block::FilterBlockBuilder; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::env::WritableFile; +use crate::util::options::Options; +use crate::util::slice::Slice; +use crate::util::status::Status; + +pub struct TableBuilder { + rep: Rep +} + +struct Rep { + // options: Box, + // index_block_options: Options, + file: Arc, + offset: u64, + status: Status, + // data_block: BlockBuilder, + // index_block: BlockBuilder, + last_key: Slice, + num_entries: u64, + // Either Finish() or Abandon() has been called. + closed: bool, +} + +impl TableBuilder { + pub fn new_with_writable_file(options: &Options, writableFile: Arc) -> Self { + let rep = Rep::new(options, writableFile); + + // Self { + // rep + // } + + todo!() + } + + pub fn add(&self, key: &Slice, value: &Slice) { + todo!() + } +} + +impl Rep { + pub fn new(opt: &Options, writableFile: Arc) -> Self { + Self { + // options: Box::new(*opt), + file: writableFile, + offset: 0, + // todo default Status::OK + status: Status::default(), + // data_block: BlockBuilder::new(&opt), + last_key: Default::default(), + num_entries: 0, + closed: false, + } + } +} \ No newline at end of file diff --git a/src/table/table_builder_test.rs b/src/table/table_builder_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 164fcc5..41a331a 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,6 +1,8 @@ use crate::util::slice::Slice; use crate::util::unsafe_slice::UnsafeSlice; +/// +/// Iterator 迭代器定义 pub trait DataIterator { /// 检查当前位置是否有效 /// @@ -68,6 +70,10 @@ pub trait DataIterator { /// /// ``` fn next(&mut self); + + /// 是否存在下一个元素 + fn has_next(&self) -> bool; + /// 定位到上一个元素 /// /// # Arguments @@ -81,6 +87,7 @@ pub trait DataIterator { /// /// ``` fn pre(&mut self); + /// 获取key值 /// /// # Arguments @@ -93,7 +100,9 @@ pub trait DataIterator { /// ``` /// /// ``` + /// todo UnsafeSlice 与 Slice 应该存在一个共同traits或者struct 便于API操作 fn key(&self) -> UnsafeSlice; + /// 获取value值 /// /// # Arguments diff --git a/src/util/env.rs b/src/util/env.rs new file mode 100644 index 0000000..dbeb438 --- /dev/null +++ b/src/util/env.rs @@ -0,0 +1,13 @@ + +use crate::util::Result; +use crate::util::slice::Slice; + +pub struct Env {} + +pub struct WritableFile {} + +impl Env { + pub fn new_writable_file(&self, fname: &Slice) -> Result { + todo!() + } +} \ No newline at end of file diff --git a/src/util/env_test.rs b/src/util/env_test.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/util/mod.rs b/src/util/mod.rs index b24bec5..c2869aa 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -41,6 +41,8 @@ pub mod debug; pub mod linked_list; mod linked_list_test; pub mod unsafe_slice; +pub mod env; +mod env_test; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/options.rs b/src/util/options.rs index fffba75..4764b88 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -4,15 +4,13 @@ use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::comparator::BytewiseComparatorImpl; +use crate::util::env::Env; pub enum CompressionType { NoCompression, SnappyCompression } -/// TODO temp -pub struct Env {} - pub struct Cache {} // 使用如下定义(后续路径会重构) -- Gitee