diff --git a/README.md b/README.md index 97f5fa6da73f8a89f63107963f7dd8a07545b882..b76735c8e795e34a303dc6a08c2af3b4e805435e 100644 --- a/README.md +++ b/README.md @@ -76,14 +76,21 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | util.Options(ReadOptions, WriteOptions) | kazeseiriou,wangboo | 0% | | util.ENV(WritableFile, SequentialFile, RandomAccessFile, FileLock) | lxd5866 | 0% | | util.Logger/Log日志库 | peach | 50% | -| table.Block, BlockBuilder, FilterBlockBuilder | colagy | 0% | -| FilterBlock, FilterBlockReader | colagy | 0% | | table.format(Footer, BlockHandle) | 半支烟 | 20% | | db.dbformat(InternalKeyComparator, InternalFilterPolicy, LookupKey, InternalKey) | 半支烟 | 20% | | db.SkipList | wangboo | 100% | -| table.Iterator(DBIter, MergingIterator, TwoLevelIterator...) | kazeseiriou | 0% | +| table.Iterator(DBIter、EmptyIterator) | kazeseiriou | 0% | +| table.Iterator(merger.MergingIterator) | kazeseiriou | 0% | +| table.Iterator(TwoLevelIterator) | kazeseiriou | 0% | +| table.Iterator(tabletest.KeyConvertingIterator) | kazeseiriou | 0% | +| table.Iterator(dbtest.ModelIter) | kazeseiriou | 0% | +| table.Iterator(Block::Iter) | fengyang | 0% | | IteratorWrapper | kazeseiriou | 0% | | db.MemTable(MemTable, MemTableIterator) | wangboo,tzcyujunyong | 20% | +| db.Builder | fengyang | 20% | +| table.Block | fengyang | 80% | +| table.BlockBuilder, table.FilterBlockBuilder | | | +| FilterBlock, FilterBlockReader | fengyang | 80% | | SSTable | fengyang | 0% | | table.Table | peach,tzcyujunyong | | | db.leveldb_util | wangboo | 0% | @@ -95,6 +102,9 @@ RUSTFLAGS='--cfg CORE_DEBUG="false"' cargo build --release | db.VersionEdit(Tag, VersionEdit, FileMetaData) | fengyang | 20% | | db.VersionSet(Version, LevelFileNumIterator, SaverState) | fengyang | 20% | | WriteBatch | tzcyujunyong,wangboo | 50% | +| table.table_builder | | | +| filename | | | +| env | | | | | 半支烟 | 40% | diff --git a/src/db/builder.rs b/src/db/builder.rs new file mode 100644 index 0000000000000000000000000000000000000000..2793bc0df494b9f5e91c3312d39f1ddd1111fd8d --- /dev/null +++ b/src/db/builder.rs @@ -0,0 +1,72 @@ +use std::ops::Deref; +use std::sync::Arc; +use crate::db::file_meta_data::FileMetaData; +use crate::db::filename::FileName; +use crate::db::table_cache::TableCache; +use crate::table::table_builder::TableBuilder; +use crate::traits::DataIterator; +use crate::util::env::{Env, WritableFile}; +use crate::util::options::{Options}; +use crate::util::Result; +use crate::util::slice::Slice; +use crate::util::status::{LevelError, Status}; + +pub struct BuildTable {} + +impl BuildTable { + + /// + /// 生成 SSTable + /// + /// Build a Table file from the contents of *iter. + /// The generated file will be named according to meta->number. + /// On success, the rest of meta will be filled with metadata about the generated table. + /// If no data is present in *iter, meta->file_size will be set to zero, and no Table file will be produced. + /// + /// # Arguments + /// + /// * `dbname`: + /// * `env`: + /// * `options`: + /// * `table_cache`: + /// * `iter`: + /// * `meta`: + /// + /// returns: Result + /// + /// # Examples + /// + /// ``` + /// + /// ``` + pub fn build_table(dbname: &Slice, env: &Env, options: &Options, + table_cache: &TableCache, mut iter: Box, + meta: &mut FileMetaData) -> Result { + meta.set_file_size(0); + iter.seek_to_first(); + + let file_name = FileName::table_file_name(dbname, meta.get_number()); + + if iter.valid() { + let fileRS: Result = env.new_writable_file(&file_name); + if(!fileRS.is_ok()){ + return Err(fileRS.err().unwrap()); + } + + let writableFile = Arc::new(fileRS.unwrap()); + let builder: TableBuilder = TableBuilder::new_with_writable_file(options, writableFile); + + meta.get_smallest().decode_from(&iter.key()); + while iter.valid() && iter.has_next(){ + iter.next(); + + let key = iter.key(); + meta.get_largest().decode_from(&key); + // builder.add(key, iter.value()); + } + } + + + Err(Status::wrapper_str(LevelError::KBadRecord, "a")) + } +} \ No newline at end of file diff --git a/src/db/builder_test.rs b/src/db/builder_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/db/db_format.rs b/src/db/db_format.rs index bbe73b10c47a05fb8081024c7b22a637b6236704..f318f2c4804de49d4fa8124c01c5c2bc20c49ccd 100644 --- a/src/db/db_format.rs +++ b/src/db/db_format.rs @@ -7,6 +7,7 @@ use crate::traits::coding_trait::CodingTrait; use crate::traits::comparator_trait::Comparator; use crate::util::coding::Coding; use crate::util::slice::Slice; +use crate::util::unsafe_slice::UnsafeSlice; pub enum ValueType { /// 0x0 @@ -93,21 +94,21 @@ impl Default for ParsedInternalKey { impl ParsedInternalKey { - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { Slice::default() } /// Return the length of the encoding of "key". - fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { + pub fn internal_key_encoding_length(&self, key: ParsedInternalKey) -> usize { key.user_key.size() + 8 } // 将 self 的数据追加到 result 中 - fn append_internal_key(&self, result: Slice) { + pub fn append_internal_key(&self, result: Slice) { todo!() } - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { Self { user_key, sequence, @@ -118,13 +119,13 @@ impl ParsedInternalKey { /// Attempt to parse an internal key from "internal_key". On success, /// stores the parsed data in "*result", and returns true. /// On error, returns false, leaves "*result" in an undefined state. - fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { + pub fn parse_internal_key(internal_key : Slice, target: ParsedInternalKey) -> bool { // line 173 todo!() } /// Returns the user key portion of an internal key. - fn extract_user_key(internal_key : Slice) -> Slice { + pub fn extract_user_key(internal_key : Slice) -> Slice { todo!() } } @@ -147,7 +148,7 @@ impl PartialEq for InternalKey { } impl InternalKey { - fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { + pub fn new(user_key: Slice, sequence: u64, value_type: ValueType) -> Self { // line 145 let result: Slice = Slice::default(); ParsedInternalKey::new(user_key, sequence, value_type) @@ -171,7 +172,7 @@ impl InternalKey { /// ``` /// /// ``` - fn decode_from(&self, input: Slice) { + pub fn decode_from(&self, input: &UnsafeSlice) { todo!() // wangbo @@ -179,7 +180,7 @@ impl InternalKey { } /// 输出 InternalKey 调试信息 - fn debug_string(&self) -> Slice { + pub fn debug_string(&self) -> Slice { // line 164 todo!() } @@ -193,16 +194,16 @@ impl InternalKey { self.rep_.size() } - fn user_key(self) -> Slice { + pub fn user_key(self) -> Slice { ParsedInternalKey::extract_user_key(self.rep_) } - fn set_from(self, p: ParsedInternalKey) { + pub fn set_from(self, p: ParsedInternalKey) { // self.rep_.clear(); p.append_internal_key(self.rep_); } - fn clear(self) { + pub fn clear(self) { // self.rep_.clear(); } } @@ -255,7 +256,7 @@ impl Comparator for InternalKeyComparator { impl LookupKey { /// Initialize *this for looking up user_key at a snapshot with /// the specified sequence number. - fn new(user_key: Slice, sequence: usize) -> Self { + pub fn new(user_key: Slice, sequence: usize) -> Self { let user_key_size = user_key.size(); let need = user_key_size + 13; // A conservative estimate let mut data = Vec::with_capacity(need); @@ -278,12 +279,12 @@ impl LookupKey { } /// Return a key suitable for lookup in a MemTable. - fn mem_table_key(&self) -> Slice { + pub fn mem_table_key(&self) -> Slice { self.data.clone() } /// Return an internal key (suitable for passing to an internal iterator) - fn internal_key(&self) -> Slice { + pub fn internal_key(&self) -> Slice { // line 204 let buf = self.data.as_ref(); let internal_key_buf = &buf[self.user_key_start..]; @@ -291,7 +292,7 @@ impl LookupKey { } /// Return the user key - fn user_key(&self) -> Slice { + pub fn user_key(&self) -> Slice { // line 207 todo!() } diff --git a/src/db/file_meta_data.rs b/src/db/file_meta_data.rs index 7c9e3b679ba01c0bd3faf4fd811e8fb67b051460..b04f6d4bedaaf327b569780d5b1e6e4ed3a2330b 100644 --- a/src/db/file_meta_data.rs +++ b/src/db/file_meta_data.rs @@ -77,6 +77,10 @@ impl FileMetaData { self.file_size } + pub fn set_file_size(&mut self, file_size: u64) { + self.file_size = file_size; + } + /// Smallest internal key served by table pub fn get_smallest(&self) -> &InternalKey { &self.smallest diff --git a/src/db/filename.rs b/src/db/filename.rs new file mode 100644 index 0000000000000000000000000000000000000000..fb1db13c9c59484796aa108eee2de6d89327bb7c --- /dev/null +++ b/src/db/filename.rs @@ -0,0 +1,11 @@ +use crate::util::slice::Slice; + +pub struct FileName { + +} + +impl FileName { + pub fn table_file_name(dbname: &Slice, number : u64) -> Slice{ + todo!() + } +} \ No newline at end of file diff --git a/src/db/filename_test.rs b/src/db/filename_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/db/mod.rs b/src/db/mod.rs index 24a37d49e7a29e2045b6290c1d2b7ca07d0bcd5c..0d48432cff9606b1bdbf2469b4184d5a479704f0 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -19,6 +19,10 @@ pub mod version_set; mod version_set_test; pub mod version_edit; mod version_edit_test; +pub mod builder; +mod builder_test; +pub mod filename; +mod filename_test; /// 默认调表 pub type DefaultSkipList = SkipList; diff --git a/src/db/skip_list.rs b/src/db/skip_list.rs index a043adc40b514eb04cba38e9c502878538f17354..ad8b5c31953a96144479bc8ace3cc2bf4addc692 100644 --- a/src/db/skip_list.rs +++ b/src/db/skip_list.rs @@ -441,6 +441,10 @@ impl DataIterator for Iter { } } + fn has_next(&self) -> bool { + todo!() + } + fn pre(&mut self) { todo!() } diff --git a/src/db/version_edit.rs b/src/db/version_edit.rs index c7796776c25467c91aad375c98eddc18ea894981..792d00afe2b09ccd289cba0486b0ff2d98a5d4d5 100644 --- a/src/db/version_edit.rs +++ b/src/db/version_edit.rs @@ -276,6 +276,8 @@ impl VersionEdit { let version_edit = VersionEdit::new(); let msg : Option = Option::None; + + // todo Coding::get_varint32 存在问题。开发暂停 while msg.is_none() && Coding::get_varint32(source) != 0_u32 { let tag_value = Coding::get_varint32(source); let tag = Tag::from_value(tag_value); @@ -284,31 +286,47 @@ impl VersionEdit { return LevelError::corruption_string("VersionEdit", "unknown tag"); } - // match tag { - // Tag::k_comparator => 1, - // Tag::kLogNumber => 2, - // Tag::kNextFileNumber => 3, - // Tag::kLastSequence => 4, - // Tag::kCompactPointer => 5, - // Tag::kDeletedFile => 6, - // Tag::kNewFile => 7, - // Tag::kPrevLogNumber => 9, - // _ => 0 - // }; } todo!() } /// VersionEdit 输出调试信息 pub fn debug_string(&self) -> Slice { - todo!() + let debug_str = String::from("VersionEdit {"); + + let mut has_comparator_str = String::default(); + if(self.has_comparator_){ + has_comparator_str.push_str(format!("\n Comparator: {}", self.comparator_.as_str()).as_str()); + } + + let mut has_log_number__str = String::default(); + // if(self.has_log_number_){ + // todo + // // let append_log_number = logging.AppendNumberTo(&r, self.log_number_); + // let append_log_number = self.log_number_ + "".as_ref(); + // has_log_number__str.push_str(format!("\n LogNumber: {}", append_log_number).as_str()); + // } + + let rs = format!("{}{}{}", debug_str, has_log_number__str, "\n}\n"); + + Slice::from(rs) } } /// 静态方法 impl<'a> VersionEdit { - pub fn get_internal_key(inout: Slice) -> Result { + pub fn get_internal_key(input: Slice) -> Result { + let key= InternalKey::default(); + todo!() + + // Slice str; + // if (GetLengthPrefixedSlice(input, &str)) { + // dst->DecodeFrom(str); + // return true; + // } else { + // return false; + // } } /// 从 Slice 中解出 level 值 diff --git a/src/db/version_edit_test.rs b/src/db/version_edit_test.rs index 2c2efec824a1ce11feaae552d53f68ba7a402da2..b5c06d84497e51cc3e9ca613359876974231a67c 100644 --- a/src/db/version_edit_test.rs +++ b/src/db/version_edit_test.rs @@ -26,13 +26,29 @@ mod test { } #[test] - fn test_version_edit_decode_from() { + fn test_version_edit_decode_from_default() { let source = Slice::from("a"); let mut version_edit = VersionEdit::new(); let status = version_edit.decode_from(&source); - println!("status: {}.", status.get_error()); - // todo - // assert_eq!(target.len(), 2); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); + } + + #[test] + fn test_version_edit_decode_from() { + let source = Slice::from("a"); + + let mut version_edit = VersionEdit::new_with_log_number(6); + let status = version_edit.decode_from(&source); + assert!(&status.is_corruption()); + assert_eq!(&status.get_msg(), "VersionEdit: unknown tag"); + } + + #[test] + fn test_version_edit_debug_string() { + let mut version_edit = VersionEdit::new_with_log_number(6); + let debug_str = version_edit.debug_string(); + println!("debug_str: \n {}", debug_str); } } \ No newline at end of file diff --git a/src/db/version_set.rs b/src/db/version_set.rs index 831183d37d5f9c98d0ea349a2dd4f83979486fa4..61752032b02be96ba0c8102c0bc1ecde972fae92 100644 --- a/src/db/version_set.rs +++ b/src/db/version_set.rs @@ -6,7 +6,8 @@ use crate::db::table_cache::TableCache; use crate::db::version_edit::VersionEdit; use crate::traits::comparator_trait::Comparator; use crate::util::cache::Cache; -use crate::util::options::{Env, Options, ReadOptions}; +use crate::util::env::Env; +use crate::util::options::{Options, ReadOptions}; use crate::util::slice::Slice; use crate::util::Result; @@ -93,7 +94,9 @@ pub struct Compaction { // size_t level_ptrs_[config::kNumLevels]; } -// .h line 68 - 71 +/// Lookup the value for key. If found, store it in *val and +/// return OK. Else return a non-OK status. Fills *stats. +/// REQUIRES: lock is not held struct GetStats { seek_file: Rc, seek_file_level: i32 diff --git a/src/table/block_builder.rs b/src/table/block_builder.rs index a58922a09598e533c26255af1605d7dcd7362acb..2d6467d39d3429af6857dd91a947228937e604aa 100644 --- a/src/table/block_builder.rs +++ b/src/table/block_builder.rs @@ -1,3 +1,4 @@ +use crate::util::options::Options; use crate::util::slice::Slice; use crate::util::Result; @@ -5,6 +6,10 @@ use crate::util::Result; pub struct BlockBuilder {} impl BlockBuilder { + pub fn new(options: &Options) -> Self { + todo!() + } + /// 添加数据到block /// /// # Arguments diff --git a/src/table/filter_block.rs b/src/table/filter_block.rs index 524c324c4c7c82cbfef98718c4f3617e15104828..183c67811ac27fa0c512458c5f820603ee39f5ba 100644 --- a/src/table/filter_block.rs +++ b/src/table/filter_block.rs @@ -1,10 +1,43 @@ +use std::io::Write; +use std::sync::Arc; +use crate::traits::coding_trait::CodingTrait; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::coding::Coding; use crate::util::slice::Slice; use crate::util::Result; -pub struct FilterBlockBuilder {} +// Generate new filter every 2KB of data +const FILTER_BASE_LG: usize = 11; +const FILTER_BASE: usize = 1 << FILTER_BASE_LG; + +/// +/// meta block 构建器 +/// +pub trait FilterBlock { + + /// + /// 构造一个 FilterBlockBuilder + /// + /// # Arguments + /// + /// * `policy`: + /// * `capacity`: 初始化容量 + /// + /// returns: Self + /// + /// # Examples + /// + /// ``` + /// use std::sync::Arc; + /// use level_db_rust::util::filter_policy::BloomFilterPolicy; + /// + /// let policy = Arc::new(BloomFilterPolicy::new(2)); + /// let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy); + /// ``` + #[inline] + fn new_with_policy(policy: Arc, capacity: usize) -> Self; -impl FilterBlockBuilder { /// 设置block的起始位置 /// /// # Arguments @@ -18,9 +51,10 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.start_block(1024_u64); /// ``` - pub fn start_block(&mut self, _block_offset: u64) { - todo!() - } + #[inline] + fn start_block(&mut self, block_offset: u64); + + fn add_key_from_str(&mut self, key: &str); /// 添加key到builder /// @@ -35,9 +69,8 @@ impl FilterBlockBuilder { /// ``` /// /// ``` - pub fn add_key(&mut self, _key: &Slice) { - todo!() - } + fn add_key(&mut self, key: &Slice); + /// 构造filterBlock /// /// # Examples @@ -45,15 +78,235 @@ impl FilterBlockBuilder { /// ``` /// filter_block_builder.finish(); /// ``` - pub fn finish(&mut self) -> Result { - todo!() + fn finish(&mut self) -> Result; + + fn get_policy(&self) -> Box<&FP>; + + fn get_keys(&self) -> Vec; + + fn get_start(&self) -> Vec; + + fn get_result(&self) -> Vec; + + fn get_tmp_keys(&self) -> Vec; + + fn get_tmp_filter_offsets(&self) -> Vec; +} + +/// SSTable 文件里面的 meta block 构建器, 按内存里面指定的格式整理在内存中 +pub struct FilterBlockBuilder { + policy: Arc, + // Flattened key contents + keys: Vec, + // Starting index in keys_ of each key + start: Vec, + // Filter data computed so far + result: Vec, + // policy_->CreateFilter() argument + tmp_keys: Vec, + filter_offsets: Vec, +} + +pub struct FilterBlockReader { + policy: Arc, + // Pointer to filter data (at block-start) + data: Vec, + // Pointer to beginning of offset array (at block-end) + offset: Vec, + // Number of entries in offset array + num: usize, + // Encoding parameter (see kFilterBaseLg in .cc file) + base_lg: usize +} + +impl FilterBlock for FilterBlockBuilder { + fn new_with_policy(policy: Arc, capacity: usize) -> Self { + let keys:Vec = Vec::with_capacity(capacity); + let start:Vec = Vec::with_capacity(capacity); + let result:Vec = Vec::with_capacity(capacity); + let tmp_keys:Vec = vec![]; + let filter_offsets:Vec = vec![]; + + Self { + policy, + keys, + start, + result, + tmp_keys, + filter_offsets + } + } + + fn start_block(&mut self, block_offset: u64) { + let filter_index = block_offset / (FILTER_BASE as u64); + assert!(filter_index >= self.filter_offsets.len() as u64); + + while filter_index > self.filter_offsets.len() as u64 { + self.generate_filter(); + } + } + + fn add_key_from_str(&mut self, key: &str) { + self.add_key(&Slice::from(key)) + } + + fn add_key(&mut self, key: &Slice) { + self.start.push(key.len()); + self.keys.write(key.as_str().as_bytes()).expect("add_key error!"); + } + + fn finish(&mut self) -> Result { + if self.start.len() != 0 { + self.generate_filter(); + } + + // Append array of per-filter offsets + let array_offset = self.result.len() as u32; + // 当前需要写入的位置。result 中可能存在数据,因此为 self.result.len() 的位置 + let mut pos: usize = self.result.len(); + + // todo 判断是否需要扩容 + let result_total_capacity = self.result.capacity(); + + let dst_append = self.result.as_mut_slice(); + + for i in 0..self.filter_offsets.len() { + // 判断当前 pos + len 4 + let filter_offset_val = self.filter_offsets[i]; + pos = Coding::put_fixed32(dst_append, pos, filter_offset_val); + } + + pos = Coding::put_fixed32(dst_append, pos, array_offset); + + // Save encoding parameter in result + // todo 判断是否需要扩容 + Coding::put_varint64(self.result.as_mut_slice(), pos, FILTER_BASE_LG as u64); + + Ok(Slice::from_buf(&self.result)) + } + + fn get_policy(&self) -> Box<&FP> { + Box::new(self.policy.as_ref()) + } + + fn get_keys(&self) -> Vec { + self.keys.to_vec() + } + + fn get_start(&self) -> Vec { + self.start.to_vec() + } + + fn get_result(&self) -> Vec { + self.result.to_vec() + } + + fn get_tmp_keys(&self) -> Vec { + self.tmp_keys.to_vec() + } + + fn get_tmp_filter_offsets(&self) -> Vec { + self.filter_offsets.to_vec() + } +} + +impl FilterBlockBuilder { + fn generate_filter(&mut self) { + let num_keys = self.start.len(); + + if num_keys == 0 { + // Fast path if there are no keys for this filter + self.filter_offsets.push(self.result.len() as u32); + return; + } + + /* Make list of keys from flattened key structure */ + // Simplify length computation + self.start.push(self.keys.len()); + // 如果 new_len 大于 len ,则 Vec 由差异扩展,每个额外的插槽都用 value 填充。如果 new_len 小于 len ,则 Vec 将被截断。 + self.tmp_keys.resize(num_keys, Slice::default()); + + for i in 0..num_keys { + let base = &self.keys[self.start[i]..]; + let length = self.start[i+1] - self.start[i]; + + let mut tmp_key = Vec::with_capacity(length); + tmp_key.write(&base); + self.tmp_keys[i] = Slice::from_vec(tmp_key); + } + + // Generate filter for current set of keys and append to result_. + self.filter_offsets.push(self.result.len() as u32); + + let mut keys: Vec<&Slice> = Vec::new(); + keys.push(&self.tmp_keys[0]); + let create_filter:Slice = self.policy.create_filter_with_len(num_keys, keys); + + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + self.result.write(create_filter.as_ref()); + // let result_len = self.result.len(); + // let result_total_capacity = self.result.capacity(); + + self.tmp_keys.clear(); + self.keys.clear(); + self.start.clear(); } } -pub struct FilterBlockReader {} +impl FilterBlockReader { + pub fn new_with_policy(policy: Arc, contents: Slice) -> Self { + let data = Vec::new(); + let offset = Vec::new(); + + let contents_len = contents.len(); + + // 1 byte for base_lg_ and 4 for start of offset array + if contents_len < 5 { + return Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + }; + + // let buf = contents.as_ref()[contents_len-5..]; + + // let base_lg_ = contentsVe[contents_len-1]; + + // let last_word: u32 = Coding::decode_fixed32(buf)); + Self { + policy, + data, + offset, + num: 0, + base_lg: 0 + } + } -impl FilterBlockReader { - pub fn key_may_match(&self, _block_offset: u64, _key: &Slice) -> bool { + pub fn key_may_match(&self, block_offset: u64, key: &Slice) -> bool { todo!() } + + pub fn get_policy(&self) -> Box<&FP> { + Box::new(self.policy.as_ref()) + } + + pub fn get_data(&self) -> Vec { + self.data.to_vec() + } + + pub fn get_offset(&self) -> Vec { + self.offset.to_vec() + } + + pub fn get_num(&self) -> usize { + self.num + } + + pub fn get_base_lg(&self) -> usize { + self.base_lg + } } \ No newline at end of file diff --git a/src/table/filter_block_test.rs b/src/table/filter_block_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..0e57a1a046be00f706aa4157e2689c55e9a45f9f --- /dev/null +++ b/src/table/filter_block_test.rs @@ -0,0 +1,180 @@ + +mod test { + use std::borrow::BorrowMut; + use std::sync::Arc; + use crate::table::filter_block; + use crate::table::filter_block::{FilterBlock, FilterBlockBuilder, FilterBlockReader}; + use crate::traits::coding_trait::CodingTrait; + use crate::traits::filter_policy_trait::FilterPolicy; + use crate::util::coding::Coding; + use crate::util::filter_policy::BloomFilterPolicy; + use crate::util::slice::Slice; + use crate::util::hash::{Hash, ToHash}; + + use crate::util::Result; + + pub struct TestHashFilter { + //. + } + + impl TestHashFilter { + fn new() -> Self { + Self { + + } + } + } + + impl FilterPolicy for TestHashFilter { + fn name(&self) -> String { + String::from("TestHashFilter") + } + + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + let mut n: usize = 0; + for i in 0..keys.len() { + n += keys[i].len(); + } + + self.create_filter_with_len(n, keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let mut n: usize = len; + + let mut dst_chars = vec![0; n]; + let dst_chars_u8 = dst_chars.borrow_mut(); + + let mut offset: usize = 0; + for i in 0..keys.len() { + let h = Hash::hash_code(keys[i].as_ref(), 1); + let of = Coding::put_fixed32(dst_chars_u8, offset, h); + offset += of; + } + + Slice::from_buf(dst_chars_u8) + } + + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let h = Hash::hash_code(key.to_vec().as_slice(), 1); + + let mut pos = 0; + while pos <= bloom_filter.size() { + let buf = &bloom_filter.as_ref()[pos..]; + + if h == Coding::decode_fixed32(buf) { + return true + } + + pos += 4; + } + + false + } + } + + // #[test] + // fn test_create_filter() { + // let policy = TestHashFilter::new(); + // + // let s1 = Slice::try_from(String::from("hello")).unwrap(); + // let s2 = Slice::try_from(String::from("world")).unwrap(); + // let mut keys : Vec<&Slice> = Vec::new(); + // keys.push(&s1); + // keys.push(&s2); + // + // let bloom_filter: Slice = policy.create_filter(keys); + // + // let mut key_may_match = policy.key_may_match( + // &Slice::try_from(String::from("hello")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + // + // let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // + // key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + // &bloom_filter); + // assert!(!key_not_match); + // } + + #[test] + fn test_filter_block_new_with_policy() { + let policy = Arc::new(TestHashFilter::new()); + + let filter_block: FilterBlockBuilder = FilterBlockBuilder::new_with_policy(policy, 10); + + let fp = filter_block.get_policy(); + let filter_policy_name = fp.name(); + assert_eq!(filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block.get_keys().len(), 0); + assert_eq!(filter_block.get_result().len(), 0); + assert_eq!(filter_block.get_start().len(), 0); + assert_eq!(filter_block.get_tmp_keys().len(), 0); + assert_eq!(filter_block.get_tmp_filter_offsets().len(), 0); + } + + #[test] + fn test_filter_block_reader_new_with_policy_empty_content() { + let policy = Arc::new(TestHashFilter::new()); + let contents = Slice::default(); + + let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + + let fp_reader = filter_block_reader.get_policy(); + let _reader_filter_policy_name = fp_reader.name(); + assert_eq!(_reader_filter_policy_name, "TestHashFilter"); + assert_eq!(filter_block_reader.get_data().len(), 0); + assert_eq!(filter_block_reader.get_offset().len(), 0); + assert_eq!(filter_block_reader.get_num(), 0); + assert_eq!(filter_block_reader.get_base_lg(), 0); + } + + #[test] + fn test_filter_block_new_with_policy_and_addkey() { + let policy = Arc::new(TestHashFilter::new()); + let mut filter_block_builder: FilterBlockBuilder = FilterBlockBuilder::new_with_policy( + policy, 10); + + filter_block_builder.start_block(100); + filter_block_builder.add_key_from_str("foo"); + filter_block_builder.add_key_from_str("bar"); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(200); + filter_block_builder.add_key_from_str("box"); + filter_block_builder.start_block(300); + filter_block_builder.add_key_from_str("hello"); + + let sliceRs: Result = filter_block_builder.finish(); + assert_eq!("a", "leveldb.BuiltinBloomFilter"); + } + + // #[test] + // fn test_filter_block_reader_new_with_policy_with_content() { + // let policy = Arc::new(BloomFilterPolicy::new(2)); + // let contents = Slice::from("\000"); + // + // let filter_block_reader: FilterBlockReader = FilterBlockReader::new_with_policy(policy, contents); + // + // let fp_reader = filter_block_reader.get_policy(); + // let _reader_filter_policy_name = fp_reader.name(); + // assert_eq!(_reader_filter_policy_name, "leveldb.BuiltinBloomFilter"); + // assert_eq!(filter_block_reader.get_data().len(), 0); + // assert_eq!(filter_block_reader.get_offset().len(), 0); + // assert_eq!(filter_block_reader.get_num(), 0); + // assert_eq!(filter_block_reader.get_base_lg(), 0); + // } +} \ No newline at end of file diff --git a/src/table/mod.rs b/src/table/mod.rs index ade478fc4045f8c85b832af1adda4f74d428e097..cc22b85d7a588c8d4b67a39b921050ca305a44dd 100644 --- a/src/table/mod.rs +++ b/src/table/mod.rs @@ -1,8 +1,11 @@ pub mod block; pub mod block_builder; pub mod filter_block; +mod filter_block_test; pub mod format; mod format_test; -pub(crate) mod ss_table; +pub mod ss_table; mod ss_table_test; -mod iterator_wrapper; \ No newline at end of file +pub mod iterator_wrapper; +pub mod table_builder; +mod table_builder_test; \ No newline at end of file diff --git a/src/table/table_builder.rs b/src/table/table_builder.rs new file mode 100644 index 0000000000000000000000000000000000000000..bd74361ccf5666a5285c2b3a926c0fe7501accd6 --- /dev/null +++ b/src/table/table_builder.rs @@ -0,0 +1,59 @@ +use std::borrow::Borrow; +use std::sync::Arc; +use crate::table::block_builder::BlockBuilder; +use crate::table::filter_block::FilterBlockBuilder; +use crate::traits::filter_policy_trait::FilterPolicy; +use crate::util::env::WritableFile; +use crate::util::options::Options; +use crate::util::slice::Slice; +use crate::util::status::Status; + +pub struct TableBuilder { + rep: Rep +} + +struct Rep { + // options: Box, + // index_block_options: Options, + file: Arc, + offset: u64, + status: Status, + // data_block: BlockBuilder, + // index_block: BlockBuilder, + last_key: Slice, + num_entries: u64, + // Either Finish() or Abandon() has been called. + closed: bool, +} + +impl TableBuilder { + pub fn new_with_writable_file(options: &Options, writableFile: Arc) -> Self { + let rep = Rep::new(options, writableFile); + + // Self { + // rep + // } + + todo!() + } + + pub fn add(&self, key: &Slice, value: &Slice) { + todo!() + } +} + +impl Rep { + pub fn new(opt: &Options, writableFile: Arc) -> Self { + Self { + // options: Box::new(*opt), + file: writableFile, + offset: 0, + // todo default Status::OK + status: Status::default(), + // data_block: BlockBuilder::new(&opt), + last_key: Default::default(), + num_entries: 0, + closed: false, + } + } +} \ No newline at end of file diff --git a/src/table/table_builder_test.rs b/src/table/table_builder_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index f3e4ad07bdcfb2c33b39b569c527890873dbface..3920604d71a533583b8243645d75dbc1886191b8 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -23,6 +23,8 @@ pub trait FilterPolicy { /// # Examples /// /// ``` + /// use crate::util::slice::Slice; + /// /// let mut keys : Vec = Vec::new(); /// keys.push(Slice::try_from(String::from("hello")).unwrap()); /// keys.push(Slice::try_from(String::from("world")).unwrap()); @@ -30,7 +32,9 @@ pub trait FilterPolicy { /// let policy = BloomFilterPolicy::new(800); /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Vec) -> Slice; + fn create_filter(&self, keys: Vec<&Slice>) -> Slice; + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice; /// /// diff --git a/src/traits/iterator.rs b/src/traits/iterator.rs index 164fcc563f499eb96908df23945eb8221e1f1a58..41a331ad518fb6a9bcbb7a528f5dc8190d5a1fc0 100644 --- a/src/traits/iterator.rs +++ b/src/traits/iterator.rs @@ -1,6 +1,8 @@ use crate::util::slice::Slice; use crate::util::unsafe_slice::UnsafeSlice; +/// +/// Iterator 迭代器定义 pub trait DataIterator { /// 检查当前位置是否有效 /// @@ -68,6 +70,10 @@ pub trait DataIterator { /// /// ``` fn next(&mut self); + + /// 是否存在下一个元素 + fn has_next(&self) -> bool; + /// 定位到上一个元素 /// /// # Arguments @@ -81,6 +87,7 @@ pub trait DataIterator { /// /// ``` fn pre(&mut self); + /// 获取key值 /// /// # Arguments @@ -93,7 +100,9 @@ pub trait DataIterator { /// ``` /// /// ``` + /// todo UnsafeSlice 与 Slice 应该存在一个共同traits或者struct 便于API操作 fn key(&self) -> UnsafeSlice; + /// 获取value值 /// /// # Arguments diff --git a/src/util/env.rs b/src/util/env.rs new file mode 100644 index 0000000000000000000000000000000000000000..dbeb4385f7204c1a36165d59c3f85401e18fe8cc --- /dev/null +++ b/src/util/env.rs @@ -0,0 +1,13 @@ + +use crate::util::Result; +use crate::util::slice::Slice; + +pub struct Env {} + +pub struct WritableFile {} + +impl Env { + pub fn new_writable_file(&self, fname: &Slice) -> Result { + todo!() + } +} \ No newline at end of file diff --git a/src/util/env_test.rs b/src/util/env_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index f493364845832f3530c9fa88e9df05aa4994bcdd..726e5411b9fb034396b426a875163bd35a6e5741 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -79,11 +79,15 @@ impl FromPolicy for BloomFilterPolicy { impl FilterPolicy for BloomFilterPolicy { fn name(&self) -> String { - String::from("leveldb.BuiltinBloomFilter2") + String::from("leveldb.BuiltinBloomFilter") } - fn create_filter(&self, keys: Vec) -> Slice { - let n: usize = keys.len(); + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { + let n: usize = len; let mut bits: usize = n * self.bits_per_key; @@ -100,7 +104,7 @@ impl FilterPolicy for BloomFilterPolicy { dst_chars[bytes] = self.k as u8; for i in 0..n { - let slice = keys.get(i).unwrap(); + let slice = keys[i]; let mut h : u32 = slice.bloom_hash(); let delta : u32 = (h >> 17) | (h << 15); @@ -173,7 +177,11 @@ impl FilterPolicy for InternalFilterPolicy { todo!() } - fn create_filter(&self, keys: Vec) -> Slice { + fn create_filter(&self, keys: Vec<&Slice>) -> Slice { + self.create_filter_with_len(keys.len(), keys) + } + + fn create_filter_with_len(&self, len: usize, keys: Vec<&Slice>) -> Slice { // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, // 并把根据这些key创建的filter追加到 dst中。 diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index e84ee121de60da5aa508025477dcad7d8ab595bd..bea6d2aafb9bf43e610f680a568aa0859754e1ef 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -33,9 +33,12 @@ fn test_new() { fn test_create_filter() { let policy = BloomFilterPolicy::new(800); - let mut keys : Vec = Vec::new(); - keys.push(Slice::try_from(String::from("hello")).unwrap()); - keys.push(Slice::try_from(String::from("world")).unwrap()); + let s1 = Slice::try_from(String::from("hello")).unwrap(); + let s2 = Slice::try_from(String::from("world")).unwrap(); + + let mut keys : Vec<&Slice> = Vec::new(); + keys.push(&s1); + keys.push(&s2); let bloom_filter: Slice = policy.create_filter(keys); diff --git a/src/util/mod.rs b/src/util/mod.rs index ade5ddcd886631e5f833e69cd0ff137219873ded..c2869aacd778422c161d8d5ad3a3cb4522c9a78f 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -30,9 +30,9 @@ mod filter_policy_test; pub mod histogram; mod histogram_test; -mod hash; +pub mod hash; mod hash_test; -mod mutex_lock; +pub mod mutex_lock; mod mutex_lock_test; pub mod random; mod random_test; @@ -41,6 +41,8 @@ pub mod debug; pub mod linked_list; mod linked_list_test; pub mod unsafe_slice; +pub mod env; +mod env_test; /// 定义别名 pub type Result = result::Result; diff --git a/src/util/options.rs b/src/util/options.rs index 90289807904f1c37922f3de1abe7ddd93774239f..4764b888de7c214ba7cffd672dbe2c044cdd3fd2 100644 --- a/src/util/options.rs +++ b/src/util/options.rs @@ -1,19 +1,21 @@ +use std::sync::Arc; use crate::db::db::Snapshot; use crate::db::db_format::InternalKeyComparator; use crate::traits::comparator_trait::Comparator; +use crate::traits::filter_policy_trait::FilterPolicy; use crate::util::comparator::BytewiseComparatorImpl; +use crate::util::env::Env; pub enum CompressionType { NoCompression, SnappyCompression } -/// TODO temp -pub struct Env {} - pub struct Cache {} -pub struct FilterPolicy {} +// 使用如下定义(后续路径会重构) +// use crate::traits::filter_policy_trait::FilterPolicy; +// pub struct FilterPolicy {} pub struct Options { @@ -96,7 +98,7 @@ pub struct Options { /// If non-null, use the specified filter policy to reduce disk reads. /// Many applications will benefit from passing the result of /// NewBloomFilterPolicy() here. - pub filter_policy: Option, + pub filter_policy: Option>, } /// Options that control read operations pub struct ReadOptions { diff --git a/src/util/status.rs b/src/util/status.rs index eedb41f97f1971d1e15cf6922778c55b54ee720c..97b33d6ecb53e76dd74a8320c3a8cc2add26bad9 100644 --- a/src/util/status.rs +++ b/src/util/status.rs @@ -103,10 +103,13 @@ impl Status { self.err.is_invalid_argument() } - pub fn get_error_string(&self) -> String { - self.err.to_string() + pub fn get_msg(&self) -> String { + let msg = &self.msg; + + String::from(msg.as_str()) } + /// 得到 LevelError /// 请注意, err 的所有权会发生转移!!! pub fn get_error(self) -> LevelError { self.err