From 356fc4cdcf4727ce2475a8452b8fa273da07624e Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 16 Jan 2023 16:44:06 +0800 Subject: [PATCH 1/4] compare --- README.md | 34 +++++++++++++++---------------- src/traits/filter_policy_trait.rs | 34 +++++++++++++++---------------- src/util/comparator_test.rs | 1 + src/util/filter_policy.rs | 30 +++++++++++++-------------- 4 files changed, 50 insertions(+), 49 deletions(-) diff --git a/README.md b/README.md index 2b20183..b8abfe1 100644 --- a/README.md +++ b/README.md @@ -46,23 +46,23 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi ### 1.0.0 1.0.0 版本, 完成 util 相关的内容 -| 功能模块 | 完成人 | 进度 | -|----------------------------------------|-----------------|------| -| Arena (Memory Management) | wangboo | 100% | -| bloom | fengyang | 10% | -| Cache | colagy | 10% | -| Coding (Primitive Type SerDe) | colagy | 100% | -| Comparator | fengyang | 90% | -| CRC | wangboo、lxd5866 | 100% | -| Env | lxd5866 | | -| filter_policy | fengyang | 10% | -| Hash | fengyang | 100% | -| Histgram | kazeseiriou | 100% | -| loging | | | -| MutexLock | kazeseiriou | 100% | -| Random | colagy | | -| Status | fengyang | 100% | -| Slice | wangboo | 100% | +| 功能模块 | 完成人 | 进度 | +|-------------------------------|-----------------|------| +| Arena (Memory Management) | wangboo | 100% | +| bloom | fengyang | 10% | +| Cache | colagy | 10% | +| Coding (Primitive Type SerDe) | colagy | 100% | +| Comparator | fengyang | 100% | +| CRC | wangboo、lxd5866 | 100% | +| Env | lxd5866 | | +| filter_policy | fengyang | 10% | +| Hash | fengyang | 100% | +| Histgram | kazeseiriou | 100% | +| loging | | | +| MutexLock | kazeseiriou | 100% | +| Random | colagy | | +| Status | fengyang | 100% | +| Slice | wangboo | 100% | ### 1.1.0 1.1.0 版本, 完成基础零部件 diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 361e5a8..24b16fd 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -10,23 +10,23 @@ pub trait FilterPolicy { /// passed to methods of this type. fn name() -> String; - /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, - /// 并把根据这些key创建的filter追加返回。 - /// - /// # Arguments - /// - /// * `keys`: - /// * `n`: - /// - /// returns: String - /// - /// # Examples - /// - /// ``` - /// - /// ``` - fn create_filter(&self, keys: Slice, n: u32) -> String; + // /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 + // /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, + // /// 并把根据这些key创建的filter追加返回。 + // /// + // /// # Arguments + // /// + // /// * `keys`: + // /// * `n`: + // /// + // /// returns: String + // /// + // /// # Examples + // /// + // /// ``` + // /// + // /// ``` + // fn create_filter(&self, keys: Slice, n: u32) -> String; fn key_may_match(key: &Slice, filter: &Slice) -> bool; } \ No newline at end of file diff --git a/src/util/comparator_test.rs b/src/util/comparator_test.rs index a9b2807..b1fb79e 100644 --- a/src/util/comparator_test.rs +++ b/src/util/comparator_test.rs @@ -6,6 +6,7 @@ mod test { use crate::util::comparator::{BytewiseComparatorImpl, InternalKeyComparator}; use crate::util::slice::Slice; + // ##################### BytewiseComparatorImpl test #[test] fn test_bytewise_comparator_impl_get_name() { let name = BytewiseComparatorImpl::get_name(); diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 46303bf..ded87dc 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -57,21 +57,21 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter2") } - fn create_filter(&self, keys: Slice, n: usize) -> String { - // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, 并把根据这些key创建的filter追加到 dst中。 - let mut bits: usize = n * self.bits_per_key; - - // For small n, we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. - if bits < 64 { - bits = 64; - } - - let bytes: usize = (bits + 7) / 8; - bits = bytes * 8; - - } + // fn create_filter(&self, keys: Slice, n: usize) -> String { + // // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 + // // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, 并把根据这些key创建的filter追加到 dst中。 + // let mut bits: usize = n * self.bits_per_key; + // + // // For small n, we can see a very high false positive rate. Fix it + // // by enforcing a minimum bloom filter length. + // if bits < 64 { + // bits = 64; + // } + // + // let bytes: usize = (bits + 7) / 8; + // bits = bytes * 8; + // + // } fn key_may_match(key: &Slice, filter: &Slice) -> bool { todo!() -- Gitee From d0192c9a43ad28bc793a76c207e5c12268c60abb Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 16 Jan 2023 20:40:11 +0800 Subject: [PATCH 2/4] create_filter --- src/traits/filter_policy_trait.rs | 38 +++++++++--------- src/util/filter_policy.rs | 66 +++++++++++++++++++++---------- src/util/filter_policy_test.rs | 25 +++++++++--- src/util/hash.rs | 1 + 4 files changed, 85 insertions(+), 45 deletions(-) diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 24b16fd..d44d7c1 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -8,25 +8,25 @@ pub trait FilterPolicy { /// changes in an incompatible way, the name returned by this method /// must be changed. Otherwise, old incompatible filters may be /// passed to methods of this type. - fn name() -> String; + fn name(&self) -> String; - // /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - // /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, - // /// 并把根据这些key创建的filter追加返回。 - // /// - // /// # Arguments - // /// - // /// * `keys`: - // /// * `n`: - // /// - // /// returns: String - // /// - // /// # Examples - // /// - // /// ``` - // /// - // /// ``` - // fn create_filter(&self, keys: Slice, n: u32) -> String; + /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 + /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, + /// 并把根据这些key创建的filter追加返回。 + /// + /// # Arguments + /// + /// * `keys`: + /// * `n`: + /// + /// returns: String + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn create_filter(&self, keys: Vec, n: usize) -> String; - fn key_may_match(key: &Slice, filter: &Slice) -> bool; + fn key_may_match(&self, key: &Slice, filter: &Slice) -> bool; } \ No newline at end of file diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index ded87dc..4d9e7e1 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -1,4 +1,4 @@ -use std::ops::Mul; +use std::ops::{BitOr, Mul, Shl}; use crate::traits::filter_policy_trait::{FilterPolicy}; use crate::util::hash::{Hash, ToHash}; use crate::util::slice::Slice; @@ -14,7 +14,7 @@ pub struct BloomFilterPolicy { } impl<'a> BloomFilterPolicy { - pub fn bloom_hash(key: Slice) -> u32 { + pub fn bloom_hash(key: &Slice) -> u32 { key.to_hash_with_seed(0xbc9f1d34) } } @@ -51,29 +51,55 @@ impl BloomFilterPolicy { } } +// dyn FilterPolicy + FromPolicy impl FilterPolicy for BloomFilterPolicy { - fn name() -> String { + fn name(&self) -> String { String::from("leveldb.BuiltinBloomFilter2") } - // fn create_filter(&self, keys: Slice, n: usize) -> String { - // // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - // // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, 并把根据这些key创建的filter追加到 dst中。 - // let mut bits: usize = n * self.bits_per_key; - // - // // For small n, we can see a very high false positive rate. Fix it - // // by enforcing a minimum bloom filter length. - // if bits < 64 { - // bits = 64; - // } - // - // let bytes: usize = (bits + 7) / 8; - // bits = bytes * 8; - // - // } - - fn key_may_match(key: &Slice, filter: &Slice) -> bool { + fn create_filter(&self, keys: Vec, n: usize) -> String { + // 根据指定的参数创建过滤器,并返回结果。 + // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, + // 并把根据这些key创建的filter追加到 返回结果中。 + let mut bits: usize = n * self.bits_per_key; + + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if bits < 64 { + bits = 64; + } + + let bytes: usize = (bits + 7) / 8; + bits = bytes * 8; + + let mut dstChars: Vec = Vec::with_capacity(bytes); + for i in 0..n { + let mut h : u32 = BloomFilterPolicy::bloom_hash(keys.get(i).unwrap()); + let delta : u32 = (h >> 17) | (h << 15); + + for j in 0..self.k { + let bitpos:usize = ((h as usize) % bits); + + // a |= b --> 按位或, 后赋值给a + let position: usize = bitpos / 8; + let mod_val: usize = bitpos % 8; + let val = (1 as u8).wrapping_shl(mod_val as u32); + + // TODO error: index out of bounds: the len is 0 but the index is 161 + dstChars[position] |= val; + + h = h.wrapping_add(delta); + } + } + + // Vec 转 String + let ss = Slice::from_buf(&dstChars).borrow_data(); + + ss.to_string() + } + + fn key_may_match(&self, key: &Slice, filter: &Slice) -> bool { todo!() } } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index e3247fe..ba86100 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -4,12 +4,13 @@ use crate::util::bloom_filter; use crate::util::filter_policy::{BloomFilterPolicy, FromPolicy}; use crate::util::slice::Slice; +// #################### BloomFilterPolicy test #[test] fn test_bloom_hash() { let val = "aabbccd"; let slice: Slice = Slice::from_buf(val.as_bytes()); - let hash_val = BloomFilterPolicy::bloom_hash(slice); + let hash_val = BloomFilterPolicy::bloom_hash(&slice); assert_eq!(hash_val, 2085241752); } @@ -25,9 +26,21 @@ fn test_new() { } -// #[test] -// fn test_create_filter() { -// let bloom_filter: BloomFilterPolicy = BloomFilterPolicy::create_filter(8); -// println!("{}", "aa") -// +// #################### FilterPolicy test +#[test] +fn test_create_filter() { + let policy = BloomFilterPolicy::new(800); + + let mut keys : Vec = Vec::new(); + keys.push(Slice::try_from(String::from("hello")).unwrap()); + keys.push(Slice::try_from(String::from("world")).unwrap()); + + let filter_ = policy.create_filter(keys, 2); + println!("{}", "aa") + +} + +// a(&policy1); +// fn a(a: &dyn FilterPolicy) { +// //. // } \ No newline at end of file diff --git a/src/util/hash.rs b/src/util/hash.rs index 7d111c7..2b2e0ba 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -147,6 +147,7 @@ impl Hash { h = h.wrapping_add(w); // h *= m h = h.wrapping_mul(murmur_hash); + // Rust的位运算符包括:按位取反(!)、按位与(&)、按位或(|)、按位异或(^)、左移(<<)、右移(>>) // ^ 按位异或 bitxor , >> 右移位 shr, << 左移位 shl // h ^= (h >> 16) == h ^= h.shr(16); h = h.bitxor(h.wrapping_shr(16)); -- Gitee From 0d3d6747d360c05c2b8b297b4296228acc1d746a Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 16 Jan 2023 20:57:23 +0800 Subject: [PATCH 3/4] FilterPolicy create_filter --- src/traits/filter_policy_trait.rs | 22 +++++++++++++++++++--- src/util/filter_policy.rs | 14 ++++++++------ src/util/filter_policy_test.rs | 21 ++++++++++++++++++--- 3 files changed, 45 insertions(+), 12 deletions(-) diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index d44d7c1..8050b32 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -3,11 +3,13 @@ use crate::util::slice::Slice; /// 用于key过滤,可以快速的排除不存在的key pub trait FilterPolicy { + /// /// filter的名字 /// Return the name of this policy. Note that if the filter encoding /// changes in an incompatible way, the name returned by this method /// must be changed. Otherwise, old incompatible filters may be /// passed to methods of this type. + /// fn name(&self) -> String; /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 @@ -19,14 +21,28 @@ pub trait FilterPolicy { /// * `keys`: /// * `n`: /// - /// returns: String + /// returns: bloom_filter Slice /// /// # Examples /// /// ``` /// /// ``` - fn create_filter(&self, keys: Vec, n: usize) -> String; + fn create_filter(&self, keys: Vec, n: usize) -> Slice; - fn key_may_match(&self, key: &Slice, filter: &Slice) -> bool; + /// + /// + /// # Arguments + /// + /// * `key`: + /// * `bloom_filter`: + /// + /// returns: bool + /// + /// # Examples + /// + /// ``` + /// + /// ``` + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool; } \ No newline at end of file diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 4d9e7e1..7a1cf32 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -58,7 +58,7 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter2") } - fn create_filter(&self, keys: Vec, n: usize) -> String { + fn create_filter(&self, keys: Vec, n: usize) -> Slice { // 根据指定的参数创建过滤器,并返回结果。 // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, // 并把根据这些key创建的filter追加到 返回结果中。 @@ -74,6 +74,10 @@ impl FilterPolicy for BloomFilterPolicy { bits = bytes * 8; let mut dstChars: Vec = Vec::with_capacity(bytes); + for i in 0..bytes { + dstChars.push(0); + } + for i in 0..n { let mut h : u32 = BloomFilterPolicy::bloom_hash(keys.get(i).unwrap()); let delta : u32 = (h >> 17) | (h << 15); @@ -93,13 +97,11 @@ impl FilterPolicy for BloomFilterPolicy { } } - // Vec 转 String - let ss = Slice::from_buf(&dstChars).borrow_data(); - - ss.to_string() + // Vec 转 Slice + Slice::from_buf(&dstChars) } - fn key_may_match(&self, key: &Slice, filter: &Slice) -> bool { + fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { todo!() } } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index ba86100..87ee0fa 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -35,9 +35,24 @@ fn test_create_filter() { keys.push(Slice::try_from(String::from("hello")).unwrap()); keys.push(Slice::try_from(String::from("world")).unwrap()); - let filter_ = policy.create_filter(keys, 2); - println!("{}", "aa") - + let bloom_filter = policy.create_filter(keys, 2); + + let mut key_may_match = policy.key_may_match( + &Slice::try_from(String::from("hello")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + // &bloom_filter); + // assert!(key_may_match); + + // let mut un_key_may_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + // &bloom_filter); + // assert!(!un_key_may_match); + // + // un_key_may_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + // &bloom_filter); + // assert!(!un_key_may_match); } // a(&policy1); -- Gitee From 7be628757a7d65fc7cbec9bd71f4ab6c91c08b1a Mon Sep 17 00:00:00 2001 From: fengyang Date: Mon, 16 Jan 2023 21:04:03 +0800 Subject: [PATCH 4/4] FilterPolicy doc and from_buf --- README.md | 4 ++-- src/traits/filter_policy_trait.rs | 14 ++++++++------ src/util/filter_policy.rs | 16 +++++++++------- src/util/filter_policy_test.rs | 2 +- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index b8abfe1..923a340 100644 --- a/README.md +++ b/README.md @@ -49,13 +49,13 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | 功能模块 | 完成人 | 进度 | |-------------------------------|-----------------|------| | Arena (Memory Management) | wangboo | 100% | -| bloom | fengyang | 10% | +| bloom | fengyang | 90% | | Cache | colagy | 10% | | Coding (Primitive Type SerDe) | colagy | 100% | | Comparator | fengyang | 100% | | CRC | wangboo、lxd5866 | 100% | | Env | lxd5866 | | -| filter_policy | fengyang | 10% | +| filter_policy | fengyang | 90% | | Hash | fengyang | 100% | | Histgram | kazeseiriou | 100% | | loging | | | diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 8050b32..f3e4ad0 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -12,23 +12,25 @@ pub trait FilterPolicy { /// fn name(&self) -> String; - /// 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 - /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, - /// 并把根据这些key创建的filter追加返回。 + /// 根据 keys 创建过滤器,并返回 bloom_filter Slice /// /// # Arguments /// - /// * `keys`: - /// * `n`: + /// * `keys`: 创建过滤器的数据清单 /// /// returns: bloom_filter Slice /// /// # Examples /// /// ``` + /// let mut keys : Vec = Vec::new(); + /// keys.push(Slice::try_from(String::from("hello")).unwrap()); + /// keys.push(Slice::try_from(String::from("world")).unwrap()); /// + /// let policy = BloomFilterPolicy::new(800); + /// let bloom_filter: Slice = policy.create_filter(keys); /// ``` - fn create_filter(&self, keys: Vec, n: usize) -> Slice; + fn create_filter(&self, keys: Vec) -> Slice; /// /// diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index 7a1cf32..d9e4bf9 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -58,14 +58,13 @@ impl FilterPolicy for BloomFilterPolicy { String::from("leveldb.BuiltinBloomFilter2") } - fn create_filter(&self, keys: Vec, n: usize) -> Slice { - // 根据指定的参数创建过滤器,并返回结果。 - // 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, - // 并把根据这些key创建的filter追加到 返回结果中。 + fn create_filter(&self, keys: Vec) -> Slice { + let n: usize = keys.len(); + let mut bits: usize = n * self.bits_per_key; - // For small n, we can see a very high false positive rate. Fix it - // by enforcing a minimum bloom filter length. + // For small n, we can see a very high false positive rate. + // Fix it by enforcing a minimum bloom filter length. if bits < 64 { bits = 64; } @@ -74,7 +73,7 @@ impl FilterPolicy for BloomFilterPolicy { bits = bytes * 8; let mut dstChars: Vec = Vec::with_capacity(bytes); - for i in 0..bytes { + for bi in 0..bytes { dstChars.push(0); } @@ -102,6 +101,9 @@ impl FilterPolicy for BloomFilterPolicy { } fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + + let bloom_hash : u32 = BloomFilterPolicy::bloom_hash(bloom_filter); + todo!() } } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index 87ee0fa..a4afa00 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -35,7 +35,7 @@ fn test_create_filter() { keys.push(Slice::try_from(String::from("hello")).unwrap()); keys.push(Slice::try_from(String::from("world")).unwrap()); - let bloom_filter = policy.create_filter(keys, 2); + let bloom_filter: Slice = policy.create_filter(keys); let mut key_may_match = policy.key_may_match( &Slice::try_from(String::from("hello")).unwrap(), -- Gitee