diff --git a/README.md b/README.md index 923a340219d4732508d88b695ae86a4e2ce66905..b4cd64f02ae5f8dc64c076d445e83c794cd34f23 100644 --- a/README.md +++ b/README.md @@ -49,13 +49,13 @@ LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如Bi | 功能模块 | 完成人 | 进度 | |-------------------------------|-----------------|------| | Arena (Memory Management) | wangboo | 100% | -| bloom | fengyang | 90% | +| bloom | fengyang | 100% | | Cache | colagy | 10% | | Coding (Primitive Type SerDe) | colagy | 100% | | Comparator | fengyang | 100% | | CRC | wangboo、lxd5866 | 100% | | Env | lxd5866 | | -| filter_policy | fengyang | 90% | +| filter_policy | fengyang | 100% | | Hash | fengyang | 100% | | Histgram | kazeseiriou | 100% | | loging | | | diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index d9e4bf926eb6f6022ff4411f62cf6ccbb1ba5d2a..07b33b228f3fd65c17766efe2738abe1a020b64a 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -72,10 +72,8 @@ impl FilterPolicy for BloomFilterPolicy { let bytes: usize = (bits + 7) / 8; bits = bytes * 8; - let mut dstChars: Vec = Vec::with_capacity(bytes); - for bi in 0..bytes { - dstChars.push(0); - } + let mut dst_chars: Vec = vec![0; bytes + 1]; + dst_chars[bytes] = self.k as u8; for i in 0..n { let mut h : u32 = BloomFilterPolicy::bloom_hash(keys.get(i).unwrap()); @@ -89,21 +87,45 @@ impl FilterPolicy for BloomFilterPolicy { let mod_val: usize = bitpos % 8; let val = (1 as u8).wrapping_shl(mod_val as u32); - // TODO error: index out of bounds: the len is 0 but the index is 161 - dstChars[position] |= val; + dst_chars[position] |= val; h = h.wrapping_add(delta); } } // Vec 转 Slice - Slice::from_buf(&dstChars) + Slice::from_buf(&dst_chars) } fn key_may_match(&self, key: &Slice, bloom_filter: &Slice) -> bool { + let filter_size: usize = bloom_filter.size(); + if filter_size < 2 { + return false; + } + + let bloom_filter_array:Vec = bloom_filter.to_vec(); + let bits: usize = (filter_size - 1) * 8; + + // Use the encoded k so that we can read filters generated by bloom filters created using different parameters. + let k: u8 = bloom_filter_array[filter_size - 1]; + if k > 30 { + // Reserved for potentially new encodings for short bloom filters. Consider it a match. + return true; + } - let bloom_hash : u32 = BloomFilterPolicy::bloom_hash(bloom_filter); + let mut h : u32 = BloomFilterPolicy::bloom_hash(key); + // Rotate right 17 bits + let delta = (h >> 17) | (h << 15); + + for j in 0..k { + let bitpos:usize = ((h as usize) % bits); + if (bloom_filter_array[bitpos/8] & (1 << (bitpos % 8))) == 0 { + return false; + } + + h = h.wrapping_add(delta); + } - todo!() + return true; } } \ No newline at end of file diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs index a4afa005643cef9c07722c441ebfd2ed794c5047..280f19ea53104572012e0681c15a4a9576698125 100644 --- a/src/util/filter_policy_test.rs +++ b/src/util/filter_policy_test.rs @@ -42,20 +42,23 @@ fn test_create_filter() { &bloom_filter); assert!(key_may_match); - // key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), - // &bloom_filter); - // assert!(key_may_match); - - // let mut un_key_may_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), - // &bloom_filter); - // assert!(!un_key_may_match); - // - // un_key_may_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), - // &bloom_filter); - // assert!(!un_key_may_match); -} + key_may_match = policy.key_may_match(&Slice::try_from(String::from("world")).unwrap(), + &bloom_filter); + assert!(key_may_match); + + let mut key_not_match = policy.key_may_match(&Slice::try_from(String::from("x")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("helloworld")).unwrap(), + &bloom_filter); + assert!(!key_not_match); + + key_not_match = policy.key_may_match(&Slice::try_from(String::from("hello world")).unwrap(), + &bloom_filter); + assert!(!key_not_match); -// a(&policy1); -// fn a(a: &dyn FilterPolicy) { -// //. -// } \ No newline at end of file + key_not_match = policy.key_may_match(&Slice::try_from(String::from("foo")).unwrap(), + &bloom_filter); + assert!(!key_not_match); +} \ No newline at end of file