diff --git a/README.md b/README.md index cb49cd3438342699b0eeb56aa496ccf899d64c8e..a2157d8077c8ee1169d52cf4d46bdd245ea06705 100644 --- a/README.md +++ b/README.md @@ -4,20 +4,23 @@ LevelDB for rust #### 软件架构 -软件架构说明 + +![LevelDB--整体架构](doc/images/LevelDB--整体架构.png) + +LevelDB是一款写性能十分优秀的可持久化的KV存储引擎,其实现原理是依据LSM-Tree(Log-Structured Merge-Tree). + +LSM tree (log-structured merge-tree) 是一种对写操作非常友好的存储方案。 + +LSM tree 是许多 KV型或日志型数据库所依赖的核心实现,例如BigTable、HBase、Cassandra、LevelDB、SQLite4、RocksDB 等 #### 安装教程 1. xxxx -2. xxxx -3. xxxx #### 使用说明 1. xxxx -2. xxxx -3. xxxx #### 参与贡献 diff --git "a/doc/images/LevelDB--\346\225\264\344\275\223\346\236\266\346\236\204.png" "b/doc/images/LevelDB--\346\225\264\344\275\223\346\236\266\346\236\204.png" new file mode 100644 index 0000000000000000000000000000000000000000..ddbbc5c89ef655f5ec7a22701368f0c32b3a0e59 Binary files /dev/null and "b/doc/images/LevelDB--\346\225\264\344\275\223\346\236\266\346\236\204.png" differ diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs index 2ec339d83ae80b7f94dd8365195e43d0e86c9be9..ea17aeab69a77931e5af205ec42ab81c8dd05610 100644 --- a/src/traits/filter_policy_trait.rs +++ b/src/traits/filter_policy_trait.rs @@ -14,13 +14,6 @@ pub trait FilterPolicy { /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, /// 并把根据这些key创建的filter追加到 dst中。 /// - /// keys[0,n-1] contains a list of keys (potentially with duplicates) - /// that are ordered according to the user supplied comparator. - /// Append a filter that summarizes keys[0,n-1] to *dst. - /// - /// Warning: do not change the initial contents of dst. Instead, - /// append the newly constructed filter to dst. 
- /// /// # Arguments /// /// * `keys`: diff --git a/src/util/bloom_filter.rs b/src/util/bloom_filter.rs index 2d507b073c8537523e8e93546779a3802514aebb..b3d1b9e0b780de161b4833533f8495dd0b0d3519 100644 --- a/src/util/bloom_filter.rs +++ b/src/util/bloom_filter.rs @@ -3,4 +3,8 @@ pub struct BloomFilter { +} + +impl BloomFilter { + } \ No newline at end of file diff --git a/src/util/const.rs b/src/util/const.rs index b57d833d77771522f0142d8b16f171c1b9592885..a8ffcf84196499637a80e70873f174ae079929b0 100644 --- a/src/util/const.rs +++ b/src/util/const.rs @@ -3,5 +3,5 @@ /// 冒号 + 空格 pub const COLON_WHITE_SPACE: &'static str = ": "; -/// hash 的默认seed +/// hash 的默认seed: 0xbc9f1d34 pub const HASH_DEFAULT_SEED: u32 = 0xbc9f1d34; diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs index b40e0dd354158de2b2708a2ae5221a2848a94f5f..5a87f2e8163ed800895668f56bb693336abdc961 100644 --- a/src/util/filter_policy.rs +++ b/src/util/filter_policy.rs @@ -1,4 +1,6 @@ +use std::ops::Mul; use crate::traits::filter_policy_trait::{FilterPolicy}; +use crate::util::hash::{Hash, ToHash}; use crate::util::slice::Slice; pub struct BloomFilterPolicy { @@ -6,6 +8,35 @@ pub struct BloomFilterPolicy { k: usize } +impl<'a> BloomFilterPolicy { + + pub fn bloom_hash(key: Slice) -> u32 { + key.to_hash() + // Hash::hash_code(key., 0xbc9f1d34) + } +} + +impl BloomFilterPolicy { + pub fn new(bits_per_key: usize) -> Self { + // We intentionally round down to reduce probing cost a little bit + // 0.69 =~ ln(2); floor() (not round()) so k never rounds up, matching upstream LevelDB's integer truncation + let factor: f64 = 0.69; + let mut k_k: usize = factor.mul(bits_per_key as f64).floor() as usize; + + if k_k < 1 { + k_k = 1; + } + if k_k > 30{ + k_k = 30; + } + + Self { + bits_per_key, + k : k_k + } + } +} + impl FilterPolicy for BloomFilterPolicy { fn name() -> String { @@ -13,6 +44,10 @@ impl FilterPolicy for BloomFilterPolicy { } fn create_filter(&self, keys: Slice, n: u32, dst: String) -> String { + // 根据指定的参数创建过滤器,并返回结果, 结果为dst的原始内容 + append结果。 + // 
参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复, + // 并把根据这些key创建的filter追加到 dst中。 + // todo!() } diff --git a/src/util/filter_policy_test.rs b/src/util/filter_policy_test.rs new file mode 100644 index 0000000000000000000000000000000000000000..b49448ff809bba65c38afa91d2405faf52bcf1e0 --- /dev/null +++ b/src/util/filter_policy_test.rs @@ -0,0 +1,14 @@ +use std::ptr::null; +use crate::util::bloom_filter; +use crate::util::filter_policy::BloomFilterPolicy; + +#[test] +fn test_new() { + let bloom_filter = BloomFilterPolicy::new(8); + println!("hash:{}", "a"); + // assert_eq!(bloom_filter, null()); + + let bloom_filter = BloomFilterPolicy::new(800); + println!("hash:{}", "a"); + +} \ No newline at end of file diff --git a/src/util/hash.rs b/src/util/hash.rs index fef5dc5bf075feb0f0ce5651546c1e437205ccee..945d7f6c14ccbbe49ac72f730353903665980cc3 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -12,9 +12,11 @@ use crate::util::slice::Slice; /// 一种可以计算 hash 的特质 pub trait ToHash { fn to_hash(&self) -> u32; + + fn to_hash_seed(&self, seed: u32) -> u32; } -/// 所有基本类型 u8, i8, u16, u32 ... 的数组都可以实现 hash 值计算 +/// 所有基本类型 u8, i8, u16, u32 ... 的Vec都可以实现 hash 值计算 /// Sample: /// ``` /// let hash = vec!['a','b','c'].to_hash(); @@ -25,6 +27,12 @@ impl ToHash for Vec { v_v.to_hash() } + + fn to_hash_seed(&self, seed: u32) -> u32 { + let v_v = self.as_slice(); + + v_v.to_hash_seed(seed) + } } /// 所有基本类型 u8, i8, u16, u32 ... 
的slice都可以实现 hash 值计算 @@ -44,6 +52,16 @@ impl ToHash for &[T] { Hash::hash_code(data, HASH_DEFAULT_SEED) } + + fn to_hash_seed(&self, seed: u32) -> u32 { + let ptr_u8 = self.as_ptr() as *const _ as *const u8; + + let data = unsafe { + stds::from_raw_parts(ptr_u8, size_of::() * self.len()) + }; + + Hash::hash_code(data, seed) + } } /// 实现了 &str 转 ToHash 的特质 @@ -55,6 +73,10 @@ impl ToHash for &str { fn to_hash(&self) -> u32 { Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) } + + fn to_hash_seed(&self, seed: u32) -> u32 { + Hash::hash_code(self.as_bytes(), seed) + } } /// 实现了 Slice 转 ToHash 的特质 @@ -68,6 +90,10 @@ impl ToHash for Slice { fn to_hash(&self) -> u32 { Hash::hash_code(self.to_vec().as_slice(), HASH_DEFAULT_SEED) } + + fn to_hash_seed(&self, seed: u32) -> u32 { + Hash::hash_code(self.to_vec().as_slice(), seed) + } } /// 实现了 String 转 ToHash 的特质 @@ -81,6 +107,10 @@ impl ToHash for String { fn to_hash(&self) -> u32 { Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) } + + fn to_hash_seed(&self, seed: u32) -> u32 { + Hash::hash_code(self.as_bytes(), seed) + } } /// 本方案中,采用的是MurMurHash的一种变体,是一种高效低碰撞的非加密型哈希函数。具有较高的平衡性与低碰撞率 diff --git a/src/util/mod.rs b/src/util/mod.rs index d2f931c2a86a20acfec9e7bbe14b1255a7231f66..3d2299f8775e47951d7e06777623b585d92b6114 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -21,6 +21,7 @@ mod crc_test; pub mod bloom_filter; mod bloom_filter_test; pub mod filter_policy; +mod filter_policy_test; /// 定义别名 pub type ResultT = result::Result;