diff --git a/Cargo.toml b/Cargo.toml
index 7b77544452777e14c746dc56715b2fa8e169f87e..6f688ef24e926e517e32af2d66ae4932fa50edca 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,3 +10,8 @@ path = "src/lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+
+
+[profile.dev]
+
+[profile.release]
diff --git a/README.md b/README.md
index b7d8eb048e93d12fb311db0d718cdbb1174b36f0..dc51f61e635314507bab8927e313d948bc0981a6 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,6 @@ LevelDB for rust
 | BloomFilter | fengyang | 0% |
 | CRC | wangboo、lxd5866 | |
 | Env | lxd5866 | |
-| Hash | fengyang | 30% |
+| Hash | fengyang | 100% |
 | MutexLock | kazeseiriou | |
 | Histgram | kazeseiriou | |
\ No newline at end of file
diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs
index fcfd19d3e908fe162cf214dec88fbae7e90f5517..2ec339d83ae80b7f94dd8365195e43d0e86c9be9 100644
--- a/src/traits/filter_policy_trait.rs
+++ b/src/traits/filter_policy_trait.rs
@@ -1,9 +1,37 @@
 use crate::util::slice::Slice;
 
+/// Key filter that makes it possible to quickly rule out keys that do not exist.
 pub trait FilterPolicy {
 
+    /// Returns the name of this policy.
+    ///
+    /// Note that if the filter encoding changes in an incompatible way,
+    /// the name returned by this method must be changed. Otherwise, old
+    /// incompatible filters may be passed to methods of this type.
     fn name() -> String;
 
-    fn create_filter(&self, keys: Slice, n: u32) -> String;
+    /// Builds a filter from `keys` and returns `dst` with that filter appended.
+    ///
+    /// `keys[0, n-1]` contains a list of keys (potentially with duplicates)
+    /// that are ordered according to the user supplied comparator. The
+    /// returned value is the original contents of `dst` followed by a
+    /// filter that summarizes `keys[0, n-1]`.
+    ///
+    /// Warning: do not change the initial contents of `dst`. Instead,
+    /// append the newly constructed filter to `dst`.
+    ///
+    /// # Arguments
+    ///
+    /// * `keys`: the keys to summarize, ordered by the user supplied comparator.
+    /// * `n`: the number of keys in `keys`.
+    /// * `dst`: existing contents that the new filter is appended to.
+    ///
+    /// returns: String
+    fn create_filter(&self, keys: Slice, n: u32, dst: String) -> String;
 
+    /// Reports whether `key` may be one of the keys summarized by `filter`.
+    ///
+    /// NOTE(review): presumably `filter` is data produced by `create_filter`;
+    /// confirm once an implementation exists.
+    fn key_may_match(key: &Slice, filter: &Slice) -> bool;
 }
\ No newline at end of file
diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs
index 9e9811ee4d430fb1f43407969101dd4e0c13dd2a..b40e0dd354158de2b2708a2ae5221a2848a94f5f 100644
--- a/src/util/filter_policy.rs
+++ b/src/util/filter_policy.rs
@@ -12,7 +12,11 @@ impl FilterPolicy for BloomFilterPolicy {
         String::from("leveldb.BuiltinBloomFilter2")
     }
 
-    fn create_filter(&self, keys: Slice, n: u32) -> String {
+    fn create_filter(&self, keys: Slice, n: u32, dst: String) -> String {
+        todo!()
+    }
+
+    fn key_may_match(key: &Slice, filter: &Slice) -> bool {
         todo!()
     }
 }
\ No newline at end of file
diff --git a/src/util/hash.rs b/src/util/hash.rs
index d383f2b80c66a6fef143d61dc3e40d8c17eaa93b..ea8257507dab91439dce6b13522fc2293f7eb794 100644
--- a/src/util/hash.rs
+++ b/src/util/hash.rs
@@ -1,6 +1,12 @@
+use crate::traits::coding_trait::CodingTrait;
+use crate::util::coding::Coding;
 
-pub trait AsHash {
-    ///
+/// Namespace for the hash used by this crate: a MurmurHash variant, i.e. an
+/// efficient, well-balanced, low-collision, non-cryptographic hash function.
+pub struct Hash {}
+
+impl Hash {
+    /// Hashes the first `data_size` bytes of `data`, mixing in `seed`.
     ///
     /// # Arguments
     ///
@@ -13,9 +19,62 @@ pub trait AsHash {
     /// # Examples
     ///
     /// ```
-    ///
+    /// let data3: Vec<u8> = vec![0xe2, 0x99, 0xa5];
+    /// let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34);
     ///
+    /// assert_eq!(0x323c078f, hash_val);
     /// ```
-    fn hash(data: String, n: usize, seed: u32) -> u32;
-}
-pub struct Hash {}
\ No newline at end of file
+    pub fn hash(data: String, data_size: usize, seed: u32) -> u32 {
+        // A `String` exposes its UTF-8 bytes through the safe `as_bytes`, so no
+        // `unsafe` access to the underlying vector is required.
+        Hash::hash_char(data.as_bytes(), data_size, seed)
+    }
+
+    /// Hashes the first `data_size` bytes of `data`, mixing in `seed`.
+    ///
+    /// Whole 4-byte words are decoded with `Coding::decode_fixed32` and folded
+    /// into the state; the 1-3 trailing bytes are then mixed in. All arithmetic
+    /// wraps modulo 2^32.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `data_size` is greater than `data.len()`.
+    pub fn hash_char(data: &[u8], data_size: usize, seed: u32) -> u32 {
+        // Multiplier and final shift of the mixing steps.
+        let murmur_hash: u32 = 0xc6a4a793;
+        let r: u32 = 24;
+
+        // Only the first `data_size` bytes take part in the hash.
+        let data = &data[..data_size];
+
+        // h = seed ^ (data_size * m). The product is taken in wrapping `u32`
+        // arithmetic so it cannot overflow `usize` on 32-bit targets.
+        let mut h: u32 = seed ^ (data_size as u32).wrapping_mul(murmur_hash);
+
+        // Fold in four bytes at a time.
+        let mut words = data.chunks_exact(4);
+        for word in &mut words {
+            let w: u32 = Coding::decode_fixed32(word);
+            h = h.wrapping_add(w);
+            h = h.wrapping_mul(murmur_hash);
+            h ^= h >> 16;
+        }
+
+        // Mix in the 1-3 leftover bytes: the highest byte first, then a final
+        // multiply/shift round once the lowest byte has been added.
+        let tail = words.remainder();
+        if tail.len() >= 3 {
+            h = h.wrapping_add(u32::from(tail[2]) << 16);
+        }
+        if tail.len() >= 2 {
+            h = h.wrapping_add(u32::from(tail[1]) << 8);
+        }
+        if !tail.is_empty() {
+            h = h.wrapping_add(u32::from(tail[0]));
+            h = h.wrapping_mul(murmur_hash);
+            h ^= h >> r;
+        }
+
+        h
+    }
+}
\ No newline at end of file
diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs
index 0384579bac614a1d033fd08d59a0f40e9b67e2b8..770192b0fc79f2416deac8752d4041ef287a75d7 100644
--- a/src/util/hash_test.rs
+++ b/src/util/hash_test.rs
@@ -1,6 +1,55 @@
+use crate::util::hash::Hash;
 
 #[test]
 fn test_hash() {
+    // Lengths 7, 6 and 5 leave 3, 2 and 1 trailing bytes after the 4-byte words.
+    let val = "aabbccd";
+    let hash_val = Hash::hash(String::from(val), val.len(), 3);
+    assert_eq!(Hash::hash_char(val.as_bytes(), val.len(), 3), hash_val);
 
+    let val = "aabbcc";
+    let hash_val = Hash::hash(String::from(val), val.len(), 3);
+    assert_eq!(Hash::hash_char(val.as_bytes(), val.len(), 3), hash_val);
 
-}
\ No newline at end of file
+    let val = "aabbc";
+    let hash_val = Hash::hash(String::from(val), val.len(), 3);
+    assert_eq!(Hash::hash_char(val.as_bytes(), val.len(), 3), hash_val);
+}
+
+#[test]
+fn test_hash_code() {
+    let data1: Vec<u8> = vec![0x62];
+    let data2: Vec<u8> = vec![0xc3, 0x97];
+    let data3: Vec<u8> = vec![0xe2, 0x99, 0xa5];
+    let data4: Vec<u8> = vec![0xe1, 0x80, 0xb9, 0x32];
+    let data5: Vec<u8> = vec![0x01, 0xc0, 0x00, 0x00,
+                              0x00, 0x00, 0x00, 0x00,
+                              0x00, 0x00, 0x00, 0x00,
+                              0x00, 0x00, 0x00, 0x00,
+                              0x14, 0x00, 0x00, 0x00,
+                              0x00, 0x00, 0x04, 0x00,
+                              0x00, 0x00, 0x00, 0x14,
+                              0x00, 0x00, 0x00, 0x18,
+                              0x28, 0x00, 0x00, 0x00,
+                              0x00, 0x00, 0x00, 0x00,
+                              0x02, 0x00, 0x00, 0x00,
+                              0x00, 0x00, 0x00, 0x00];
+
+    let hash_val = Hash::hash_char(&[0u8], 0, 0xbc9f1d34);
+    assert_eq!(0xbc9f1d34, hash_val);
+
+    let hash_val = Hash::hash_char(&data1, data1.len(), 0xbc9f1d34);
+    assert_eq!(0xef1345c4, hash_val);
+
+    let hash_val = Hash::hash_char(&data2, data2.len(), 0xbc9f1d34);
+    assert_eq!(0x5b663814, hash_val);
+
+    let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34);
+    assert_eq!(0x323c078f, hash_val);
+
+    let hash_val = Hash::hash_char(&data4, data4.len(), 0xbc9f1d34);
+    assert_eq!(0xed21633a, hash_val);
+
+    let hash_val = Hash::hash_char(&data5, data5.len(), 0x12345678);
+    assert_eq!(0xf333dabb, hash_val);
+}