diff --git a/README.md b/README.md index dc51f61e635314507bab8927e313d948bc0981a6..cb49cd3438342699b0eeb56aa496ccf899d64c8e 100644 --- a/README.md +++ b/README.md @@ -44,11 +44,11 @@ LevelDB for rust | Random | colagy | | | Cache | colagy | | | Coding (Primitive Type SerDe) | colagy | | -| Comparator | fengyang | 85% | +| Comparator | fengyang | 90% | | Status | fengyang | 100% | -| BloomFilter | fengyang | 0% | +| BloomFilter | fengyang | 10% | | CRC | wangboo、lxd5866 | | | Env | lxd5866 | | -| Hash | fengyang | 100% | +| Hash | fengyang | 100% | | MutexLock | kazeseiriou | | | Histgram | kazeseiriou | | \ No newline at end of file diff --git a/src/util/const.rs b/src/util/const.rs index e7d7eac0ad0601d189351344a2b807b851d93022..b57d833d77771522f0142d8b16f171c1b9592885 100644 --- a/src/util/const.rs +++ b/src/util/const.rs @@ -2,3 +2,6 @@ /// 冒号 + 空格 pub const COLON_WHITE_SPACE: &'static str = ": "; + +/// hash 的默认seed +pub const HASH_DEFAULT_SEED: u32 = 0xbc9f1d34; diff --git a/src/util/hash.rs b/src/util/hash.rs index ea8257507dab91439dce6b13522fc2293f7eb794..fef5dc5bf075feb0f0ce5651546c1e437205ccee 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -1,43 +1,98 @@ use std::ops::{BitXor, Mul}; +use std::mem::size_of; +use std::slice as stds; + use crate::traits::coding_trait::CodingTrait; use crate::util::coding::Coding; +use crate::util::crc::AsCrc; +use crate::util::r#const::HASH_DEFAULT_SEED; +use crate::util::slice; +use crate::util::slice::Slice; -/// 本方案中,采用的是MurMurHash的一种变体,是一种高效低碰撞的非加密型哈希函数。具有较高的平衡性与低碰撞率 -pub struct Hash {} +/// 一种可以计算 hash 的特质 +pub trait ToHash { + fn to_hash(&self) -> u32; +} -impl<'a> Hash { - /// 计算 data 的 hash - /// - /// # Arguments - /// - /// * `data`: - /// * `n`: data 的长度 - /// * `seed`: 随机数种子 - /// - /// returns: u32 - /// - /// # Examples - /// - /// ``` - /// let data3: Vec = vec![0xe2, 0x99, 0xa5]; - /// let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34); /// - /// assert_eq!(0x323c078f, hash_val); - /// ``` - pub fn hash(mut data: String, data_size: usize, seed: u32) -> u32 { - let data_u8_vec; - unsafe { - data_u8_vec = data.as_mut_vec(); - } +/// 所有基本类型 u8, i8, u16, u32 ... 的数组都可以实现 hash 值计算 +/// Sample: +/// ``` +/// let hash = vec!['a','b','c'].to_hash(); +/// ``` +impl ToHash for Vec { + fn to_hash(&self) -> u32 { + let v_v = self.as_slice(); + + v_v.to_hash() + } +} + +/// 所有基本类型 u8, i8, u16, u32 ... 的slice都可以实现 hash 值计算 +/// Sample: +/// ``` +/// let buf = ['a','b','c']; +/// let hash_val = &buf.as_slice().to_hash(); +/// ``` +impl ToHash for &[T] { + #[inline] + fn to_hash(&self) -> u32 { + let ptr_u8 = self.as_ptr() as *const _ as *const u8; + + let data = unsafe { + stds::from_raw_parts(ptr_u8, size_of::() * self.len()) + }; + + Hash::hash_code(data, HASH_DEFAULT_SEED) + } +} - Hash::hash_char(data_u8_vec, data_size, seed) +/// 实现了 &str 转 ToHash 的特质 +/// Sample: +/// ``` +/// let hash = "abc".to_hash(); +/// ``` +impl ToHash for &str { + fn to_hash(&self) -> u32 { + Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) } +} - pub fn hash_char(data: &Vec, data_size: usize, seed: u32) -> u32 { - let murmur_hash : u32 = 0xc6a4a793; - let r : u32 = 24; +/// 实现了 Slice 转 ToHash 的特质 +/// Sample: +/// ``` +/// let val = "aabbccd"; +/// let slice: Slice = Slice::from_buf(val.as_bytes()); +/// let slice_hash_val = slice.to_hash(); +/// ``` +impl ToHash for Slice { + fn to_hash(&self) -> u32 { + Hash::hash_code(self.to_vec().as_slice(), HASH_DEFAULT_SEED) + } +} + +/// 实现了 String 转 ToHash 的特质 +/// Sample: +/// ``` +/// let val = "aabbccd"; +/// let val_s = String::from(val); +/// let string_hash_val = val_s.to_hash(); +/// ``` +impl ToHash for String { + fn to_hash(&self) -> u32 { + Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) + } +} + +/// 本方案中,采用的是MurMurHash的一种变体,是一种高效低碰撞的非加密型哈希函数。具有较高的平衡性与低碰撞率 +pub struct Hash {} + +impl Hash { + pub fn hash_code(data: &[u8], seed: u32) -> u32 { + let murmur_hash: u32 = 0xc6a4a793; + let r: u32 = 24; - let limit: usize = data_size; - let mul_first = data_size.mul(murmur_hash as usize); // x = data_size * murmur_hash + let limit: usize = data.len(); + let mul_first = limit.mul(murmur_hash as usize); // x = data_size * murmur_hash let mut h: u32 = seed.bitxor(mul_first as u32); // h = seed ^ x // 每次按照四字节长度读取字节流中的数据 w,并使用普通的哈希函数计算哈希值。 @@ -45,8 +100,8 @@ impl<'a> Hash { while position + 4 <= limit { //每次解码前4个字节,直到最后剩下小于4个字节 // rust的 &[u8] 是胖指针,带长度信息的,会做range check,所以是安全的。 - let slice_str: &[u8] = data[position..(position + 4)].as_ref(); - let w: u32 = Coding::decode_fixed32(slice_str); + // 虽然decode_fixed32 中也是解码4字节,但传入整个data在方法上不明确,因此传 [position..(position + 4)], 可以更加方便理解,对性能无影响 + let w = Coding::decode_fixed32(&data[position..(position + 4)]); // 向后移动4个字节 position += 4; @@ -67,22 +122,19 @@ impl<'a> Hash { while limit - position - mark != 0 { match limit - position - mark { 3 => { - let us: &[u8] = data[position..].as_ref(); - let as_us: u32 = us[2] as u32; + let as_us: u32 = data[position + 2] as u32; h = h.wrapping_add(as_us.wrapping_shl(16)); mark += 1; - }, + } 2 => { - let us: &[u8] = data[position..].as_ref(); - let as_us: u32 = us[1] as u32; - h = h.wrapping_add( as_us.wrapping_shl(8)); + let as_us: u32 = data[position + 1] as u32; + h = h.wrapping_add(as_us.wrapping_shl(8)); mark += 1; - }, + } 1 => { - let us: &[u8] = data[position..].as_ref(); - let as_us: u32 = us[0] as u32; + let as_us: u32 = data[position] as u32; h = h.wrapping_add(as_us); // h *= m h = h.wrapping_mul(murmur_hash); @@ -90,7 +142,7 @@ impl<'a> Hash { h = h.bitxor(h.wrapping_shr(r)); mark += 1; - }, + } _ => { println!("0") } diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index 770192b0fc79f2416deac8752d4041ef287a75d7..81dce89e612992852a6e784b4a32915532de6bfb 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -1,17 +1,20 @@ -use crate::util::hash::{Hash}; +use crate::util::hash::{Hash, ToHash}; +use crate::util::r#const::HASH_DEFAULT_SEED; +use crate::util::slice::Slice; +use std::slice; #[test] fn test_hash() { let val = "aabbccd"; - let hash_val = Hash::hash(String::from(val), val.len(), 3); + let hash_val = Hash::hash_code(val.as_bytes(), 3); println!("hash:{}", hash_val); let val = "aabbcc"; - let hash_val = Hash::hash(String::from(val), val.len(), 3); + let hash_val = Hash::hash_code(val.as_bytes(), 3); println!("hash:{}", hash_val); let val = "aabbc"; - let hash_val = Hash::hash(String::from(val), val.len(), 3); + let hash_val = Hash::hash_code(val.as_bytes(), 3); println!("hash:{}", hash_val); } @@ -34,20 +37,90 @@ fn test_hash_code() { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - let hash_val = Hash::hash_char(&vec![0], 0, 0xbc9f1d34); + let hash_val = Hash::hash_code(&vec![], 0xbc9f1d34); assert_eq!(0xbc9f1d34, hash_val); - let hash_val = Hash::hash_char(&data1, data1.len(), 0xbc9f1d34); + let hash_val = Hash::hash_code(&data1, 0xbc9f1d34); assert_eq!(0xef1345c4, hash_val); - let hash_val = Hash::hash_char(&data2, data2.len(), 0xbc9f1d34); + let hash_val = Hash::hash_code(&data2, 0xbc9f1d34); assert_eq!(0x5b663814, hash_val); - let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34); + let hash_val = Hash::hash_code(&data3, 0xbc9f1d34); assert_eq!(0x323c078f, hash_val); - let hash_val = Hash::hash_char(&data4, data4.len(), 0xbc9f1d34); + let hash_val = Hash::hash_code(&data4, 0xbc9f1d34); assert_eq!(0xed21633a, hash_val); - let hash_val = Hash::hash_char(&data5, data5.len(), 0x12345678); + let hash_val = Hash::hash_code(&data5, 0x12345678); assert_eq!(0xf333dabb, hash_val); } + +#[test] +fn test_string_to_hash() { + let val = "aabbccd"; + let hash_val_get = Hash::hash_code(val.as_bytes(), HASH_DEFAULT_SEED); + println!("hash_val_get:{}", hash_val_get); + + let val_s = String::from(val); + let string_hash_val = val_s.to_hash(); + println!("string_hash_val:{}", string_hash_val); + + assert_eq!(hash_val_get, string_hash_val); +} + +#[test] +fn test_slice_to_hash() { + let val = "aabbccd"; + let slice: Slice = Slice::from_buf(val.as_bytes()); + let slice_hash_val = slice.to_hash(); + println!("slice_hash_val:{}", slice_hash_val); + + let hash_val_get = Hash::hash_code(slice.to_vec().as_slice(), HASH_DEFAULT_SEED); + println!("hash_code:{}", hash_val_get); + + assert_eq!(hash_val_get, slice_hash_val); +} + +#[test] +fn test_str_to_hash() { + let str = "aabbccd"; + let str_hash_val = str.to_hash(); + println!("str_hash_val:{}", str_hash_val); + + let hash_val_get = Hash::hash_code(str.as_bytes(), HASH_DEFAULT_SEED); + println!("hash_code:{}", hash_val_get); + + assert_eq!(hash_val_get, str_hash_val); +} + +#[test] +fn test_size_base_to_hash() { + // 所有基本类型 u8, i8, u16, u32 + + let buf = ['a','b','c']; + let char_hash_val = &buf.as_slice().to_hash(); + println!("char_hash_val:{}", char_hash_val); + + let buf = ["aa", "bb", "cc"].as_slice(); + let string_hash_val = &buf.to_hash(); + println!("string_hash_val:{}", string_hash_val); + + let buf = [1, 2, u32::MAX].as_slice(); + let u32_hash_val = &buf.to_hash(); + println!("u32_hash_val:{}", u32_hash_val); +} + +#[test] +fn test_size_vec_to_hash() { + let buf = vec!['a','b','c']; + let char_hash_val = buf.to_hash(); + println!("char_hash_val:{}", char_hash_val); + + let buf = ["aa", "bb", "cc"].as_slice(); + let string_hash_val = &buf.to_hash(); + println!("string_hash_val:{}", string_hash_val); + + let buf = [1, 2, u32::MAX].as_slice(); + let u32_hash_val = &buf.to_hash(); + println!("u32_hash_val:{}", u32_hash_val); +}