From 4026c7e469079ada35f1f3afc14e7e58b682dc5c Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Sat, 17 Dec 2022 11:01:35 +0800 Subject: [PATCH 1/5] fix some code, add ToHash trait --- src/util/hash.rs | 98 ++++++++++++++++++++++++------------------- src/util/hash_test.rs | 18 ++++---- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/src/util/hash.rs b/src/util/hash.rs index ea82575..2fd4e0b 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -1,43 +1,59 @@ use std::ops::{BitXor, Mul}; + use crate::traits::coding_trait::CodingTrait; use crate::util::coding::Coding; +use crate::util::slice::Slice; -/// 本方案中,采用的是MurMurHash的一种变体,是一种高效低碰撞的非加密型哈希函数。具有较高的平衡性与低碰撞率 -pub struct Hash {} +/// 一种可以计算 hash 的特质 +pub trait ToHash { + fn to_hash(&self) -> u32; +} -impl<'a> Hash { - /// 计算 data 的 hash - /// - /// # Arguments - /// - /// * `data`: - /// * `n`: data 的长度 - /// * `seed`: 随机数种子 - /// - /// returns: u32 - /// - /// # Examples - /// - /// ``` - /// let data3: Vec = vec![0xe2, 0x99, 0xa5]; - /// let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34); /// - /// assert_eq!(0x323c078f, hash_val); - /// ``` - pub fn hash(mut data: String, data_size: usize, seed: u32) -> u32 { - let data_u8_vec; - unsafe { - data_u8_vec = data.as_mut_vec(); - } +/// 所有基本类型 u8, i8, u16, u32 ... 的数组都可以实现 hash 值计算 +/// Sample: +/// ``` +/// let hash = vec!['a','b','c'].to_hash(); +/// ``` +impl ToHash for Vec { + fn to_hash(&self) -> u32 { + todo!() + } +} + +/// 所有基本类型 u8, i8, u16, u32 ... 的slice都可以实现 hash 值计算 +/// Sample: +/// ``` +/// let buf = ['a','b','c']; +/// let hash = &buf.to_hash(); +/// ``` +impl ToHash for &[T] { + fn to_hash(&self) -> u32 { + todo!() + } +} - Hash::hash_char(data_u8_vec, data_size, seed) +impl ToHash for Slice { + fn to_hash(&self) -> u32 { + todo!() } +} - pub fn hash_char(data: &Vec, data_size: usize, seed: u32) -> u32 { - let murmur_hash : u32 = 0xc6a4a793; - let r : u32 = 24; +impl ToHash for String { + fn to_hash(&self) -> u32 { + todo!() + } +} - let limit: usize = data_size; - let mul_first = data_size.mul(murmur_hash as usize); // x = data_size * murmur_hash +/// 本方案中,采用的是MurMurHash的一种变体,是一种高效低碰撞的非加密型哈希函数。具有较高的平衡性与低碰撞率 +pub struct Hash {} + +impl Hash { + pub fn hash_char(data: &[u8], seed: u32) -> u32 { + let murmur_hash: u32 = 0xc6a4a793; + let r: u32 = 24; + + let limit: usize = data.len(); + let mul_first = limit.mul(murmur_hash as usize); // x = data_size * murmur_hash let mut h: u32 = seed.bitxor(mul_first as u32); // h = seed ^ x // 每次按照四字节长度读取字节流中的数据 w,并使用普通的哈希函数计算哈希值。 @@ -45,8 +61,7 @@ impl<'a> Hash { while position + 4 <= limit { //每次解码前4个字节,直到最后剩下小于4个字节 // rust的 &[u8] 是胖指针,带长度信息的,会做range check,所以是安全的。 - let slice_str: &[u8] = data[position..(position + 4)].as_ref(); - let w: u32 = Coding::decode_fixed32(slice_str); + let w = Coding::decode_fixed32(&data[position..]); // 向后移动4个字节 position += 4; @@ -67,22 +82,19 @@ impl<'a> Hash { while limit - position - mark != 0 { match limit - position - mark { 3 => { - let us: &[u8] = data[position..].as_ref(); - let as_us: u32 = us[2] as u32; + let as_us: u32 = data[position + 2] as u32; h = h.wrapping_add(as_us.wrapping_shl(16)); mark += 1; - }, + } 2 => { - let us: &[u8] = data[position..].as_ref(); - let as_us: u32 = us[1] as u32; - h = h.wrapping_add( as_us.wrapping_shl(8)); + let as_us: u32 = data[position + 1] as u32; + h = h.wrapping_add(as_us.wrapping_shl(8)); mark += 1; - }, + } 1 => { - let us: &[u8] = data[position..].as_ref(); - let as_us: u32 = us[0] as u32; + let as_us: u32 = data[position] as u32; h = h.wrapping_add(as_us); // h *= m h = h.wrapping_mul(murmur_hash); @@ -90,7 +102,7 @@ impl<'a> Hash { h = h.bitxor(h.wrapping_shr(r)); mark += 1; - }, + } _ => { println!("0") } diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index 770192b..e02bd4b 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -3,15 +3,15 @@ use crate::util::hash::{Hash}; #[test] fn test_hash() { let val = "aabbccd"; - let hash_val = Hash::hash(String::from(val), val.len(), 3); + let hash_val = Hash::hash_char(val.as_bytes(), 3); println!("hash:{}", hash_val); let val = "aabbcc"; - let hash_val = Hash::hash(String::from(val), val.len(), 3); + let hash_val = Hash::hash_char(val.as_bytes(), 3); println!("hash:{}", hash_val); let val = "aabbc"; - let hash_val = Hash::hash(String::from(val), val.len(), 3); + let hash_val = Hash::hash_char(val.as_bytes(), 3); println!("hash:{}", hash_val); } @@ -34,20 +34,20 @@ fn test_hash_code() { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - let hash_val = Hash::hash_char(&vec![0], 0, 0xbc9f1d34); + let hash_val = Hash::hash_char(&vec![0],0xbc9f1d34); assert_eq!(0xbc9f1d34, hash_val); - let hash_val = Hash::hash_char(&data1, data1.len(), 0xbc9f1d34); + let hash_val = Hash::hash_char(&data1, 0xbc9f1d34); assert_eq!(0xef1345c4, hash_val); - let hash_val = Hash::hash_char(&data2, data2.len(), 0xbc9f1d34); + let hash_val = Hash::hash_char(&data2, 0xbc9f1d34); assert_eq!(0x5b663814, hash_val); - let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34); + let hash_val = Hash::hash_char(&data3, 0xbc9f1d34); assert_eq!(0x323c078f, hash_val); - let hash_val = Hash::hash_char(&data4, data4.len(), 0xbc9f1d34); + let hash_val = Hash::hash_char(&data4, 0xbc9f1d34); assert_eq!(0xed21633a, hash_val); - let hash_val = Hash::hash_char(&data5, data5.len(), 0x12345678); + let hash_val = Hash::hash_char(&data5, 0x12345678); assert_eq!(0xf333dabb, hash_val); } -- Gitee From 8b2d857e28525bf9056059c24a1830150bc3d999 Mon Sep 17 00:00:00 2001 From: wangboo <5417808+wangboa@user.noreply.gitee.com> Date: Sat, 17 Dec 2022 11:04:11 +0800 Subject: [PATCH 2/5] add &str to impl ToHash trait --- src/util/hash.rs | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/util/hash.rs b/src/util/hash.rs index 2fd4e0b..ec6ef7e 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -32,6 +32,17 @@ impl ToHash for &[T] { } } +/// 实现了 &str 转 ToHash 的特质 +/// Sample: +/// ``` +/// let hash = "abc".to_hash(); +/// ``` +impl ToHash for &str { + fn to_hash(&self) -> u32 { + todo!() + } +} + impl ToHash for Slice { fn to_hash(&self) -> u32 { todo!() -- Gitee From a96294a65c4ed792b6041020fd678c47eb278865 Mon Sep 17 00:00:00 2001 From: fengyang Date: Sat, 17 Dec 2022 12:09:59 +0800 Subject: [PATCH 3/5] hash test fix --- src/util/hash.rs | 3 ++- src/util/hash_test.rs | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/util/hash.rs b/src/util/hash.rs index ec6ef7e..c60f0c1 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -72,7 +72,8 @@ impl Hash { while position + 4 <= limit { //每次解码前4个字节,直到最后剩下小于4个字节 // rust的 &[u8] 是胖指针,带长度信息的,会做range check,所以是安全的。 - let w = Coding::decode_fixed32(&data[position..]); + // 虽然decode_fixed32 中也是解码4字节,但传入整个data在方法上不明确,因此传 [position..(position + 4)], 可以更加方便理解,对性能无影响 + let w = Coding::decode_fixed32(&data[position..(position + 4)]); // 向后移动4个字节 position += 4; diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index e02bd4b..06b6168 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -34,7 +34,7 @@ fn test_hash_code() { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - let hash_val = Hash::hash_char(&vec![0],0xbc9f1d34); + let hash_val = Hash::hash_char(&vec![],0xbc9f1d34); assert_eq!(0xbc9f1d34, hash_val); let hash_val = Hash::hash_char(&data1, 0xbc9f1d34); -- Gitee From dcee6c20f6195dec93050627c382b0fb64ca7705 Mon Sep 17 00:00:00 2001 From: fengyang Date: Sat, 17 Dec 2022 14:34:35 +0800 Subject: [PATCH 4/5] =?UTF-8?q?ToHash=20=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 4 +- src/util/const.rs | 3 ++ src/util/hash.rs | 28 +++++++++---- src/util/hash_test.rs | 93 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 109 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index dc51f61..da9d596 100644 --- a/README.md +++ b/README.md @@ -46,9 +46,9 @@ LevelDB for rust | Coding (Primitive Type SerDe) | colagy | | | Comparator | fengyang | 85% | | Status | fengyang | 100% | -| BloomFilter | fengyang | 0% | +| BloomFilter | fengyang | 10% | | CRC | wangboo、lxd5866 | | | Env | lxd5866 | | -| Hash | fengyang | 100% | +| Hash | fengyang | 100% | | MutexLock | kazeseiriou | | | Histgram | kazeseiriou | | \ No newline at end of file diff --git a/src/util/const.rs b/src/util/const.rs index e7d7eac..b57d833 100644 --- a/src/util/const.rs +++ b/src/util/const.rs @@ -2,3 +2,6 @@ /// 冒号 + 空格 pub const COLON_WHITE_SPACE: &'static str = ": "; + +/// hash 的默认seed +pub const HASH_DEFAULT_SEED: u32 = 0xbc9f1d34; diff --git a/src/util/hash.rs b/src/util/hash.rs index c60f0c1..bc8c601 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -1,7 +1,12 @@ use std::ops::{BitXor, Mul}; +use std::mem::size_of; +use std::slice as stds; use crate::traits::coding_trait::CodingTrait; use crate::util::coding::Coding; +use crate::util::crc::AsCrc; +use crate::util::r#const::HASH_DEFAULT_SEED; +use crate::util::slice; use crate::util::slice::Slice; /// 一种可以计算 hash 的特质 @@ -16,7 +21,9 @@ pub trait ToHash { /// ``` impl ToHash for Vec { fn to_hash(&self) -> u32 { - todo!() + let v_v = self.as_slice(); + + v_v.to_hash() } } @@ -24,11 +31,18 @@ impl ToHash for Vec { /// Sample: /// ``` /// let buf = ['a','b','c']; -/// let hash = &buf.to_hash(); +/// let hash_val = &buf.as_slice().to_hash(); /// ``` impl ToHash for &[T] { + #[inline] fn to_hash(&self) -> u32 { - todo!() + let ptr_u8 = self.as_ptr() as *const _ as *const u8; + + let data = unsafe { + stds::from_raw_parts(ptr_u8, size_of::() * self.len()) + }; + + Hash::hash_code(data, HASH_DEFAULT_SEED) } } @@ -39,19 +53,19 @@ impl ToHash for &[T] { /// ``` impl ToHash for &str { fn to_hash(&self) -> u32 { - todo!() + Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) } } impl ToHash for Slice { fn to_hash(&self) -> u32 { - todo!() + Hash::hash_code(self.to_vec().as_slice(), HASH_DEFAULT_SEED) } } impl ToHash for String { fn to_hash(&self) -> u32 { - todo!() + Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) } } @@ -59,7 +73,7 @@ impl ToHash for String { pub struct Hash {} impl Hash { - pub fn hash_char(data: &[u8], seed: u32) -> u32 { + pub fn hash_code(data: &[u8], seed: u32) -> u32 { let murmur_hash: u32 = 0xc6a4a793; let r: u32 = 24; diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs index 06b6168..81dce89 100644 --- a/src/util/hash_test.rs +++ b/src/util/hash_test.rs @@ -1,17 +1,20 @@ -use crate::util::hash::{Hash}; +use crate::util::hash::{Hash, ToHash}; +use crate::util::r#const::HASH_DEFAULT_SEED; +use crate::util::slice::Slice; +use std::slice; #[test] fn test_hash() { let val = "aabbccd"; - let hash_val = Hash::hash_char(val.as_bytes(), 3); + let hash_val = Hash::hash_code(val.as_bytes(), 3); println!("hash:{}", hash_val); let val = "aabbcc"; - let hash_val = Hash::hash_char(val.as_bytes(), 3); + let hash_val = Hash::hash_code(val.as_bytes(), 3); println!("hash:{}", hash_val); let val = "aabbc"; - let hash_val = Hash::hash_char(val.as_bytes(), 3); + let hash_val = Hash::hash_code(val.as_bytes(), 3); println!("hash:{}", hash_val); } @@ -34,20 +37,90 @@ fn test_hash_code() { 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]; - let hash_val = Hash::hash_char(&vec![],0xbc9f1d34); + let hash_val = Hash::hash_code(&vec![], 0xbc9f1d34); assert_eq!(0xbc9f1d34, hash_val); - let hash_val = Hash::hash_char(&data1, 0xbc9f1d34); + let hash_val = Hash::hash_code(&data1, 0xbc9f1d34); assert_eq!(0xef1345c4, hash_val); - let hash_val = Hash::hash_char(&data2, 0xbc9f1d34); + let hash_val = Hash::hash_code(&data2, 0xbc9f1d34); assert_eq!(0x5b663814, hash_val); - let hash_val = Hash::hash_char(&data3, 0xbc9f1d34); + let hash_val = Hash::hash_code(&data3, 0xbc9f1d34); assert_eq!(0x323c078f, hash_val); - let hash_val = Hash::hash_char(&data4, 0xbc9f1d34); + let hash_val = Hash::hash_code(&data4, 0xbc9f1d34); assert_eq!(0xed21633a, hash_val); - let hash_val = Hash::hash_char(&data5, 0x12345678); + let hash_val = Hash::hash_code(&data5, 0x12345678); assert_eq!(0xf333dabb, hash_val); } + +#[test] +fn test_string_to_hash() { + let val = "aabbccd"; + let hash_val_get = Hash::hash_code(val.as_bytes(), HASH_DEFAULT_SEED); + println!("hash_val_get:{}", hash_val_get); + + let val_s = String::from(val); + let string_hash_val = val_s.to_hash(); + println!("string_hash_val:{}", string_hash_val); + + assert_eq!(hash_val_get, string_hash_val); +} + +#[test] +fn test_slice_to_hash() { + let val = "aabbccd"; + let slice: Slice = Slice::from_buf(val.as_bytes()); + let slice_hash_val = slice.to_hash(); + println!("slice_hash_val:{}", slice_hash_val); + + let hash_val_get = Hash::hash_code(slice.to_vec().as_slice(), HASH_DEFAULT_SEED); + println!("hash_code:{}", hash_val_get); + + assert_eq!(hash_val_get, slice_hash_val); +} + +#[test] +fn test_str_to_hash() { + let str = "aabbccd"; + let str_hash_val = str.to_hash(); + println!("str_hash_val:{}", str_hash_val); + + let hash_val_get = Hash::hash_code(str.as_bytes(), HASH_DEFAULT_SEED); + println!("hash_code:{}", hash_val_get); + + assert_eq!(hash_val_get, str_hash_val); +} + +#[test] +fn test_size_base_to_hash() { + // 所有基本类型 u8, i8, u16, u32 + + let buf = ['a','b','c']; + let char_hash_val = &buf.as_slice().to_hash(); + println!("char_hash_val:{}", char_hash_val); + + let buf = ["aa", "bb", "cc"].as_slice(); + let string_hash_val = &buf.to_hash(); + println!("string_hash_val:{}", string_hash_val); + + let buf = [1, 2, u32::MAX].as_slice(); + let u32_hash_val = &buf.to_hash(); + println!("u32_hash_val:{}", u32_hash_val); +} + +#[test] +fn test_size_vec_to_hash() { + let buf = vec!['a','b','c']; + let char_hash_val = buf.to_hash(); + println!("char_hash_val:{}", char_hash_val); + + let buf = ["aa", "bb", "cc"].as_slice(); + let string_hash_val = &buf.to_hash(); + println!("string_hash_val:{}", string_hash_val); + + let buf = [1, 2, u32::MAX].as_slice(); + let u32_hash_val = &buf.to_hash(); + println!("u32_hash_val:{}", u32_hash_val); +} -- Gitee From 49aac72cc39d37e76422d1af0fff52bf25d4e5f8 Mon Sep 17 00:00:00 2001 From: fengyang Date: Sat, 17 Dec 2022 14:38:52 +0800 Subject: [PATCH 5/5] ToHash doc --- README.md | 2 +- src/util/hash.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index da9d596..cb49cd3 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ LevelDB for rust | Random | colagy | | | Cache | colagy | | | Coding (Primitive Type SerDe) | colagy | | -| Comparator | fengyang | 85% | +| Comparator | fengyang | 90% | | Status | fengyang | 100% | | BloomFilter | fengyang | 10% | | CRC | wangboo、lxd5866 | | diff --git a/src/util/hash.rs b/src/util/hash.rs index bc8c601..fef5dc5 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -57,12 +57,26 @@ impl ToHash for &str { } } +/// 实现了 Slice 转 ToHash 的特质 +/// Sample: +/// ``` +/// let val = "aabbccd"; +/// let slice: Slice = Slice::from_buf(val.as_bytes()); +/// let slice_hash_val = slice.to_hash(); +/// ``` impl ToHash for Slice { fn to_hash(&self) -> u32 { Hash::hash_code(self.to_vec().as_slice(), HASH_DEFAULT_SEED) } } +/// 实现了 String 转 ToHash 的特质 +/// Sample: +/// ``` +/// let val = "aabbccd"; +/// let val_s = String::from(val); +/// let string_hash_val = val_s.to_hash(); +/// ``` impl ToHash for String { fn to_hash(&self) -> u32 { Hash::hash_code(self.as_bytes(), HASH_DEFAULT_SEED) -- Gitee