From 9f2ee41424fd89ae36386c4c34c2af62290a33ac Mon Sep 17 00:00:00 2001
From: fengyang <yueny09@163.com>
Date: Fri, 16 Dec 2022 01:03:38 +0800
Subject: [PATCH 1/5] =?UTF-8?q?=E5=88=9D=E6=AD=A5hash=E5=AE=9E=E7=8E=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 Cargo.toml            |  5 +++
 src/util/hash.rs      | 71 ++++++++++++++++++++++++++++++++++++++++---
 src/util/hash_test.rs | 11 +++++++
 3 files changed, 83 insertions(+), 4 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 7b77544..6f688ef 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,3 +10,8 @@ path = "src/lib.rs"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
+
+
+[profile.dev]
+
+[profile.release]
diff --git a/src/util/hash.rs b/src/util/hash.rs
index d383f2b..5a311c0 100644
--- a/src/util/hash.rs
+++ b/src/util/hash.rs
@@ -1,5 +1,11 @@
+use std::ops::{BitXor, Mul};
+use crate::traits::coding_trait::CodingTrait;
+use crate::util::coding::Coding;
 
-pub trait AsHash {
+/// 本方案中，采用的是MurMurHash的一种变体，是一种高效低碰撞的非加密型哈希函数。具有较高的平衡性与低碰撞率
+pub struct Hash {}
+
+impl<'a> Hash {
     ///
     ///
     /// # Arguments
@@ -15,7 +21,64 @@ pub trait AsHash {
     /// ```
     ///
     /// ```
-    fn hash(data: String, n: usize, seed: u32) -> u32;
-}
+    pub fn hash(data: String, data_size: usize, seed: u32) -> u32 {
+        let murmur_hash : u32 = 0xc6a4a793;
+        let r : u32 = 24;
+
+        let limit: usize = data_size;
+        let mul_first = data_size.mul(murmur_hash as usize); // x = data_size * murmur_hash
+        let mut h: usize = seed.bitxor(mul_first as u32) as usize;  // h = seed ^ x
+        
+        // 每次按照四字节长度读取字节流中的数据 w，并使用普通的哈希函数计算哈希值。
+        let mut position: usize = 0;
+        while position + 4 <= limit {
+            //每次解码前4个字节，直到最后剩下小于4个字节
+            // rust的 &[u8] 是胖指针，带长度信息的，会做range check，所以是安全的。
+            let slice_str: &[u8] = data[position..(position + 4)].as_ref();
+            let w: u32 = Coding::decode_fixed32(slice_str);
+
+            // 向后移动4个字节
+            position += 4;
+
+            // /计算过程中使用了自然溢出特性
+            // h += w
+            h = h.wrapping_add(w as usize);
+            // h *= m
+            h = h.wrapping_mul(murmur_hash as usize);
+            // ^ 按位异或 bitxor , >> 右移位 shr, << 左移位 shl
+            // h ^= (h >> 16) == h ^= h.shr(16);
+            h = h.bitxor(h.wrapping_shr(16));
+        }
+
+        // 四字节读取则为了加速，最终可能剩下 3/2/1 个多余的字节，
+        // 将剩下的字节转化到 h 里面
+        let cu = limit - position;
+        match cu {
+            3 => {
+                let us: &[u8] = data[position..].as_ref();
+                h = h.wrapping_add((us[2] as u32).wrapping_shl(16) as usize);
+                h = h.wrapping_add((us[1] as u32).wrapping_shl(8) as usize);
+                h = h.wrapping_add(us[0].into());
+            },
+            2 => {
+                let us: &[u8] = data[position..].as_ref();
+                h = h.wrapping_add((us[1] as u32).wrapping_shl(8) as usize);
+                h = h.wrapping_add(us[0].into());
+            },
+            1 => {
+                let us: &[u8] = data[position..].as_ref();
+                h = h.wrapping_add(us[0].into());
+                // h *= m
+                h = h.wrapping_mul(murmur_hash as usize);
+                // h ^= (h >> r) == h ^= h.shr(r);
+                h = h.bitxor(h.wrapping_shr(r));
+            },
+            _ => {}
+        };
+
+        println!("hash usize: {}", h);
+        println!("hash u32: {}", h as u32);
 
-pub struct Hash {}
\ No newline at end of file
+        h as u32
+    }
+}
\ No newline at end of file
diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs
index 0384579..e2564a0 100644
--- a/src/util/hash_test.rs
+++ b/src/util/hash_test.rs
@@ -1,6 +1,17 @@
+use crate::util::hash::{Hash};
 
 #[test]
 fn test_hash() {
+    let val = "aabbccd";
+    let hash_val = Hash::hash(String::from(val), val.len(), 3);
+    println!("hash:{}", hash_val);
 
+    let val = "aabbcc";
+    let hash_val = Hash::hash(String::from(val), val.len(), 3);
+    println!("hash:{}", hash_val);
+
+    let val = "aabbc";
+    let hash_val = Hash::hash(String::from(val), val.len(), 3);
+    println!("hash:{}", hash_val);
 
 }
\ No newline at end of file
-- 
Gitee


From bff8833b7dba6058bcbc11e2bb215a133fba91b4 Mon Sep 17 00:00:00 2001
From: fengyang <yueny09@163.com>
Date: Fri, 16 Dec 2022 14:03:42 +0800
Subject: [PATCH 2/5] =?UTF-8?q?=E5=88=9D=E6=AD=A5hash=E5=AE=9E=E7=8E=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/util/hash.rs      | 13 +++++++++++--
 src/util/hash_test.rs |  9 +++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/src/util/hash.rs b/src/util/hash.rs
index 5a311c0..17aea26 100644
--- a/src/util/hash.rs
+++ b/src/util/hash.rs
@@ -21,14 +21,23 @@ impl<'a> Hash {
     /// ```
     ///
     /// ```
-    pub fn hash(data: String, data_size: usize, seed: u32) -> u32 {
+    pub fn hash(mut data: String, data_size: usize, seed: u32) -> u32 {
+        let data_u8_vec;
+        unsafe {
+            data_u8_vec = data.as_mut_vec();
+        }
+
+        Hash::hash_char(data_u8_vec, data_size, seed)
+    }
+
+    pub fn hash_char(data: &Vec<u8>, data_size: usize, seed: u32) -> u32 {
         let murmur_hash : u32 = 0xc6a4a793;
         let r : u32 = 24;
 
         let limit: usize = data_size;
         let mul_first = data_size.mul(murmur_hash as usize); // x = data_size * murmur_hash
         let mut h: usize = seed.bitxor(mul_first as u32) as usize;  // h = seed ^ x
-        
+
         // 每次按照四字节长度读取字节流中的数据 w，并使用普通的哈希函数计算哈希值。
         let mut position: usize = 0;
         while position + 4 <= limit {
diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs
index e2564a0..29ece53 100644
--- a/src/util/hash_test.rs
+++ b/src/util/hash_test.rs
@@ -13,5 +13,14 @@ fn test_hash() {
     let val = "aabbc";
     let hash_val = Hash::hash(String::from(val), val.len(), 3);
     println!("hash:{}", hash_val);
+}
 
+#[test]
+fn test_hash_code() {
+    let data4: Vec<u8> = vec![0xe1, 0x80, 0xb9, 0x32];
+
+    let hash_val = Hash::hash_char(&data4, data4.len(), 3);
+    println!("hash:{}", hash_val);
+    // 3978388282
+    // assert_eq!(0xed21633a, hash_val);
 }
\ No newline at end of file
-- 
Gitee


From 7f5cef6fa5f234da367525f381f7c78f0dfa03b6 Mon Sep 17 00:00:00 2001
From: fengyang <yueny09@163.com>
Date: Fri, 16 Dec 2022 22:13:34 +0800
Subject: [PATCH 3/5] =?UTF-8?q?hash=E5=AE=9E=E7=8E=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/util/hash.rs      | 69 ++++++++++++++++++++++++-------------------
 src/util/hash_test.rs | 37 +++++++++++++++++++----
 2 files changed, 71 insertions(+), 35 deletions(-)

diff --git a/src/util/hash.rs b/src/util/hash.rs
index 17aea26..a134fc7 100644
--- a/src/util/hash.rs
+++ b/src/util/hash.rs
@@ -19,7 +19,9 @@ impl<'a> Hash {
     /// # Examples
     ///
     /// ```
-    ///
+    /// let data3: Vec<u8> = vec![0xe2, 0x99, 0xa5];
+    /// let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34);
+    /// assert_eq!(0x323c078f, hash_val);
     /// ```
     pub fn hash(mut data: String, data_size: usize, seed: u32) -> u32 {
         let data_u8_vec;
@@ -36,7 +38,7 @@ impl<'a> Hash {
 
         let limit: usize = data_size;
         let mul_first = data_size.mul(murmur_hash as usize); // x = data_size * murmur_hash
-        let mut h: usize = seed.bitxor(mul_first as u32) as usize;  // h = seed ^ x
+        let mut h: u32 = seed.bitxor(mul_first as u32);  // h = seed ^ x
 
         // 每次按照四字节长度读取字节流中的数据 w，并使用普通的哈希函数计算哈希值。
         let mut position: usize = 0;
@@ -51,9 +53,9 @@ impl<'a> Hash {
 
             // /计算过程中使用了自然溢出特性
             // h += w
-            h = h.wrapping_add(w as usize);
+            h = h.wrapping_add(w);
             // h *= m
-            h = h.wrapping_mul(murmur_hash as usize);
+            h = h.wrapping_mul(murmur_hash);
             // ^ 按位异或 bitxor , >> 右移位 shr, << 左移位 shl
             // h ^= (h >> 16) == h ^= h.shr(16);
             h = h.bitxor(h.wrapping_shr(16));
@@ -61,33 +63,40 @@ impl<'a> Hash {
 
         // 四字节读取则为了加速，最终可能剩下 3/2/1 个多余的字节，
         // 将剩下的字节转化到 h 里面
-        let cu = limit - position;
-        match cu {
-            3 => {
-                let us: &[u8] = data[position..].as_ref();
-                h = h.wrapping_add((us[2] as u32).wrapping_shl(16) as usize);
-                h = h.wrapping_add((us[1] as u32).wrapping_shl(8) as usize);
-                h = h.wrapping_add(us[0].into());
-            },
-            2 => {
-                let us: &[u8] = data[position..].as_ref();
-                h = h.wrapping_add((us[1] as u32).wrapping_shl(8) as usize);
-                h = h.wrapping_add(us[0].into());
-            },
-            1 => {
-                let us: &[u8] = data[position..].as_ref();
-                h = h.wrapping_add(us[0].into());
-                // h *= m
-                h = h.wrapping_mul(murmur_hash as usize);
-                // h ^= (h >> r) == h ^= h.shr(r);
-                h = h.bitxor(h.wrapping_shr(r));
-            },
-            _ => {}
-        };
+        let mut mark: usize = 0;
+        while limit - position - mark != 0 {
+            match limit - position - mark {
+                3 => {
+                    let us: &[u8] = data[position..].as_ref();
+                    let as_us: u32 = us[2] as u32;
+                    h = h.wrapping_add(as_us.wrapping_shl(16));
+
+                    mark += 1;
+                },
+                2 => {
+                    let us: &[u8] = data[position..].as_ref();
+                    let as_us: u32 = us[1] as u32;
+                    h = h.wrapping_add( as_us.wrapping_shl(8));
 
-        println!("hash usize: {}", h);
-        println!("hash u32: {}", h as u32);
+                    mark += 1;
+                },
+                1 => {
+                    let us: &[u8] = data[position..].as_ref();
+                    let as_us: u32 = us[0] as u32;
+                    h = h.wrapping_add(as_us);
+                    // h *= m
+                    h = h.wrapping_mul(murmur_hash);
+                    // h ^= (h >> r) ==> h ^= h.shr(r);
+                    h = h.bitxor(h.wrapping_shr(r));
+
+                    mark += 1;
+                },
+                _ => {
+                    println!("0")
+                }
+            };
+        }
 
-        h as u32
+        h
     }
 }
\ No newline at end of file
diff --git a/src/util/hash_test.rs b/src/util/hash_test.rs
index 29ece53..770192b 100644
--- a/src/util/hash_test.rs
+++ b/src/util/hash_test.rs
@@ -17,10 +17,37 @@ fn test_hash() {
 
 #[test]
 fn test_hash_code() {
+    let data1: Vec<u8> = vec![0x62];
+    let data2: Vec<u8> = vec![0xc3, 0x97];
+    let data3: Vec<u8> = vec![0xe2, 0x99, 0xa5];
     let data4: Vec<u8> = vec![0xe1, 0x80, 0xb9, 0x32];
+    let data5: Vec<u8> = vec![0x01, 0xc0, 0x00, 0x00,
+                               0x00, 0x00, 0x00, 0x00,
+                               0x00, 0x00, 0x00, 0x00,
+                               0x00, 0x00, 0x00, 0x00,
+                               0x14, 0x00, 0x00, 0x00,
+                               0x00, 0x00, 0x04, 0x00,
+                               0x00, 0x00, 0x00, 0x14,
+                               0x00, 0x00, 0x00, 0x18,
+                               0x28, 0x00, 0x00, 0x00,
+                               0x00, 0x00, 0x00, 0x00,
+                               0x02, 0x00, 0x00, 0x00,
+                               0x00, 0x00, 0x00, 0x00];
 
-    let hash_val = Hash::hash_char(&data4, data4.len(), 3);
-    println!("hash:{}", hash_val);
-    // 3978388282
-    // assert_eq!(0xed21633a, hash_val);
-}
\ No newline at end of file
+    let hash_val = Hash::hash_char(&vec![0], 0, 0xbc9f1d34);
+    assert_eq!(0xbc9f1d34, hash_val);
+
+    let hash_val = Hash::hash_char(&data1, data1.len(), 0xbc9f1d34);
+    assert_eq!(0xef1345c4, hash_val);
+
+    let hash_val = Hash::hash_char(&data2, data2.len(), 0xbc9f1d34);
+    assert_eq!(0x5b663814, hash_val);
+    let hash_val = Hash::hash_char(&data3, data3.len(), 0xbc9f1d34);
+    assert_eq!(0x323c078f, hash_val);
+
+    let hash_val = Hash::hash_char(&data4, data4.len(), 0xbc9f1d34);
+    assert_eq!(0xed21633a, hash_val);
+
+    let hash_val = Hash::hash_char(&data5, data5.len(), 0x12345678);
+    assert_eq!(0xf333dabb, hash_val);
+}
-- 
Gitee


From 8a71d815a72a28a1c0af2913851af6980b24f499 Mon Sep 17 00:00:00 2001
From: fengyang <yueny09@163.com>
Date: Fri, 16 Dec 2022 22:24:03 +0800
Subject: [PATCH 4/5] =?UTF-8?q?FilterPolicy=20=E5=AE=9A=E4=B9=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                         | 2 +-
 src/traits/filter_policy_trait.rs | 7 +++++++
 src/util/filter_policy.rs         | 4 ++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b7d8eb0..dc51f61 100644
--- a/README.md
+++ b/README.md
@@ -49,6 +49,6 @@ LevelDB for rust
 | BloomFilter                   | fengyang        | 0%   |
 | CRC                           | wangboo、lxd5866 |      |
 | Env                           | lxd5866         |      |
-| Hash                          | fengyang         | 30%  |
+| Hash                          | fengyang         | 100%  |
 | MutexLock                     | kazeseiriou     |      |
 | Histgram                      | kazeseiriou     |      |
\ No newline at end of file
diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs
index fcfd19d..c751282 100644
--- a/src/traits/filter_policy_trait.rs
+++ b/src/traits/filter_policy_trait.rs
@@ -1,9 +1,16 @@
 use crate::util::slice::Slice;
 
+/// 用于key过滤，可以快速的排除不存在的key
 pub trait FilterPolicy {
 
+    /// filter的名字
+    /// Return the name of this policy.  Note that if the filter encoding
+    /// changes in an incompatible way, the name returned by this method
+    /// must be changed.  Otherwise, old incompatible filters may be
+    /// passed to methods of this type.
     fn name() -> String;
 
     fn create_filter(&self, keys: Slice, n: u32) -> String;
 
+    fn key_may_match(key: &Slice, filter: &Slice) -> bool;
 }
\ No newline at end of file
diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs
index 9e9811e..ca88fa3 100644
--- a/src/util/filter_policy.rs
+++ b/src/util/filter_policy.rs
@@ -15,4 +15,8 @@ impl FilterPolicy for BloomFilterPolicy {
     fn create_filter(&self, keys: Slice, n: u32) -> String {
         todo!()
     }
+
+    fn key_may_match(key: &Slice, filter: &Slice) -> bool {
+        todo!()
+    }
 }
\ No newline at end of file
-- 
Gitee


From a4f1e02eb60070ed401cbd03da70f87414d895de Mon Sep 17 00:00:00 2001
From: fengyang <yueny09@163.com>
Date: Fri, 16 Dec 2022 22:33:01 +0800
Subject: [PATCH 5/5] =?UTF-8?q?FilterPolicy=20=E5=AE=9A=E4=B9=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/traits/filter_policy_trait.rs | 26 +++++++++++++++++++++++++-
 src/util/filter_policy.rs         |  2 +-
 src/util/hash.rs                  |  2 +-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/src/traits/filter_policy_trait.rs b/src/traits/filter_policy_trait.rs
index c751282..2ec339d 100644
--- a/src/traits/filter_policy_trait.rs
+++ b/src/traits/filter_policy_trait.rs
@@ -10,7 +10,31 @@ pub trait FilterPolicy {
     /// passed to methods of this type.
     fn name() -> String;
 
-    fn create_filter(&self, keys: Slice, n: u32) -> String;
+    /// 根据指定的参数创建过滤器，并返回结果， 结果为dst的原始内容 + append结果。
+    /// 参数keys[0,n-1]包含依据用户提供的comparator排序的key列表--可重复，
+    /// 并把根据这些key创建的filter追加到 dst中。
+    ///
+    /// keys[0,n-1] contains a list of keys (potentially with duplicates)
+    /// that are ordered according to the user supplied comparator.
+    /// Append a filter that summarizes keys[0,n-1] to *dst.
+    ///
+    /// Warning: do not change the initial contents of dst.  Instead,
+    /// append the newly constructed filter to dst.
+    ///
+    /// # Arguments
+    ///
+    /// * `keys`:
+    /// * `n`:
+    /// * `dst`:
+    ///
+    /// returns: String
+    ///
+    /// # Examples
+    ///
+    /// ```
+    ///
+    /// ```
+    fn create_filter(&self, keys: Slice, n: u32, dst: String) -> String;
 
     fn key_may_match(key: &Slice, filter: &Slice) -> bool;
 }
\ No newline at end of file
diff --git a/src/util/filter_policy.rs b/src/util/filter_policy.rs
index ca88fa3..b40e0dd 100644
--- a/src/util/filter_policy.rs
+++ b/src/util/filter_policy.rs
@@ -12,7 +12,7 @@ impl FilterPolicy for BloomFilterPolicy {
         String::from("leveldb.BuiltinBloomFilter2")
     }
 
-    fn create_filter(&self, keys: Slice, n: u32) -> String {
+    fn create_filter(&self, keys: Slice, n: u32, dst: String) -> String {
         todo!()
     }
 
diff --git a/src/util/hash.rs b/src/util/hash.rs
index a134fc7..ea82575 100644
--- a/src/util/hash.rs
+++ b/src/util/hash.rs
@@ -6,7 +6,7 @@ use crate::util::coding::Coding;
 pub struct Hash {}
 
 impl<'a> Hash {
-    ///
+    /// 计算 data 的 hash
     ///
     /// # Arguments
     ///
-- 
Gitee