From 0903c1e5054638aba94785d0638ee84bfdf54ff1 Mon Sep 17 00:00:00 2001 From: xuxu9110 Date: Mon, 14 Jul 2025 01:43:40 +0000 Subject: [PATCH 1/2] update common_components/platform/arm64/string_hash_internal.h. Signed-off-by: xuxu9110 --- .../platform/arm64/string_hash_internal.h | 159 ++++++++---------- 1 file changed, 71 insertions(+), 88 deletions(-) diff --git a/common_components/platform/arm64/string_hash_internal.h b/common_components/platform/arm64/string_hash_internal.h index 699fdec49d..9559ab7ff8 100644 --- a/common_components/platform/arm64/string_hash_internal.h +++ b/common_components/platform/arm64/string_hash_internal.h @@ -27,103 +27,86 @@ namespace common { class StringHashInternal { friend class StringHashHelper; private: -#if ENABLE_NEXT_OPTIMIZATION + static const size_t UINT8_LOOP_SIZE = 16; + static const size_t UINT16_LOOP_SIZE = 8; +private: template - static uint32_t ComputeHashForDataOfLongString(const T *data, size_t size, - uint32_t hashSeed) + static uint32_t ComputeHashForDataOfLongString(const T* data, + size_t size, uint32_t hashSeed) { - /** - * process the first {remainder} items of data[] and hashSeed - * for example, if remainder = 2, - * then hash[2] = data[0] * 31^1, hash[3] = data[1] * 31^0; - * hash[0] = hashSeed * 31^{remainder} - * - * the rest elements in data[] will be processed with for loop as follows - * hash[0]: hash[0] * 31^4 + data[i] * 31^3 - * hash[1]: hash[1] * 31^4 + data[i+1] * 31^2 - * hash[2]: hash[2] * 31^4 + data[i+2] * 31^1 - * hash[3]: hash[3] * 31^4 + data[i+3] * 31^0 - * i starts at {remainder} and every time += 4, - * at last, totolHash = hash[0] + hash[1] + hash[2] + hash[3]; - */ - static_assert(std::is_same_v || std::is_same_v); - constexpr size_t blockSize = StringHash::BLOCK_SIZE; - constexpr size_t loopSize = StringHash::SIMD_U8_LOOP_SIZE; - uint32_t hash[blockSize] = {}; - uint32_t index = 0; - uint32_t remainder = size & (blockSize - 1); - switch (remainder) { -#define CASE(N) case (N): \ - hash[blockSize - (N)] = data[index++] * StringHash::MULTIPLIER[blockSize - (N)]; [[fallthrough]] - CASE(StringHash::SIZE_3); - CASE(StringHash::SIZE_2); - CASE(StringHash::SIZE_1); -#undef CASE - default: - break; - } - hash[0] += hashSeed * StringHash::MULTIPLIER[blockSize - 1 - remainder]; - - uint32x4_t dataVec; - uint32x4_t hashVec; - uint32x4_t multiplierVec = vld1q_u32(StringHash::MULTIPLIER); - uint32x4_t scaleVec = vdupq_n_u32(StringHash::BLOCK_MULTIPLY); - - if constexpr (std::is_same_v) { - // process 4 elements with for loop if (size-index) % 8 = 4 - if ((size - index) % loopSize == blockSize) { - for (size_t i = 0; i < blockSize; i++) { - hash[i] = hash[i] * StringHash::BLOCK_MULTIPLY + data[index++] * StringHash::MULTIPLIER[i]; - } - } - hashVec = vld1q_u32(hash); - for (; index < size; index += loopSize) { - uint8x8_t dataVec8 = vld1_u8(data + index); - uint16x8_t dataVec16 = vmovl_u8(dataVec8); - dataVec = vmovl_u16(vget_low_u16(dataVec16)); - hashVec = vaddq_u32(vmulq_u32(hashVec, scaleVec), vmulq_u32(dataVec, multiplierVec)); - dataVec = vmovl_u16(vget_high_u16(dataVec16)); - hashVec = vaddq_u32(vmulq_u32(hashVec, scaleVec), vmulq_u32(dataVec, multiplierVec)); - } - } else { - hashVec = vld1q_u32(hash); - for (; index < size; index += blockSize) { - dataVec = vmovl_u16(vld1_u16(data + index)); - hashVec = vaddq_u32(vmulq_u32(hashVec, scaleVec), vmulq_u32(dataVec, multiplierVec)); - } + uint32_t hash = hashSeed; + for (uint32_t i = 0; i < size; i++) { + hash = (hash << static_cast(StringHash::HASH_SHIFT)) - hash + data[i]; } - return vaddvq_u32(hashVec); + return hash; } -#else - template - static uint32_t ComputeHashForDataOfLongString(const T *data, size_t size, - uint32_t hashSeed) + + template <> + uint32_t ComputeHashForDataOfLongString(const uint8_t *data, + size_t size, uint32_t hashSeed) { - constexpr uint32_t hashShift = static_cast(StringHash::HASH_SHIFT); - constexpr uint32_t blockSize = static_cast(StringHash::BLOCK_SIZE); - uint32_t hash[blockSize] = {0}; - uint32_t index = 0; - uint32x4_t hashVec = vld1q_u32(hash); - uint32x4_t multiplier_vec = vdupq_n_u32(static_cast(StringHash::HASH_MULTIPLY)); - uint32x4_t dataVec; - for (; index + blockSize <= size; index += blockSize) { - dataVec[0] = data[index]; - dataVec[1] = data[index + 1]; // 1: the second element of the block - dataVec[2] = data[index + 2]; // 2: the third element of the block - dataVec[3] = data[index + 3]; // 3: the fourth element of the block - hashVec = vaddq_u32(vmulq_u32(hashVec, multiplier_vec), dataVec); + const uint32x4_t multiplierVec = vld1q_u32(StringHash::MULTIPLIER); + constexpr uint32_t multiplierHash = StringHash::MULTIPLIER[0] * StringHash::MULTIPLIER[2]; + + uint32_t hash = hashSeed; + const uint8_t *dataEnd = data + size; + const uint8_t *vecEnd = data + (size & (~15)); + const uint8_t *p = data; + for (; p < vecEnd; p += UINT8_LOOP_SIZE) { + uint8x16_t dataVec8 = vld1q_u8(p); + uint16x8_t dataVec16_1 = vmovl_u8(vget_low_u16(dataVec8)); + uint16x8_t dataVec16_2 = vmovl_u8(vget_high_u16(dataVec8)); + uint32x4_t dataVec32_1 = vmovl_u16(vget_low_u16(dataVec16_1)); + uint32x4_t dataVec32_3 = vmovl_u16(vget_low_u16(dataVec16_2)); + uint32x4_t dataVec32_2 = vmovl_u16(vget_high_u16(dataVec16_1)); + uint32x4_t dataVec32_4 = vmovl_u16(vget_high_u16(dataVec16_2)); + + dataVec32_1 = vmulq_u32(dataVec32_1, multiplierVec); + hash = hash * multiplierHash + vaddvq_u32(dataVec32_1); + + dataVec32_2 = vmulq_u32(dataVec32_2, multiplierVec); + hash = hash * multiplierHash + vaddvq_u32(dataVec32_2); + + dataVec32_3 = vmulq_u32(dataVec32_3, multiplierVec); + hash = hash * multiplierHash + vaddvq_u32(dataVec32_3); + + dataVec32_4 = vmulq_u32(dataVec32_4, multiplierVec); + hash = hash * multiplierHash + vaddvq_u32(dataVec32_4); } - vst1q_u32(hash, hashVec); - for (; index < size; ++index) { - hash[0] = (hash[0] << hashShift) - hash[0] + data[index]; + + for (; p < dataEnd; p++) { + hash = (hash << static_cast(StringHash::HASH_SHIFT)) - hash + *p; + } + return hash; + } + + template <> + uint32_t ComputeHashForDataOfLongString(const uint16_t *data, + size_t size, uint32_t hashSeed) + { + const uint32x4_t multiplierVec = vld1q_u32(StringHash::MULTIPLIER); + constexpr uint32_t multiplierHash = StringHash::MULTIPLIER[0] * StringHash::MULTIPLIER[2]; + + uint32_t hash = hashSeed; + const uint16_t *dataEnd = data + size; + const uint16_t *vecEnd = data + (size & (~7)); + const uint16_t *p = data; + for (; p < vecEnd; p += UINT16_LOOP_SIZE) { + uint16x8_t dataVec16 = vld1q_u16(p); + uint32x4_t dataVec32_1 = vmovl_u16(vget_low_u16(dataVec16)); + dataVec32_1 = vmulq_u32(dataVec32_1, multiplierVec); + hash = hash * multiplierHash + vaddvq_u32(dataVec32_1); + + uint32x4_t dataVec32_2 = vmovl_u16(vget_high_u16(dataVec16)); + dataVec32_2 = vmulq_u32(dataVec32_2, multiplierVec); + hash = hash * multiplierHash + vaddvq_u32(dataVec32_2); } - uint32_t totalHash = hashSeed; - for (uint32_t i = 0; i < blockSize; ++i) { - totalHash = (totalHash << hashShift) - totalHash + hash[i]; + + for (; p < dataEnd; p++) { + hash = (hash << static_cast(StringHash::HASH_SHIFT)) - hash + *p; } - return totalHash; + return hash; } -#endif }; } // namespace common #endif // COMMON_COMPONENTS_PLATFORM_STRING_HASH_ARM64_H \ No newline at end of file -- Gitee From 7cbb23a7644daed8b63215482e84cf4d64e366a3 Mon Sep 17 00:00:00 2001 From: xuxu9110 Date: Mon, 14 Jul 2025 01:44:22 +0000 Subject: [PATCH 2/2] update common_components/platform/common/string_hash_internal.h. Signed-off-by: xuxu9110 --- .../platform/common/string_hash_internal.h | 62 ++----------------- 1 file changed, 6 insertions(+), 56 deletions(-) diff --git a/common_components/platform/common/string_hash_internal.h b/common_components/platform/common/string_hash_internal.h index c1456274ba..2c89041d7a 100644 --- a/common_components/platform/common/string_hash_internal.h +++ b/common_components/platform/common/string_hash_internal.h @@ -25,66 +25,16 @@ namespace common { class StringHashInternal { friend class StringHashHelper; private: -#if ENABLE_NEXT_OPTIMIZATION template - static uint32_t ComputeHashForDataOfLongString(const T *data, size_t size, - uint32_t hashSeed) + static uint32_t ComputeHashForDataOfLongString(const T* data, + size_t size, uint32_t hashSeed) { - constexpr uint32_t blockSize = StringHash::BLOCK_SIZE; - constexpr uint32_t scale = StringHash::BLOCK_MULTIPLY; - uint32_t hash[blockSize] = {}; - uint32_t index = 0; - uint32_t remainder = size & (blockSize - 1); - switch (remainder) { -#define CASE(N) case (N): \ - hash[blockSize - (N)] = data[index++] * StringHash::MULTIPLIER[blockSize - (N)]; [[fallthrough]] - CASE(StringHash::SIZE_3); - CASE(StringHash::SIZE_2); - CASE(StringHash::SIZE_1); -#undef CASE - default: - break; + uint32_t hash = hashSeed; + for (uint32_t i = 0; i < size; i++) { + hash = (hash << static_cast(StringHash::HASH_SHIFT)) - hash + data[i]; } - hash[0] += hashSeed * StringHash::MULTIPLIER[blockSize - 1 - remainder]; - - uint32_t dataMul[blockSize] = {}; - for (; index < size; index += blockSize) { - for (size_t i = 0; i < blockSize; i++) { - dataMul[i] = data[index + i] * StringHash::MULTIPLIER[i]; - hash[i] = hash[i] * scale + dataMul[i]; - } - } - uint32_t hashTotal = 0; - for (size_t i = 0; i < blockSize; i++) { - hashTotal += hash[i]; - } - return hashTotal; - } -#else - template - static uint32_t ComputeHashForDataOfLongString(const T *data, size_t size, - uint32_t hashSeed) - { - constexpr uint32_t hashShift = static_cast(StringHash::HASH_SHIFT); - constexpr uint32_t blockSize = static_cast(StringHash::BLOCK_SIZE); - uint32_t hash[blockSize] = {0}; - uint32_t index = 0; - for (; index + blockSize <= size; index += blockSize) { - hash[0] = (hash[0] << hashShift) - hash[0] + data[index]; - hash[1] = (hash[1] << hashShift) - hash[1] + data[index + 1]; // 1: the second element of the block - hash[2] = (hash[2] << hashShift) - hash[2] + data[index + 2]; // 2: the third element of the block - hash[3] = (hash[3] << hashShift) - hash[3] + data[index + 3]; // 3: the fourth element of the block - } - for (; index < size; ++index) { - hash[0] = (hash[0] << hashShift) - hash[0] + data[index]; - } - uint32_t totalHash = hashSeed; - for (uint32_t i = 0; i < blockSize; ++i) { - totalHash = (totalHash << hashShift) - totalHash + hash[i]; - } - return totalHash; + return hash; } -#endif }; } // namespace common #endif // COMMON_COMPONENTS_PLATFORM_STRING_HASH_COMMON_H \ No newline at end of file -- Gitee