From fc8380526fcb3486f58f57ffbeb89a824b4a865e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B4=E7=B4=A0=E8=B4=9D=E5=8F=B6=E6=96=AF?= Date: Thu, 19 Sep 2024 08:56:42 +0000 Subject: [PATCH 1/3] =?UTF-8?q?=E4=BB=A3=E7=A0=81=E8=A7=84=E8=8C=83?= =?UTF-8?q?=E6=95=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 朴素贝叶斯 --- Source/astcenc_compress_symbolic.cpp | 4 ++-- Source/astcenc_ideal_endpoints_and_weights.cpp | 2 +- Source/astcenc_internal.h | 2 +- Source/astcenc_pick_best_endpoint_format.cpp | 3 +-- Source/astcenc_vecmathlib_neon_4.h | 8 ++++---- Source/astcenc_weight_align.cpp | 3 +-- 6 files changed, 10 insertions(+), 12 deletions(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index ca1e981..52f1f4e 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -209,8 +209,8 @@ static bool realign_weights_undecimated( vfloat4 error_down_vec = vfloat4(error_down0, error_down1, error_down2, error_down3); vfloat4 error_up_vec = vfloat4(error_up0, error_up1, error_up2, error_up3); - vmask4 check_result_up = (error_up_vec < error_base_vec) & - (error_up_vec < error_down_vec) & (uqw_vec < vint4(64)); + vmask4 check_result_up = (error_up_vec < error_base_vec) & + (error_up_vec < error_down_vec) & (uqw_vec < vint4(64)); vmask4 check_result_down = (error_down_vec < error_base_vec) & (uqw_vec > vint4::zero()); check_result_down = check_result_down & (~check_result_up); diff --git a/Source/astcenc_ideal_endpoints_and_weights.cpp b/Source/astcenc_ideal_endpoints_and_weights.cpp index 29f5745..e41d4e0 100644 --- a/Source/astcenc_ideal_endpoints_and_weights.cpp +++ b/Source/astcenc_ideal_endpoints_and_weights.cpp @@ -364,7 +364,7 @@ static void compute_ideal_colors_and_weights_3_comp( unsigned int texel_count = blk.texel_count; promise(texel_count > 0); - partition_metrics *pms = (partition_metrics *)&blk.pms[0]; + 
partition_metrics *pms = reinterpret_cast<partition_metrics *>(&blk.pms[0]); float error_weight; const float* data_vr = nullptr; diff --git a/Source/astcenc_internal.h b/Source/astcenc_internal.h index ba6a4b9..0c8248e 100644 --- a/Source/astcenc_internal.h +++ b/Source/astcenc_internal.h @@ -763,7 +763,7 @@ struct image_block /** @brief The input (compress) or output (decompress) data for the alpha color component. */ ASTCENC_ALIGNAS float data_a[BLOCK_MAX_TEXELS]; - partition_metrics pms[BLOCK_MAX_PARTITIONS]; + mutable partition_metrics pms[BLOCK_MAX_PARTITIONS]; /** @brief The number of texels in the block. */ uint8_t texel_count; diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index ff4e445..63f288c 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -209,7 +209,6 @@ static void compute_error_squared_rgb_single_partition( rgbl_errv = rgbl_errv0 * ews.lane<0>() + rgbl_errv1 * ews.lane<1>() + rgbl_errv2 * ews.lane<2>(); l_errv = l_errv0 * ews.lane<0>() + l_errv1 * ews.lane<1>() + l_errv2 * ews.lane<2>(); - if (i < texel_count) { vint lane_ids = vint::lane_id() + i; @@ -316,7 +315,7 @@ static void compute_encoding_choice_errors( int partition_count = pi.partition_count; promise(partition_count > 0); - partition_metrics *pms = (partition_metrics *)&blk.pms[0]; + partition_metrics *pms = reinterpret_cast<partition_metrics *>(&blk.pms[0]); if (!blk.is_constant_channel(3) || (partition_count != 1 && privateProfile == HIGH_QUALITY_PROFILE)) { diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index f6f8de1..4fc83a4 100644 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -651,10 +651,10 @@ ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) { uint8x16_t idx = { - 0, 4, 8, 12, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0 + 0, 4, 8, 12, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 
0, 0, 0 }; int8x16_t av = vreinterpretq_s8_s32(a.m); return vint4(vreinterpretq_s32_s8(vqtbl1q_s8(av, idx))); diff --git a/Source/astcenc_weight_align.cpp b/Source/astcenc_weight_align.cpp index 703a0c6..14e736f 100644 --- a/Source/astcenc_weight_align.cpp +++ b/Source/astcenc_weight_align.cpp @@ -241,7 +241,6 @@ static void compute_lowest_and_highest_weight( rcp_stepsize = rcp_stepsize + vfloat(ASTCENC_SIMD_WIDTH); } - } #else static void compute_lowest_and_highest_weight( @@ -472,7 +471,7 @@ void compute_angular_endpoints_1plane( } compute_angular_endpoints_for_quant_levels( - privateProfile, + privateProfile, weight_count, dec_weight_ideal_value + i * BLOCK_MAX_WEIGHTS, max_precision, low_values[i], high_values[i]); -- Gitee From 850514a95826810cb2cf514c6796f5177acd68e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B4=E7=B4=A0=E8=B4=9D=E5=8F=B6=E6=96=AF?= Date: Thu, 19 Sep 2024 11:28:56 +0000 Subject: [PATCH 2/3] =?UTF-8?q?=E8=A7=84=E8=8C=83=E6=95=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 朴素贝叶斯 --- Source/astcenc_vecmathlib_neon_4.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/Source/astcenc_vecmathlib_neon_4.h b/Source/astcenc_vecmathlib_neon_4.h index 4fc83a4..c22f80f 100644 --- a/Source/astcenc_vecmathlib_neon_4.h +++ b/Source/astcenc_vecmathlib_neon_4.h @@ -650,12 +650,7 @@ ASTCENC_SIMD_INLINE vint4 gatheri(const int* base, vint4 indices) */ ASTCENC_SIMD_INLINE vint4 pack_low_bytes(vint4 a) { - uint8x16_t idx = { - 0, 4, 8, 12, - 0, 0, 0, 0, - 0, 0, 0, 0, - 0, 0, 0, 0 - }; + uint8x16_t idx = {0, 4, 8, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; int8x16_t av = vreinterpretq_s8_s32(a.m); return vint4(vreinterpretq_s32_s8(vqtbl1q_s8(av, idx))); } -- Gitee From 32a9806030c046c1726fcd9d64b628889b1333fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9C=B4=E7=B4=A0=E8=B4=9D=E5=8F=B6=E6=96=AF?= Date: Wed, 25 Sep 2024 08:57:55 +0000 Subject: [PATCH 3/3] 
=?UTF-8?q?=E8=A7=84=E8=8C=83=E6=95=B4=E6=94=B9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 朴素贝叶斯 --- Source/astcenc_compress_symbolic.cpp | 4 ++-- Source/astcenc_find_best_partitioning.cpp | 2 +- Source/astcenc_pick_best_endpoint_format.cpp | 8 ++++---- Source/astcenc_vecmathlib_common_4.h | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Source/astcenc_compress_symbolic.cpp b/Source/astcenc_compress_symbolic.cpp index 52f1f4e..a93046a 100644 --- a/Source/astcenc_compress_symbolic.cpp +++ b/Source/astcenc_compress_symbolic.cpp @@ -222,8 +222,8 @@ static bool realign_weights_undecimated( dec_weights_uquant[texel] = uqw_vec.lane<0>(); dec_weights_uquant[texel + 1] = uqw_vec.lane<1>(); - dec_weights_uquant[texel + 2] = uqw_vec.lane<2>(); - dec_weights_uquant[texel + 3] = uqw_vec.lane<3>(); + dec_weights_uquant[texel + 2] = uqw_vec.lane<2>(); // channel 2 + dec_weights_uquant[texel + 3] = uqw_vec.lane<3>(); // channel 3 adjustments = true; } }; diff --git a/Source/astcenc_find_best_partitioning.cpp b/Source/astcenc_find_best_partitioning.cpp index 5d0682e..119e435 100644 --- a/Source/astcenc_find_best_partitioning.cpp +++ b/Source/astcenc_find_best_partitioning.cpp @@ -260,7 +260,7 @@ static inline uint8_t partition_mismatch2( uint64x2_t b10 = vextq_u64(b01, b01, 1); uint8_t c1 = popcount(veorq_u64(a01, b01)); uint8_t c2 = popcount(veorq_u64(a01, b10)); - return static_cast<uint8_t>(astc::min(c1, c2) / 2); + return static_cast<uint8_t>(astc::min(c1, c2) / 2); // 2 is the number of partitions } #else static inline uint8_t partition_mismatch2( diff --git a/Source/astcenc_pick_best_endpoint_format.cpp b/Source/astcenc_pick_best_endpoint_format.cpp index 63f288c..d3b9f6c 100644 --- a/Source/astcenc_pick_best_endpoint_format.cpp +++ b/Source/astcenc_pick_best_endpoint_format.cpp @@ -204,10 +204,10 @@ static void compute_error_squared_rgb_single_partition( haccumulate(l_errv2, dist2 * dist2); } - uncor_errv = 
uncor_errv0 * ews.lane<0>() + uncor_errv1 * ews.lane<1>() + uncor_errv2 * ews.lane<2>(); - samec_errv = samec_errv0 * ews.lane<0>() + samec_errv1 * ews.lane<1>() + samec_errv2 * ews.lane<2>(); - rgbl_errv = rgbl_errv0 * ews.lane<0>() + rgbl_errv1 * ews.lane<1>() + rgbl_errv2 * ews.lane<2>(); - l_errv = l_errv0 * ews.lane<0>() + l_errv1 * ews.lane<1>() + l_errv2 * ews.lane<2>(); + uncor_errv = uncor_errv0 * ews.lane<0>() + uncor_errv1 * ews.lane<1>() + uncor_errv2 * ews.lane<2>(); // channel 0,1,2 + samec_errv = samec_errv0 * ews.lane<0>() + samec_errv1 * ews.lane<1>() + samec_errv2 * ews.lane<2>(); // channel 0,1,2 + rgbl_errv = rgbl_errv0 * ews.lane<0>() + rgbl_errv1 * ews.lane<1>() + rgbl_errv2 * ews.lane<2>(); // channel 0,1,2 + l_errv = l_errv0 * ews.lane<0>() + l_errv1 * ews.lane<1>() + l_errv2 * ews.lane<2>(); // channel 0,1,2 if (i < texel_count) { diff --git a/Source/astcenc_vecmathlib_common_4.h b/Source/astcenc_vecmathlib_common_4.h index a19b954..9fbbad8 100644 --- a/Source/astcenc_vecmathlib_common_4.h +++ b/Source/astcenc_vecmathlib_common_4.h @@ -290,7 +290,7 @@ ASTCENC_SIMD_INLINE void haccumulate(vfloat4& accum, vfloat4 a, vmask4 m) #define ASTCENC_USE_COMMON_GATHERF ASTCENC_SIMD_INLINE vfloat4 gatherf(const float* base, const uint8_t* idx) { - return vfloat4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); + return vfloat4(base[idx[0]], base[idx[1]], base[idx[2]], base[idx[3]]); // index 0,1,2,3 } /** @@ -307,7 +307,7 @@ ASTCENC_SIMD_INLINE float hadd_rgb_s(vfloat4 a) */ ASTCENC_SIMD_INLINE float hadd_rgba_s(vfloat4 a) { - return a.lane<0>() + a.lane<1>() + a.lane<2>() + a.lane<3>(); + return a.lane<0>() + a.lane<1>() + a.lane<2>() + a.lane<3>(); // channel 0,1,2,3 } #endif -- Gitee