diff --git a/0396-Add-pattern-for-cmlt-and-change-hip12.patch b/0396-Add-pattern-for-cmlt-and-change-hip12.patch new file mode 100644 index 0000000000000000000000000000000000000000..303a07aaa450c2960a1a92bba7a7f555b6341e69 --- /dev/null +++ b/0396-Add-pattern-for-cmlt-and-change-hip12.patch @@ -0,0 +1,137 @@ +diff --git a/gcc/ai-optimizer.cc b/gcc/ai-optimizer.cc +index 8908d1be3..e16ad59c8 100644 +--- a/gcc/ai-optimizer.cc ++++ b/gcc/ai-optimizer.cc +@@ -285,14 +285,15 @@ static int + graph_infer (int argc1, const char **argv1, const char *mops, + int argc2, int64_t *argv2) + { +- char gcc_exec_prefix[512]; ++ const int prefix_buff_len = 512; ++ char gcc_exec_prefix[prefix_buff_len] = {0}; + ssize_t len = readlink ("/proc/self/exe", gcc_exec_prefix, + sizeof (gcc_exec_prefix) - 1); + if (len == -1) + return 0; + +- char native_file[512]; +- strncpy (native_file, gcc_exec_prefix, sizeof (native_file) - 1); ++ char native_file[prefix_buff_len] = {0}; ++ strncpy (native_file, gcc_exec_prefix, len); + const char *target = "bin/gcc"; + const char *target_cc1 = "cc1"; + const char *target_gpp = "bin/g++"; +@@ -330,6 +331,8 @@ graph_infer (int argc1, const char **argv1, const char *mops, + strlen (native_file) - 1); + } + } ++ else ++ return 0; + + if (access (native_file, F_OK) == 0) + fill_node (native_file); +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index fb5e355d0..bcf919ed0 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -6533,6 +6533,22 @@ + [(set_attr "type" "neon_compare, neon_compare_zero")] + ) + ++;; Use cmlt to replace vector arithmetic operations like this (SImode example): ++;; B = ((A >> 15) & 0x00010001) * 0x0000ffff ++(define_insn "*aarch64_cmlt_as_arith2" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (mult: ++ (and: ++ (lshiftrt: ++ (match_operand:VDQHSD 1 "register_operand" "w") ++ (match_operand:VDQHSD 2 "half_size_minus_one_operand")) ++
(match_operand:VDQHSD 3 "cmlt_arith_mask_operand")) ++ (match_operand:VDQHSD 4 "half_bit_all_one_operand")))] ++ "TARGET_SIMD && flag_cmlt_arith" ++ "cmlt\t%0., %1., #0" ++ [(set_attr "type" "neon_compare_zero")] ++) ++ + ;; Use cmlt to replace vector arithmetic operations like this (SImode example): + ;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001) + ;; TODO: maybe extend to scalar operations or other cm** instructions. +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index faa445e26..52cac0b82 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -2326,7 +2326,7 @@ static const struct tune_params hip12_tunings = + 2, /* min_div_recip_mul_df. */ + 0, /* max_case_values. */ + tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ +- (AARCH64_EXTRA_TUNE_NONE), /* tune_flags. */ ++ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */ + &hip12_prefetch_tune + }; + +@@ -17310,6 +17310,18 @@ cost_plus: + return true; + + case MULT: ++ op0 = XEXP (x, 0); ++ op1 = XEXP (x, 1); ++ if (flag_cmlt_arith && GET_CODE (op0) == AND) ++ { ++ rtx op0_subop0 = XEXP (op0, 0); ++ if (GET_CODE (op0_subop0) == LSHIFTRT) ++ { ++ *cost += rtx_cost (op0, mode, MULT, 0, speed); ++ *cost += rtx_cost (op1, mode, MULT, 0, speed); ++ return true; ++ } ++ } + *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed); + /* aarch64_rtx_mult_cost always handles recursion to its + operands. 
*/ +diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md +index 3ec9e9103..281328734 100644 +--- a/gcc/config/aarch64/predicates.md ++++ b/gcc/config/aarch64/predicates.md +@@ -74,6 +74,15 @@ + return CONST_INT_P (op) && (UINTVAL (op) == mask); + }) + ++(define_predicate "half_bit_all_one_operand" ++ (match_code "const_vector") ++{ ++ op = unwrap_const_vec_duplicate (op); ++ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2; ++ unsigned long long mask = ((unsigned long long) 1 << size) - 1; ++ return CONST_INT_P (op) && (UINTVAL (op) == mask); ++}) ++ + (define_predicate "subreg_lowpart_operator" + (ior (match_code "truncate") + (and (match_code "subreg") +diff --git a/gcc/testsuite/gcc.dg/combine-cmlt-2.c b/gcc/testsuite/gcc.dg/combine-cmlt-2.c +new file mode 100755 +index 000000000..bb6a92b2d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/combine-cmlt-2.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile { target aarch64-*-* } } */ ++/* { dg-options "-O3 -mcmlt-arith -mcpu=hip12" } */ ++ ++/* The test checks usage of cmlt insns for arithmetic/logic calculations ++ * in foo (). It's inspired by sources of x264 codec. */ ++ ++typedef unsigned short int uint16_t; ++typedef unsigned int uint32_t; ++ ++void foo( uint32_t *a, uint32_t *b) ++{ ++ for (unsigned i = 0; i < 4; i++) ++ { ++ uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1)) ++ &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1); ++ b[i] = (a[i]+s)^s; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */ diff --git a/gcc.spec b/gcc.spec index 8b70df99cb378f9f85d6f87b4703ef268b090119..ce79695ad9d38b5a867baefd399f46484f6befb8 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 96 +%global gcc_release 97 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -505,6 +505,7 @@ Patch392: 0392-Modify-cores-definition-for-hip-cores.patch Patch393: 0393-array-dse-Ignore-debug-stmt-add-testsuites.patch Patch394: 0394-SVE-Add-SVE-constraint.patch Patch395: 0395-update-ai-model.patch +Patch396: 0396-Add-pattern-for-cmlt-and-change-hip12.patch # Part 1001-1999 %ifarch sw_64 @@ -1686,6 +1687,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch -P393 -p1 %patch -P394 -p1 %patch -P395 -p1 +%patch -P396 -p1 %ifarch sw_64 %patch -P1001 -p1 @@ -4313,6 +4315,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Mon Jul 21 2025 linhouzhong - 12.3.1-97 +- Type: Sync +- DESC: Sync patches from openeuler/gcc. + * Sat Jul 5 2025 yinchuang - 12.3.1-96 - Type: Sync - DESC: Sync patches from openeuler/gcc.