diff --git a/0396-Add-pattern-for-cmlt-and-change-hip12.patch b/0396-Add-pattern-for-cmlt-and-change-hip12.patch
new file mode 100644
index 0000000000000000000000000000000000000000..303a07aaa450c2960a1a92bba7a7f555b6341e69
--- /dev/null
+++ b/0396-Add-pattern-for-cmlt-and-change-hip12.patch
@@ -0,0 +1,137 @@
+diff --git a/gcc/ai-optimizer.cc b/gcc/ai-optimizer.cc
+index 8908d1be3..e16ad59c8 100644
+--- a/gcc/ai-optimizer.cc
++++ b/gcc/ai-optimizer.cc
+@@ -285,14 +285,15 @@ static int
+ graph_infer (int argc1, const char **argv1, const char *mops,
+              int argc2, int64_t *argv2)
+ {
+-  char gcc_exec_prefix[512];
++  const int prefix_buff_len = 512;
++  char gcc_exec_prefix[prefix_buff_len] = {0};
+   ssize_t len = readlink ("/proc/self/exe", gcc_exec_prefix,
+   			  sizeof (gcc_exec_prefix) - 1);
+   if (len == -1)
+     return 0;
+ 
+-  char native_file[512];
+-  strncpy (native_file, gcc_exec_prefix, sizeof (native_file) - 1);
++  char native_file[prefix_buff_len] = {0};
++  strncpy (native_file, gcc_exec_prefix, len);
+   const char *target = "bin/gcc";
+   const char *target_cc1 = "cc1";
+   const char *target_gpp = "bin/g++";
+@@ -330,6 +331,8 @@ graph_infer (int argc1, const char **argv1, const char *mops,
+ 		   strlen (native_file) - 1);
+ 	}
+     }
++  else
++	return 0;
+ 
+   if (access (native_file, F_OK) == 0)
+     fill_node (native_file);
+diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
+index fb5e355d0..bcf919ed0 100644
+--- a/gcc/config/aarch64/aarch64-simd.md
++++ b/gcc/config/aarch64/aarch64-simd.md
+@@ -6533,6 +6533,22 @@
+   [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
+ )
+ 
++;; Use cmlt to replace vector arithmetic operations like this (SImode example):
+;; B = ((A >> 15) & 0x00010001) * 0x0000ffff
++(define_insn "*aarch64_cmlt_as_arith2<mode>"
++  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
++      (mult:<V_INT_EQUIV>
++        (and:<V_INT_EQUIV>
++          (lshiftrt:<V_INT_EQUIV>
++            (match_operand:VDQHSD 1 "register_operand" "w")
++            (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
++          (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
++        (match_operand:VDQHSD 4 "half_bit_all_one_operand")))]
++  "TARGET_SIMD && flag_cmlt_arith"
++  "cmlt\t%<v>0.<V2ntype>, %<v>1.<V2ntype>, #0"
++  [(set_attr "type" "neon_compare_zero")]
++)
++
+ ;; Use cmlt to replace vector arithmetic operations like this (SImode example):
+ ;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
+ ;; TODO: maybe extend to scalar operations or other cm** instructions.
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index faa445e26..52cac0b82 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -2326,7 +2326,7 @@ static const struct tune_params hip12_tunings =
+   2,    /* min_div_recip_mul_df.  */
+   0,    /* max_case_values.  */
+   tune_params::AUTOPREFETCHER_WEAK,     /* autoprefetcher_model.  */
+-  (AARCH64_EXTRA_TUNE_NONE),     /* tune_flags.  */
++  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC),     /* tune_flags.  */
+   &hip12_prefetch_tune
+ };
+ 
+@@ -17310,6 +17310,18 @@ cost_plus:
+       return true;
+ 
+     case MULT:
++	  op0 = XEXP (x, 0);
++	  op1 = XEXP (x, 1);
++	  if (flag_cmlt_arith && GET_CODE (op0) == AND)
++	{
++	  rtx op0_subop0 = XEXP (op0, 0);
++	  if (GET_CODE (op0_subop0) == LSHIFTRT)
++	    {
++	      *cost += rtx_cost (op0, mode, MULT, 0, speed);
++	      *cost += rtx_cost (op1, mode, MULT, 0, speed);
++	      return true;
++	    }
++	}
+       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
+       /* aarch64_rtx_mult_cost always handles recursion to its
+ 	 operands.  */
+diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
+index 3ec9e9103..281328734 100644
+--- a/gcc/config/aarch64/predicates.md
++++ b/gcc/config/aarch64/predicates.md
+@@ -74,6 +74,15 @@
+   return CONST_INT_P (op) && (UINTVAL (op) == mask);
+ })
+ 
++(define_predicate "half_bit_all_one_operand"
++  (match_code "const_vector")
++{
++  op = unwrap_const_vec_duplicate (op);
++  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
++  unsigned long long mask = ((unsigned long long) 1 << size) - 1;
++  return CONST_INT_P (op) && (UINTVAL (op) == mask);
++})
++
+ (define_predicate "subreg_lowpart_operator"
+   (ior (match_code "truncate")
+        (and (match_code "subreg")
+diff --git a/gcc/testsuite/gcc.dg/combine-cmlt-2.c b/gcc/testsuite/gcc.dg/combine-cmlt-2.c
+new file mode 100755
+index 000000000..bb6a92b2d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/combine-cmlt-2.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile { target aarch64-*-* } } */
++/* { dg-options "-O3 -mcmlt-arith -mcpu=hip12" } */
++
++/* The test checks usage of cmlt insns for arithmetic/logic calculations
++ * in foo ().  It's inspired by sources of x264 codec.  */
++
++typedef unsigned short int uint16_t;
++typedef unsigned int uint32_t;
++
++void foo( uint32_t *a, uint32_t *b)
++{
++  for (unsigned i = 0; i < 4; i++)
++    {
++      uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
++		    &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
++      b[i] = (a[i]+s)^s;
++    }
++}
++
++/* { dg-final { scan-assembler-times {cmlt\t} 1 } }  */
diff --git a/gcc.spec b/gcc.spec
index 8b70df99cb378f9f85d6f87b4703ef268b090119..ce79695ad9d38b5a867baefd399f46484f6befb8 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -2,7 +2,7 @@
 %global gcc_major 12
 # Note, gcc_release must be integer, if you want to add suffixes to
 # %%{release}, append them after %%{gcc_release} on Release: line.
-%global gcc_release 96
+%global gcc_release 97
 
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
@@ -505,6 +505,7 @@ Patch392: 0392-Modify-cores-definition-for-hip-cores.patch
 Patch393: 0393-array-dse-Ignore-debug-stmt-add-testsuites.patch
 Patch394: 0394-SVE-Add-SVE-constraint.patch
 Patch395: 0395-update-ai-model.patch
+Patch396: 0396-Add-pattern-for-cmlt-and-change-hip12.patch
 
 # Part 1001-1999
 %ifarch sw_64
@@ -1686,6 +1687,7 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %patch -P393 -p1
 %patch -P394 -p1
 %patch -P395 -p1
+%patch -P396 -p1
 
 %ifarch sw_64
 %patch -P1001 -p1
@@ -4313,6 +4315,10 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Mon Jul 21 2025 linhouzhong <hz_lin8@163.com> - 12.3.1-97
+- Type: Sync
+- DESC: Sync patches from openeuler/gcc.
+
 * Sat Jul 5 2025 yinchuang <yinchuang@huawei.com> - 12.3.1-96
 - Type: Sync
 - DESC: Sync patches from openeuler/gcc.