diff --git a/gcc/ai-optimizer.cc b/gcc/ai-optimizer.cc
index 8908d1be326f9f8943862d916e82ba384adaf2f0..e16ad59c889fe2db0c6da063b8d2b764fda20639 100644
--- a/gcc/ai-optimizer.cc
+++ b/gcc/ai-optimizer.cc
@@ -285,14 +285,15 @@ static int
 graph_infer (int argc1, const char **argv1, const char *mops,
              int argc2, int64_t *argv2)
 {
-  char gcc_exec_prefix[512];
+  const int prefix_buff_len = 512;
+  char gcc_exec_prefix[prefix_buff_len] = {0};
   ssize_t len = readlink ("/proc/self/exe", gcc_exec_prefix,
                           sizeof (gcc_exec_prefix) - 1);
   if (len == -1)
     return 0;
 
-  char native_file[512];
-  strncpy (native_file, gcc_exec_prefix, sizeof (native_file) - 1);
+  char native_file[prefix_buff_len] = {0};
+  strncpy (native_file, gcc_exec_prefix, len);
   const char *target = "bin/gcc";
   const char *target_cc1 = "cc1";
   const char *target_gpp = "bin/g++";
@@ -330,6 +331,8 @@ graph_infer (int argc1, const char **argv1, const char *mops,
                strlen (native_file) - 1);
         }
     }
+  else
+    return 0;
 
   if (access (native_file, F_OK) == 0)
     fill_node (native_file);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index fb5e355d0a855d6d28101b169cf2a08ca73b6caf..bcf919ed0cf26e79216a75aad2d464ef5d29f506 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6533,6 +6533,22 @@
   [(set_attr "type" "neon_compare, neon_compare_zero")]
 )
 
+;; Use cmlt to replace vector arithmetic operations like this (SImode example):
+;; B = ((A >> 15) & 0x00010001) * 0x0000ffff
+(define_insn "*aarch64_cmlt_as_arith2"
+  [(set (match_operand:<MODE> 0 "register_operand" "=w")
+        (mult:<MODE>
+          (and:<MODE>
+            (lshiftrt:<MODE>
+              (match_operand:VDQHSD 1 "register_operand" "w")
+              (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
+            (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
+          (match_operand:VDQHSD 4 "half_bit_all_one_operand")))]
+  "TARGET_SIMD && flag_cmlt_arith"
+  "cmlt\t%0., %1., #0"
+  [(set_attr "type" "neon_compare_zero")]
+)
+
 ;; Use cmlt to replace vector arithmetic operations like this (SImode example):
 ;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
 ;; TODO: maybe extend to scalar operations or other cm** instructions.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index faa445e26219d96173c34f60dc28ef4be1ebc583..52cac0b82d79393c30c1281a935ff5db5947a831 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2326,7 +2326,7 @@ static const struct tune_params hip12_tunings =
   2,    /* min_div_recip_mul_df.  */
   0,    /* max_case_values.  */
   tune_params::AUTOPREFETCHER_WEAK,     /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_NONE),    /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC),  /* tune_flags.  */
   &hip12_prefetch_tune
 };
 
@@ -17310,6 +17310,18 @@ cost_plus:
       return true;
 
     case MULT:
+      op0 = XEXP (x, 0);
+      op1 = XEXP (x, 1);
+      if (flag_cmlt_arith && GET_CODE (op0) == AND)
+        {
+          rtx op0_subop0 = XEXP (op0, 0);
+          if (GET_CODE (op0_subop0) == LSHIFTRT)
+            {
+              *cost += rtx_cost (op0, mode, MULT, 0, speed);
+              *cost += rtx_cost (op1, mode, MULT, 0, speed);
+              return true;
+            }
+        }
       *cost += aarch64_rtx_mult_cost (x, MULT, 0, speed);
       /* aarch64_rtx_mult_cost always handles recursion to its
          operands.  */
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 3ec9e9103f5e51d5a9ba323ab679ada7b3177ee1..28132873441ee67db369bdfaee32d6bed8fb46de 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -74,6 +74,15 @@
   return CONST_INT_P (op) && (UINTVAL (op) == mask);
 })
 
+(define_predicate "half_bit_all_one_operand"
+  (match_code "const_vector")
+{
+  op = unwrap_const_vec_duplicate (op);
+  unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
+  unsigned long long mask = ((unsigned long long) 1 << size) - 1;
+  return CONST_INT_P (op) && (UINTVAL (op) == mask);
+})
+
 (define_predicate "subreg_lowpart_operator"
   (ior (match_code "truncate")
        (and (match_code "subreg")
diff --git a/gcc/testsuite/gcc.dg/combine-cmlt-2.c b/gcc/testsuite/gcc.dg/combine-cmlt-2.c
new file mode 100755
index 0000000000000000000000000000000000000000..bb6a92b2d8c213baa6a256e3a53924ed8a3c4db8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-cmlt-2.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -mcmlt-arith -mcpu=hip12" } */
+
+/* The test checks usage of cmlt insns for arithmetic/logic calculations
+ * in foo ().  It's inspired by sources of x264 codec.  */
+
+typedef unsigned short int uint16_t;
+typedef unsigned int uint32_t;
+
+void foo( uint32_t *a, uint32_t *b)
+{
+  for (unsigned i = 0; i < 4; i++)
+  {
+    uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
+                 &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
+    b[i] = (a[i]+s)^s;
+  }
+}
+
+/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */
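
Background for the gcc/ai-optimizer.cc hunk: readlink () does not NUL-terminate the buffer it fills, so the old code could copy an unterminated string from gcc_exec_prefix into native_file. The standalone C sketch below is illustrative only (it is not code from the patch) and shows the corrected idiom: zero-initialise both buffers and copy exactly the len bytes that readlink reported.

#include <stdio.h>
#include <string.h>
#include <unistd.h>

enum { prefix_buff_len = 512 };

int
main (void)
{
  /* readlink () writes at most prefix_buff_len - 1 bytes and never adds
     a '\0'; the = {0} initialisers are what guarantee termination.  */
  char gcc_exec_prefix[prefix_buff_len] = {0};
  char native_file[prefix_buff_len] = {0};

  ssize_t len = readlink ("/proc/self/exe", gcc_exec_prefix,
                          sizeof (gcc_exec_prefix) - 1);
  if (len == -1)
    return 0;

  /* Copy only the bytes readlink actually produced; the trailing zeros
     from the initialiser keep native_file NUL-terminated as well.  */
  strncpy (native_file, gcc_exec_prefix, len);
  printf ("%s\n", native_file);
  return 0;
}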
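
Background for the *aarch64_cmlt_as_arith2 pattern and the combine-cmlt-2.c test: within one vector element, ((A >> 15) & 0x00010001) * 0x0000ffff yields an all-ones 16-bit half exactly where that half of A has its sign bit set, which is the result cmlt #0 produces on the half-width lanes, so the whole shift/and/multiply chain collapses into a single compare. The C program below is a minimal standalone check of that identity for 32-bit elements; the helper names mask_by_mult and mask_by_cmlt are illustrative and do not appear in the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* The shift/and/multiply form matched by the insn pattern (one SImode
   element).  */
static uint32_t
mask_by_mult (uint32_t a)
{
  return ((a >> 15) & 0x00010001u) * 0x0000ffffu;
}

/* What cmlt #0 on the two 16-bit halves of the same element computes:
   all ones in each half whose sign bit is set.  */
static uint32_t
mask_by_cmlt (uint32_t a)
{
  uint32_t lo = (a & 0x00008000u) ? 0x0000ffffu : 0u;
  uint32_t hi = (a & 0x80000000u) ? 0xffff0000u : 0u;
  return hi | lo;
}

int
main (void)
{
  const uint32_t samples[] = { 0x00000000u, 0x00008000u, 0x80000000u,
                               0x80008000u, 0x7fff7fffu, 0xdeadbeefu };
  for (unsigned i = 0; i < sizeof samples / sizeof samples[0]; i++)
    assert (mask_by_mult (samples[i]) == mask_by_cmlt (samples[i]));
  printf ("identity holds\n");
  return 0;
}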