diff --git a/0398-Backport-zhaoxin-lujiazui-yongfeng-shijidadao-enable.patch b/0398-Backport-zhaoxin-lujiazui-yongfeng-shijidadao-enable.patch new file mode 100644 index 0000000000000000000000000000000000000000..62f6efa087c6890911d33a36bd5ba3c497446eeb --- /dev/null +++ b/0398-Backport-zhaoxin-lujiazui-yongfeng-shijidadao-enable.patch @@ -0,0 +1,2989 @@ +From f6b863ca94e2e68001ba448f41ea55c55af92d2d Mon Sep 17 00:00:00 2001 +From: timhu_806d +Date: Fri, 12 Sep 2025 14:45:07 +0800 +Subject: [PATCH] Backport zhaoxin lujiazui,yongfeng,shijidadao enablements + from community's trunk + +--- + gcc/common/config/i386/cpuinfo.h | 87 +- + gcc/common/config/i386/i386-common.cc | 12 + + gcc/common/config/i386/i386-cpuinfo.h | 5 + + gcc/config.gcc | 26 +- + gcc/config/i386/cpuid.h | 4 + + gcc/config/i386/driver-i386.cc | 30 +- + gcc/config/i386/i386-c.cc | 21 + + gcc/config/i386/i386-options.cc | 8 + + gcc/config/i386/i386.h | 11 + + gcc/config/i386/i386.md | 6 +- + gcc/config/i386/lujiazui.md | 846 +++++++++++++++++ + gcc/config/i386/x86-tune-costs.h | 341 +++++++ + gcc/config/i386/x86-tune-sched.cc | 29 + + gcc/config/i386/x86-tune.def | 79 +- + gcc/config/i386/yongfeng.md | 848 ++++++++++++++++++ + gcc/doc/extend.texi | 9 + + gcc/doc/invoke.texi | 17 + + gcc/testsuite/g++.target/i386/mv32.C | 42 + + gcc/testsuite/gcc.target/i386/funcspec-56.inc | 6 + + 19 files changed, 2383 insertions(+), 44 deletions(-) + create mode 100644 gcc/config/i386/lujiazui.md + create mode 100644 gcc/config/i386/yongfeng.md + create mode 100644 gcc/testsuite/g++.target/i386/mv32.C + +diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h +index da1568fd1..4e865e2e0 100644 +--- a/gcc/common/config/i386/cpuinfo.h ++++ b/gcc/common/config/i386/cpuinfo.h +@@ -100,6 +100,32 @@ set_cpu_feature (struct __processor_model *cpu_model, + } + } + ++/* Drop FEATURE from either CPU_MODEL or CPU_FEATURES2. 
*/ ++ ++static inline void ++reset_cpu_feature (struct __processor_model *cpu_model, ++ unsigned int *cpu_features2, ++ enum processor_features feature) ++{ ++ unsigned index, offset; ++ unsigned f = feature; ++ ++ if (f < 32) ++ { ++ /* The first 32 features. */ ++ cpu_model->__cpu_features[0] &= ~(1U << f); ++ } ++ else ++ { ++ /* The rest of features. cpu_features2[i] contains features from ++ (32 + i * 32) to (31 + 32 + i * 32), inclusively. */ ++ f -= 32; ++ index = f / 32; ++ offset = f % 32; ++ cpu_features2[index] &= ~(1U << offset); ++ } ++} ++ + /* Get the specific type of AMD CPU and return AMD CPU name. Return + NULL for unknown AMD CPU. */ + +@@ -561,6 +587,51 @@ get_intel_cpu (struct __processor_model *cpu_model, + return cpu; + } + ++/* Get the specific type of ZHAOXIN CPU and return ZHAOXIN CPU name. ++ Return NULL for unknown ZHAOXIN CPU. */ ++ ++static inline const char * ++get_zhaoxin_cpu (struct __processor_model *cpu_model, ++ struct __processor_model2 *cpu_model2, ++ unsigned int *cpu_features2) ++{ ++ const char *cpu = NULL; ++ unsigned int family = cpu_model2->__cpu_family; ++ unsigned int model = cpu_model2->__cpu_model; ++ ++ switch (family) ++ { ++ /* ZHAOXIN family 7h. */ ++ case 0x07: ++ cpu_model->__cpu_type = ZHAOXIN_FAM7H; ++ if (model == 0x3b) ++ { ++ cpu = "lujiazui"; ++ CHECK___builtin_cpu_is ("lujiazui"); ++ reset_cpu_feature (cpu_model, cpu_features2, FEATURE_AVX); ++ reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C); ++ cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI; ++ } ++ else if (model == 0x5b) ++ { ++ cpu = "yongfeng"; ++ CHECK___builtin_cpu_is ("yongfeng"); ++ cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_YONGFENG; ++ } ++ else if (model >= 0x6b) ++ { ++ cpu = "shijidadao"; ++ CHECK___builtin_cpu_is ("shijidadao"); ++ cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_SHIJIDADAO; ++ } ++ break; ++ default: ++ break; ++ } ++ ++ return cpu; ++} ++ + /* ECX and EDX are output of CPUID at level one. 
*/ + static inline void + get_available_features (struct __processor_model *cpu_model, +@@ -983,8 +1054,22 @@ cpu_indicator_init (struct __processor_model *cpu_model, + get_amd_cpu (cpu_model, cpu_model2, cpu_features2); + cpu_model->__cpu_vendor = VENDOR_AMD; + } +- else if (vendor == signature_CENTAUR_ebx) ++ else if (vendor == signature_CENTAUR_ebx && family < 0x07) + cpu_model->__cpu_vendor = VENDOR_CENTAUR; ++ else if (vendor == signature_SHANGHAI_ebx ++ || vendor == signature_CENTAUR_ebx) ++ { ++ /* Adjust model and family for ZHAOXIN CPUS. */ ++ if (family == 0x07) ++ model += extended_model; ++ ++ cpu_model2->__cpu_family = family; ++ cpu_model2->__cpu_model = model; ++ ++ /* Get CPU type. */ ++ get_zhaoxin_cpu (cpu_model, cpu_model2, cpu_features2); ++ cpu_model->__cpu_vendor = VENDOR_ZHAOXIN; ++ } + else if (vendor == signature_CYRIX_ebx) + cpu_model->__cpu_vendor = VENDOR_CYRIX; + else if (vendor == signature_NSC_ebx) +diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc +index bec6801ce..eb0d94409 100644 +--- a/gcc/common/config/i386/i386-common.cc ++++ b/gcc/common/config/i386/i386-common.cc +@@ -1875,6 +1875,9 @@ const char *const processor_names[] = + "graniterapids", + "graniterapids-d", + "intel", ++ "lujiazui", ++ "yongfeng", ++ "shijidadao", + "geode", + "k6", + "athlon", +@@ -2063,6 +2066,15 @@ const pta processor_alias_table[] = + {"nano-x4", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 + | PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR, 0, P_NONE}, ++ {"lujiazui", PROCESSOR_LUJIAZUI, CPU_LUJIAZUI, ++ PTA_LUJIAZUI, ++ M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_PROC_BMI}, ++ {"yongfeng", PROCESSOR_YONGFENG, CPU_YONGFENG, ++ PTA_YONGFENG, ++ M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_PROC_AVX2}, ++ {"shijidadao", PROCESSOR_SHIJIDADAO, CPU_YONGFENG, ++ PTA_YONGFENG, ++ M_CPU_SUBTYPE (ZHAOXIN_FAM7H_SHIJIDADAO), P_PROC_AVX2}, + {"k8", PROCESSOR_K8, CPU_K8, + PTA_64BIT | PTA_MMX | PTA_3DNOW | 
PTA_3DNOW_A | PTA_SSE + | PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, 0, P_NONE}, +diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h +index a32f32c97..249744080 100644 +--- a/gcc/common/config/i386/i386-cpuinfo.h ++++ b/gcc/common/config/i386/i386-cpuinfo.h +@@ -29,6 +29,7 @@ enum processor_vendor + { + VENDOR_INTEL = 1, + VENDOR_AMD, ++ VENDOR_ZHAOXIN, + VENDOR_OTHER, + VENDOR_CENTAUR, + VENDOR_CYRIX, +@@ -58,6 +59,7 @@ enum processor_types + INTEL_GOLDMONT_PLUS, + INTEL_TREMONT, + AMDFAM19H, ++ ZHAOXIN_FAM7H, + CPU_TYPE_MAX, + BUILTIN_CPU_TYPE_MAX = CPU_TYPE_MAX + }; +@@ -91,9 +93,12 @@ enum processor_subtypes + INTEL_COREI7_ALDERLAKE, + AMDFAM19H_ZNVER3, + INTEL_COREI7_ROCKETLAKE, ++ ZHAOXIN_FAM7H_LUJIAZUI, + AMDFAM19H_ZNVER4, + INTEL_COREI7_GRANITERAPIDS, + INTEL_COREI7_GRANITERAPIDS_D, ++ ZHAOXIN_FAM7H_YONGFENG, ++ ZHAOXIN_FAM7H_SHIJIDADAO, + CPU_SUBTYPE_MAX + }; + +diff --git a/gcc/config.gcc b/gcc/config.gcc +index c6b5e362b..13de6b3a0 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -671,7 +671,7 @@ silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \ + skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \ + sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \ + nano-x2 eden-x4 nano-x4 x86-64 x86-64-v2 x86-64-v3 x86-64-v4 graniterapids \ +-graniterapids-d native" ++graniterapids-d lujiazui yongfeng shijidadao native" + + # Additional x86 processors supported by --with-cpu=. Each processor + # MUST be separated by exactly one space. 
+@@ -3801,6 +3801,18 @@ case ${target} in + cpu=geode + arch_without_sse2=yes + ;; ++ lujiazui-*) ++ arch=lujiazui ++ cpu=lujiazui ++ ;; ++ yongfeng-*) ++ arch=yongfeng ++ cpu=yongfeng ++ ;; ++ shijidadao-*) ++ arch=shijidadao ++ cpu=shijidadao ++ ;; + pentium2-*) + arch=pentium2 + cpu=pentium2 +@@ -3914,6 +3926,18 @@ case ${target} in + arch=k8 + cpu=k8 + ;; ++ lujiazui-*) ++ arch=lujiazui ++ cpu=lujiazui ++ ;; ++ yongfeng-*) ++ arch=yongfeng ++ cpu=yongfeng ++ ;; ++ shijidadao-*) ++ arch=shijidadao ++ cpu=shijidadao ++ ;; + nocona-*) + arch=nocona + cpu=nocona +diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h +index 530a45fad..cdbb62d59 100644 +--- a/gcc/config/i386/cpuid.h ++++ b/gcc/config/i386/cpuid.h +@@ -207,6 +207,10 @@ + #define signature_VORTEX_ecx 0x436f5320 + #define signature_VORTEX_edx 0x36387865 + ++#define signature_SHANGHAI_ebx 0x68532020 ++#define signature_SHANGHAI_ecx 0x20206961 ++#define signature_SHANGHAI_edx 0x68676e61 ++ + #ifndef __x86_64__ + /* At least one cpu (Winchip 2) does not set %ebx and %ecx + for cpuid leaf 1. 
Forcibly zero the two registers before +diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc +index e3bca4b49..34b6c31db 100644 +--- a/gcc/config/i386/driver-i386.cc ++++ b/gcc/config/i386/driver-i386.cc +@@ -438,7 +438,8 @@ const char *host_detect_local_cpu (int argc, const char **argv) + || vendor == VENDOR_CYRIX + || vendor == VENDOR_NSC) + cache = detect_caches_amd (ext_level); +- else if (vendor == VENDOR_INTEL) ++ else if (vendor == VENDOR_INTEL ++ || vendor == VENDOR_ZHAOXIN) + { + bool xeon_mp = (family == 15 && model == 6); + cache = detect_caches_intel (xeon_mp, max_level, +@@ -520,6 +521,24 @@ const char *host_detect_local_cpu (int argc, const char **argv) + processor = PROCESSOR_I486; + } + } ++ else if (vendor == VENDOR_ZHAOXIN) ++ { ++ processor = PROCESSOR_GENERIC; ++ ++ switch (family) ++ { ++ case 7: ++ if (model >= 0x6b) ++ processor = PROCESSOR_SHIJIDADAO; ++ else if (model == 0x5b) ++ processor = PROCESSOR_YONGFENG; ++ else if (model == 0x3b) ++ processor = PROCESSOR_LUJIAZUI; ++ break; ++ default: ++ break; ++ } ++ } + else + { + switch (family) +@@ -781,6 +800,15 @@ const char *host_detect_local_cpu (int argc, const char **argv) + case PROCESSOR_BTVER2: + cpu = "btver2"; + break; ++ case PROCESSOR_LUJIAZUI: ++ cpu = "lujiazui"; ++ break; ++ case PROCESSOR_YONGFENG: ++ cpu = "yongfeng"; ++ break; ++ case PROCESSOR_SHIJIDADAO: ++ cpu = "shijidadao"; ++ break; + + default: + /* Use something reasonable. 
*/ +diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc +index 49f0db2b8..75aab8b50 100644 +--- a/gcc/config/i386/i386-c.cc ++++ b/gcc/config/i386/i386-c.cc +@@ -144,6 +144,18 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + def_or_undef (parse_in, "__btver2"); + def_or_undef (parse_in, "__btver2__"); + break; ++ case PROCESSOR_LUJIAZUI: ++ def_or_undef (parse_in, "__lujiazui"); ++ def_or_undef (parse_in, "__lujiazui__"); ++ break; ++ case PROCESSOR_YONGFENG: ++ def_or_undef (parse_in, "__yongfeng"); ++ def_or_undef (parse_in, "__yongfeng__"); ++ break; ++ case PROCESSOR_SHIJIDADAO: ++ def_or_undef (parse_in, "__shijidadao"); ++ def_or_undef (parse_in, "__shijidadao__"); ++ break; + case PROCESSOR_PENTIUM4: + def_or_undef (parse_in, "__pentium4"); + def_or_undef (parse_in, "__pentium4__"); +@@ -348,6 +360,15 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, + case PROCESSOR_BTVER2: + def_or_undef (parse_in, "__tune_btver2__"); + break; ++ case PROCESSOR_LUJIAZUI: ++ def_or_undef (parse_in, "__tune_lujiazui__"); ++ break; ++ case PROCESSOR_YONGFENG: ++ def_or_undef (parse_in, "__tune_yongfeng__"); ++ break; ++ case PROCESSOR_SHIJIDADAO: ++ def_or_undef (parse_in, "__tune_shijidadao__"); ++ break; + case PROCESSOR_PENTIUM4: + def_or_undef (parse_in, "__tune_pentium4__"); + break; +diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc +index 061a15843..a987b379e 100644 +--- a/gcc/config/i386/i386-options.cc ++++ b/gcc/config/i386/i386-options.cc +@@ -145,6 +145,11 @@ along with GCC; see the file COPYING3. If not see + | m_ICELAKE_CLIENT | m_ICELAKE_SERVER | m_CASCADELAKE \ + | m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE) + ++#define m_LUJIAZUI (HOST_WIDE_INT_1U<. ++;; ++ ++;; Scheduling for ZHAOXIN lujiazui processor. ++ ++;; Modeling automatons for decoders, execution pipes, AGU pipes, and divider. 
++(define_automaton "lujiazui_decoder,lujiazui_core,lujiazui_agu,lujiazui_div") ++ ++;; The rules for the decoder are simple: ++;; - an instruction with 1 uop can be decoded by any of the three ++;; decoders in one cycle. ++;; - an instruction with 2 uops can be decoded by decoder 0 or decoder 1 ++;; but still in only one cycle. ++;; - a complex (microcode) instruction can only be decoded by ++;; decoder 0, and this takes an unspecified number of cycles. ++;; ++;; The goal is to schedule such that we have a few-one-two uops sequence ++;; in each cycle, to decode as many instructions per cycle as possible. ++(define_cpu_unit "lua_decoder0" "lujiazui_decoder") ++(define_cpu_unit "lua_decoder1" "lujiazui_decoder") ++(define_cpu_unit "lua_decoder2" "lujiazui_decoder") ++ ++;; We first wish to find an instruction for lua_decoder0, so exclude ++;; lua_decoder1 and lua_decoder2 from being reserved until ++;; lua_decoder0 is reserved, and also exclude lua_decoder2 ++;; from being reserved until lua_decoder1 is reserved. ++(presence_set "lua_decoder1" "lua_decoder0") ++(presence_set "lua_decoder2" "lua_decoder0") ++(presence_set "lua_decoder2" "lua_decoder1") ++ ++;; Most instructions can be decoded on any of the three decoders. ++(define_reservation "lua_decodern" "lua_decoder0|lua_decoder1|lua_decoder2") ++(define_reservation "lua_decoder01" "lua_decoder0|lua_decoder1") ++ ++;; The out-of-order core has six pipelines. ++;; Port 4, 5 are responsible for address calculations, load or store. ++;; Port 0, 1, 2, 3 for everything else. 
++ ++(define_cpu_unit "lua_p0,lua_p1,lua_p2,lua_p3" "lujiazui_core") ++(define_cpu_unit "lua_p4,lua_p5" "lujiazui_agu") ++ ++(define_cpu_unit "lua_div" "lujiazui_div") ++ ++(define_reservation "lua_p03" "lua_p0|lua_p3") ++(define_reservation "lua_p12" "lua_p1|lua_p2") ++(define_reservation "lua_p1p2" "lua_p1+lua_p2") ++(define_reservation "lua_p45" "lua_p4|lua_p5") ++(define_reservation "lua_p4p5" "lua_p4+lua_p5") ++(define_reservation "lua_p0p1p2p3" "lua_p0+lua_p1+lua_p2+lua_p3") ++ ++;; Only the irregular instructions have to be modeled here. ++ ++;; Complex instruction. ++(define_insn_reservation "lua_complex_insn" 6 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "other,multi,str")) ++ "lua_decoder0") ++ ++;; Call instruction. ++(define_insn_reservation "lua_call" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "call,callv")) ++ "lua_decoder0,lua_p45,lua_p1") ++ ++;; MOV - integer moves. ++(define_insn_reservation "lua_imov" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "imov,imovx"))) ++ "lua_decodern,lua_p12") ++ ++(define_insn_reservation "lua_imov_load" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "imov,imovx"))) ++ "lua_decoder01,lua_p45") ++ ++(define_insn_reservation "lua_imov_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "imov"))) ++ "lua_decodern,lua_p12+lua_p45") ++ ++(define_insn_reservation "lua_icmov" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "icmov"))) ++ "lua_decodern,lua_p2") ++ ++(define_insn_reservation "lua_icmov_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "icmov"))) ++ "lua_decoder01,lua_p45,lua_p2") ++ ++;; Push and pop. 
++(define_insn_reservation "lua_push_reg" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "push"))) ++ "lua_decodern,lua_p12+lua_p45") ++ ++(define_insn_reservation "lua_push_mem" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "push"))) ++ "lua_decoder01,lua_p45,lua_p12+lua_p45") ++ ++(define_insn_reservation "lua_pop_reg" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "pop"))) ++ "lua_decoder01,lua_p45") ++ ++(define_insn_reservation "lua_pop_mem" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "pop"))) ++ "lua_decoder0,lua_p45,lua_p12+lua_p45") ++ ++(define_insn_reservation "lua_lea" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "lea")) ++ "lua_decodern,lua_p45") ++ ++(define_insn_reservation "lua_shift_rotate" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) ++ "lua_decodern,lua_p2") ++ ++(define_insn_reservation "lua_shift_rotate_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) ++ "lua_decoder01,lua_p45,lua_p2") ++ ++(define_insn_reservation "lua_shift_rotate_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) ++ "lua_decoder01,lua_p2,lua_p45") ++ ++(define_insn_reservation "lua_shift_rotate_both" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "ishift,ishift1,rotate,rotate1"))) ++ "lua_decoder0,lua_p45,lua_p2,lua_p45") ++ ++(define_insn_reservation "lua_branch" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ibr"))) ++ "lua_decodern,lua_p1") ++ ++(define_insn_reservation "lua_indirect_branch_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr 
"memory" "load") ++ (eq_attr "type" "ibr"))) ++ "lua_decodern,lua_p45,lua_p1") ++ ++(define_insn_reservation "lua_leave" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "leave")) ++ "lua_decoder0,lua_p45+lua_p12,lua_p12") ++ ++;; Multiplication instructions. ++ ++(define_insn_reservation "lua_imul_qi" 2 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "type" "imul,imulx")))) ++ "lua_decodern,lua_p1p2") ++ ++(define_insn_reservation "lua_imul_qi_mem" 6 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "type" "imul,imulx")))) ++ "lua_decoder01,lua_p1p2+lua_p45") ++ ++(define_insn_reservation "lua_imul_hisi" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "HI,SI") ++ (eq_attr "type" "imul,imulx")))) ++ "lua_decoder0,lua_p1p2") ++ ++(define_insn_reservation "lua_imul_hisi_mem" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "HI,SI") ++ (eq_attr "type" "imul,imulx")))) ++ "lua_decoder0,lua_p1p2+lua_p45") ++ ++(define_insn_reservation "lua_imul_di" 12 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "type" "imul,imulx")))) ++ "lua_decoder0,lua_p0p1p2p3") ++ ++(define_insn_reservation "lua_imul_di_mem" 16 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "type" "imul,imulx")))) ++ "lua_decoder0,lua_p0p1p2p3+lua_p45") ++ ++;; Division instructions. 
++ ++(define_insn_reservation "lua_idiv_qi" 21 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p0p1p2p3,lua_div*21") ++ ++(define_insn_reservation "lua_idiv_qi_load" 25 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*21") ++ ++(define_insn_reservation "lua_idiv_hi" 22 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p0p1p2p3,lua_div*22") ++ ++(define_insn_reservation "lua_idiv_hi_load" 26 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*22") ++ ++(define_insn_reservation "lua_idiv_si" 20 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p0p1p2p3,lua_div*20") ++ ++(define_insn_reservation "lua_idiv_si_load" 24 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "SI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*20") ++ ++(define_insn_reservation "lua_idiv_di" 150 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p0p1p2p3,lua_div*150") ++ ++(define_insn_reservation "lua_idiv_di_load" 154 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "type" "idiv")))) ++ "lua_decoder0,lua_p45,lua_p0p1p2p3,lua_div*150") ++ ++;; x87 floating point operations. 
++ ++(define_insn_reservation "lua_fxch" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "fxch")) ++ "lua_decodern,lua_p1") ++ ++(define_insn_reservation "lua_fop" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "fop"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_fop_load" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fop"))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_fop_store" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "fop"))) ++ "lua_decodern,lua_p0,lua_p45") ++ ++(define_insn_reservation "lua_fop_both" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "fop"))) ++ "lua_decoder0,lua_p45,lua_p0,lua_p45") ++ ++(define_insn_reservation "lua_fsgn" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "fsgn")) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_fistp" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fistp"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_fistp_mem" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "!none") ++ (eq_attr "type" "fistp"))) ++ "lua_decoder0,lua_p0+lua_p45") ++ ++(define_insn_reservation "lua_fcmov" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (eq_attr "type" "fcmov")) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_fcmp" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fcmp"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_fcmp_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fcmp"))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_fmov" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fmov"))) ++ "lua_decodern,lua_p0") ++ 
++(define_insn_reservation "lua_fmov_load" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "!XF") ++ (eq_attr "type" "fmov")))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_fmov_XF_load" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "XF") ++ (eq_attr "type" "fmov")))) ++ "lua_decoder0,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_fmov_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (and (eq_attr "mode" "!XF") ++ (eq_attr "type" "fmov")))) ++ "lua_decoder0,lua_p0,lua_p45") ++ ++(define_insn_reservation "lua_fmov_XF_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (and (eq_attr "mode" "XF") ++ (eq_attr "type" "fmov")))) ++ "lua_decoder0,lua_p0,lua_p45") ++ ++(define_insn_reservation "lua_fmul" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fmul"))) ++ "lua_decodern,lua_p3") ++ ++(define_insn_reservation "lua_fmul_load" 8 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "fp_int_src" "false") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fmul")))) ++ "lua_decoder01,lua_p45,lua_p3") ++ ++(define_insn_reservation "lua_fimul_load" 8 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "fp_int_src" "true") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fmul")))) ++ "lua_decoder0,lua_p45,lua_p3") ++ ++;; fdiv instructions. 
++ ++(define_insn_reservation "lua_fdiv_SF" 15 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SF") ++ (eq_attr "type" "fdiv,fpspc")))) ++ "lua_decodern,lua_p0,lua_div*15") ++ ++(define_insn_reservation "lua_fdiv_SF_load" 19 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "SF") ++ (eq_attr "type" "fdiv,fpspc")))) ++ "lua_decoder01,lua_p45,lua_p0,lua_div*15") ++ ++(define_insn_reservation "lua_fdiv_DF" 18 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "DF") ++ (eq_attr "type" "fdiv,fpspc")))) ++ "lua_decodern,lua_p0,lua_div*18") ++ ++(define_insn_reservation "lua_fdiv_DF_load" 22 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "DF") ++ (eq_attr "type" "fdiv,fpspc")))) ++ "lua_decoder01,lua_p45,lua_p0,lua_div*18") ++ ++(define_insn_reservation "lua_fdiv_XF" 22 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "XF") ++ (eq_attr "type" "fdiv,fpspc")))) ++ "lua_decoder0,lua_p0,lua_div*22") ++ ++(define_insn_reservation "lua_fdiv_XF_load" 26 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "XF") ++ (eq_attr "type" "fdiv,fpspc")))) ++ "lua_decoder0,lua_p45,lua_p0,lua_div*22") ++ ++;; MMX instructions. 
++ ++(define_insn_reservation "lua_mmx_sse_add_shft" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "mmxadd,sseiadd,mmxshft,sseishft"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_mmx_sse_add_shft_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "mmxadd,sseiadd,mmxshft,sseishft"))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_mmx_sse_add_shft_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "mmxadd,sseiadd,mmxshft,sseishft"))) ++ "lua_decodern,lua_p0,lua_p45") ++ ++(define_insn_reservation "lua_mmx_mul" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "mmxmul,sseimul"))) ++ "lua_decodern,lua_p3") ++ ++(define_insn_reservation "lua_mmx_mul_load" 9 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "mmxmul,sseimul"))) ++ "lua_decoder01,lua_p45,lua_p3") ++ ++(define_insn_reservation "lua_mmxcvt" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "mmxcvt"))) ++ "lua_decodern,lua_p03") ++ ++(define_insn_reservation "lua_mmxcvt_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "mmxcvt"))) ++ "lua_decoder01,lua_p45,lua_p03") ++ ++;; The sfence instruction. 
++(define_insn_reservation "lua_sse_sfence" 13 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "unknown") ++ (eq_attr "type" "sse"))) ++ "lua_decoder0,lua_p45") ++ ++(define_insn_reservation "lua_sse_SFDF" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "mode" "SF,DF") ++ (eq_attr "type" "sse"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_sse_V4SF" 13 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "mode" "V4SF") ++ (eq_attr "type" "sse"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_sse_V8SF" 19 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (eq_attr "type" "sse"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_sse_add1" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseadd1"))) ++ "lua_decoder0,lua_p0") ++ ++(define_insn_reservation "lua_sse_add1_load" 8 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseadd1"))) ++ "lua_decoder0,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_sse_cmp" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecmp,ssecomi"))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_sse_cmp_load" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecmp,ssecomi"))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_sse_logic" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sselog,sselog1"))) ++ "lua_decodern,lua_p03") ++ ++(define_insn_reservation "lua_sse_logic_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sselog,sselog1"))) ++ "lua_decoder01,lua_p45,lua_p03") ++ ++(define_insn_reservation "lua_sse_add" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseadd"))) ++ "lua_decodern,lua_p0") ++ 
++(define_insn_reservation "lua_sse_add_load" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseadd"))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_ssemul_ss_ps" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SF,V4SF,V8SF") ++ (eq_attr "type" "ssemul")))) ++ "lua_decodern,lua_p3") ++ ++(define_insn_reservation "lua_ssemul_ss_ps_load" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "SF,V4SF,V8SF") ++ (eq_attr "type" "ssemul")))) ++ "lua_decoder01,lua_p45,lua_p3") ++ ++(define_insn_reservation "lua_ssemul_sd_pd" 4 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "DF,V2DF,V4DF") ++ (eq_attr "type" "ssemul")))) ++ "lua_decodern,lua_p3") ++ ++(define_insn_reservation "lua_ssemul_sd_pd_load" 8 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "DF,V2DF,V4DF") ++ (eq_attr "type" "ssemul")))) ++ "lua_decoder01,lua_p45,lua_p3") ++ ++(define_insn_reservation "lua_ssediv_SF" 13 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decodern,lua_p0,lua_div*13") ++ ++(define_insn_reservation "lua_ssediv_load_SF" 17 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "SF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder01,lua_p45,lua_p0,lua_div*13") ++ ++(define_insn_reservation "lua_ssediv_V4SF" 23 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "V4SF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decodern,lua_p0,lua_div*23") ++ ++(define_insn_reservation "lua_ssediv_load_V4SF" 27 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "V4SF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder01,lua_p45,lua_p0,lua_div*23") ++ 
++(define_insn_reservation "lua_ssediv_V8SF" 47 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "V8SF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder0,lua_p0,lua_div*47") ++ ++(define_insn_reservation "lua_ssediv_load_V8SF" 51 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "V8SF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder0,lua_p45,lua_p0,lua_div*47") ++ ++(define_insn_reservation "lua_ssediv_SD" 17 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "DF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decodern,lua_p0,lua_div*17") ++ ++(define_insn_reservation "lua_ssediv_load_SD" 21 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "DF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder01,lua_p45,lua_p0,lua_div*17") ++ ++(define_insn_reservation "lua_ssediv_V2DF" 30 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "V2DF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decodern,lua_p0,lua_div*30") ++ ++(define_insn_reservation "lua_ssediv_load_V2DF" 34 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "V2DF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder01,lua_p45,lua_p0,lua_div*30") ++ ++(define_insn_reservation "lua_ssediv_V4DF" 56 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "V4DF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder0,lua_p0,lua_div*56") ++ ++(define_insn_reservation "lua_ssediv_load_V4DF" 60 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "V4DF") ++ (eq_attr "type" "ssediv")))) ++ "lua_decoder0,lua_p4p5,lua_p0,lua_div*56") ++ ++ ++(define_insn_reservation "lua_sseicvt_si" 2 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SI") ++ (and (match_operand:SF 1 
"memory_operand") ++ (eq_attr "type" "sseicvt"))))) ++ "lua_decoder01,lua_p0") ++ ++(define_insn_reservation "lua_sseicvt_si_load" 6 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "SI") ++ (and (match_operand:SF 1 "memory_operand") ++ (eq_attr "type" "sseicvt"))))) ++ "lua_decoder0,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_sseicvtdf_si" 3 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SI") ++ (and (match_operand:DF 1 "memory_operand") ++ (eq_attr "type" "sseicvt"))))) ++ "lua_decodern,lua_p0") ++ ++(define_insn_reservation "lua_sseicvtdf_si_load" 7 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "SI") ++ (and (match_operand:DF 1 "memory_operand") ++ (eq_attr "type" "sseicvt"))))) ++ "lua_decoder01,lua_p45,lua_p0") ++ ++(define_insn_reservation "lua_ssecvt" 6 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecvt"))) ++ "lua_decoder01,lua_p03") ++ ++(define_insn_reservation "lua_ssecvt_load" 10 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecvt"))) ++ "lua_decoder0,lua_p45,lua_p03") ++ ++(define_insn_reservation "lua_sse_mov" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssemov"))) ++ "lua_decodern,lua_p03") ++ ++(define_insn_reservation "lua_sse_mov_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssemov"))) ++ "lua_decoder01,lua_p45,lua_p03") ++ ++(define_insn_reservation "lua_sse_mov_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "ssemov"))) ++ "lua_decoder01,lua_p0,lua_p45") ++ ++(define_insn_reservation "lua_insn_alu" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "alu"))) ++ "lua_decodern,lua_p12") ++ ++(define_insn_reservation 
"lua_insn_alu_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "alu"))) ++ "lua_decoder01,lua_p45,lua_p12") ++ ++(define_insn_reservation "lua_insn_alu_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "alu"))) ++ "lua_decoder01,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_alu_both" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "alu"))) ++ "lua_decoder0,lua_p45,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_alu1" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "alu1"))) ++ "lua_decodern,lua_p12") ++ ++(define_insn_reservation "lua_insn_alu1_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "alu1"))) ++ "lua_decoder01,lua_p45,lua_p12") ++ ++(define_insn_reservation "lua_insn_alu1_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "alu1"))) ++ "lua_decoder01,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_alu1_both" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "alu1"))) ++ "lua_decoder0,lua_p45,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_negnot_incdec" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "negnot,incdec"))) ++ "lua_decodern,lua_p12") ++ ++(define_insn_reservation "lua_insn_negnot_setcc" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "setcc"))) ++ "lua_decodern,lua_p2") ++ ++(define_insn_reservation "lua_insn_negnot_setcc_mem" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "!none") ++ (eq_attr "type" "negnot,setcc"))) ++ "lua_decoder01,lua_p45,lua_p2,lua_p45") ++ ++(define_insn_reservation "lua_insn_incdec_mem" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "!none") ++ 
(eq_attr "type" "incdec"))) ++ "lua_decoder0,lua_p45,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_icmptest" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "icmp,test"))) ++ "lua_decodern,lua_p12") ++ ++(define_insn_reservation "lua_insn_icmptest_load" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "icmp,test"))) ++ "lua_decoder01,lua_p45,lua_p12") ++ ++(define_insn_reservation "lua_insn_icmptest_store" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "icmp,test"))) ++ "lua_decoder01,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_icmptest_both" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "icmp,test"))) ++ "lua_decoder0,lua_p45,lua_p12,lua_p45") ++ ++(define_insn_reservation "lua_insn_sseishft1_mmx" 1 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseishft1,mmx,mmxcmp"))) ++ "lua_decodern,lua_p03") ++ ++(define_insn_reservation "lua_insn_sseishft1_mmx_mem" 5 ++ (and (eq_attr "cpu" "lujiazui") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseishft1,mmx,mmxcmp"))) ++ "lua_decoder01,lua_p45,lua_p03") +diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h +index db4c2da34..c7a4191fa 100644 +--- a/gcc/config/i386/x86-tune-costs.h ++++ b/gcc/config/i386/x86-tune-costs.h +@@ -3276,6 +3276,347 @@ struct processor_costs intel_cost = { + 2, /* Small unroll factor. */ + }; + ++/* lujiazui_cost should produce code tuned for ZHAOXIN lujiazui CPU. 
*/ ++static stringop_algs lujiazui_memcpy[2] = { ++ {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, ++ {-1, libcall, false}}}, ++ {libcall, {{12, unrolled_loop, true}, {32, loop, false}, ++ {6144, rep_prefix_8_byte, false}, ++ {-1, libcall, false}}}}; ++static stringop_algs lujiazui_memset[2] = { ++ {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false}, ++ {-1, libcall, false}}}, ++ {libcall, {{12, loop, true}, {32, loop, false}, ++ {640, rep_prefix_8_byte, false}, ++ {-1, libcall, false}}}}; ++static const ++struct processor_costs lujiazui_cost = { ++ { ++ /* Start of register allocator costs. integer->integer move cost is 2. */ ++ 6, /* cost for loading QImode using movzbl. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {6, 6, 6}, /* cost of storing integer registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {6, 6, 8}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {6, 6, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. */ ++ {6, 6}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {6, 6}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ ++ {6, 6, 6, 10, 15}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {6, 6, 6, 10, 15}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 6, 6, /* SSE->integer and integer->SSE moves. */ ++ 6, 6, /* mask->integer and integer->mask moves. */ ++ {6, 6, 6}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {6, 6, 6}, /* cost if storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction. 
*/ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (2), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (3), /* SI. */ ++ COSTS_N_INSNS (12), /* DI. */ ++ COSTS_N_INSNS (14)}, /* other. */ ++ 0, /* cost of multiply per each bit set. */ ++ {COSTS_N_INSNS (22), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (24), /* HI. */ ++ COSTS_N_INSNS (24), /* SI. */ ++ COSTS_N_INSNS (150), /* DI. */ ++ COSTS_N_INSNS (152)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 17, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO. */ ++ {6, 6, 6}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {6, 6, 6}, /* cost of storing integer registers. */ ++ {6, 6, 6, 10, 15}, /* cost of loading SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit. */ ++ {6, 6, 6, 10, 15}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit. */ ++ {6, 6, 6, 10, 15}, /* cost of unaligned loads. */ ++ {6, 6, 6, 10, 15}, /* cost of unaligned storess. */ ++ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ ++ 6, /* cost of moving SSE register to integer. */ ++ 18, 6, /* Gather load static, per_elt. */ ++ 18, 6, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 4096, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ /* Lujiazui processor never drop prefetches, like AMD processors. */ ++ 100, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (4), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (22), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (44), /* cost of FSQRT instruction. 
*/ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (4), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (6), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (6), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (13), /* cost of DIVSS instruction. */ ++ COSTS_N_INSNS (17), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (32), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (60), /* cost of SQRTSD instruction. */ ++ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ ++ lujiazui_memcpy, ++ lujiazui_memset, ++ COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (2), /* cond_not_taken_branch_cost. */ ++ "16:11:8", /* Loop alignment. */ ++ "16:11:8", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++}; ++ ++/* yongfeng_cost should produce code tuned for ZHAOXIN yongfeng CPU. */ ++static stringop_algs yongfeng_memcpy[2] = { ++ {libcall, {{6, unrolled_loop, true}, {256, unrolled_loop, false}, ++ {-1, libcall, false}}}, ++ {libcall, {{8, loop, false}, {512, unrolled_loop, false}, ++ {-1, libcall, false}}}}; ++static stringop_algs yongfeng_memset[2] = { ++ {libcall, {{6, loop_1_byte, false}, {128, loop, false}, ++ {-1, libcall, false}}}, ++ {libcall, {{2, rep_prefix_4_byte, false}, {64, loop, false}, ++ {1024, vector_loop, false}, ++ {-1, libcall, false}}}}; ++static const ++struct processor_costs yongfeng_cost = { ++ { ++ /* Start of register allocator costs. integer->integer move cost is 2. */ ++ 8, /* cost for loading QImode using movzbl. */ ++ {8, 8, 8}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {8, 8, 8}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. 
*/ ++ {8, 8, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ ++ {8, 8, 8, 10, 15}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 10, 15}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 8, 8, /* SSE->integer and integer->SSE moves. */ ++ 8, 8, /* mask->integer and integer->mask moves. */ ++ {8, 8, 8}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost if storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. */ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (2), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (2), /* SI. */ ++ COSTS_N_INSNS (2), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit set. */ ++ {COSTS_N_INSNS (8), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (9), /* HI. */ ++ COSTS_N_INSNS (8), /* SI. */ ++ COSTS_N_INSNS (41), /* DI. */ ++ COSTS_N_INSNS (41)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 17, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO. */ ++ {8, 8, 8}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer registers. */ ++ {8, 8, 8, 12, 15}, /* cost of loading SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit. 
*/ ++ {8, 8, 8, 12, 15}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit. */ ++ {8, 8, 8, 12, 15}, /* cost of unaligned loads. */ ++ {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ ++ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ ++ 8, /* cost of moving SSE register to integer. */ ++ 18, 6, /* Gather load static, per_elt. */ ++ 18, 6, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ 12, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (3), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (14), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (1), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (1), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (10), /* cost of DIVSS instruction. */ ++ COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */ ++ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ ++ yongfeng_memcpy, ++ yongfeng_memset, ++ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ ++ "16:11:8", /* Loop alignment. */ ++ "16:11:8", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++}; ++ ++/* shijidadao_cost should produce code tuned for ZHAOXIN shijidadao CPU. 
*/ ++static stringop_algs shijidadao_memcpy[2] = { ++ {libcall, {{8, unrolled_loop, true}, {256, unrolled_loop, false}, ++ {-1, libcall, false}}}, ++ {libcall, {{10, loop, true}, {256, unrolled_loop, false}, ++ {-1, libcall, false}}}}; ++static stringop_algs shijidadao_memset[2] = { ++ {libcall, {{4, loop, true}, {128, unrolled_loop, false}, ++ {-1, libcall, false}}}, ++ {libcall, {{1, rep_prefix_4_byte, false}, {14, loop, true}, ++ {1024, vector_loop, false}, ++ {-1, libcall, false}}}}; ++static const ++struct processor_costs shijidadao_cost = { ++ { ++ /* Start of register allocator costs. integer->integer move cost is 2. */ ++ 8, /* cost for loading QImode using movzbl. */ ++ {8, 8, 8}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer registers. */ ++ 2, /* cost of reg,reg fld/fst. */ ++ {8, 8, 8}, /* cost of loading fp registers ++ in SFmode, DFmode and XFmode. */ ++ {8, 8, 8}, /* cost of storing fp registers ++ in SFmode, DFmode and XFmode. */ ++ 2, /* cost of moving MMX register. */ ++ {8, 8}, /* cost of loading MMX registers ++ in SImode and DImode. */ ++ {8, 8}, /* cost of storing MMX registers ++ in SImode and DImode. */ ++ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ ++ {8, 8, 8, 10, 15}, /* cost of loading SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ {8, 8, 8, 10, 15}, /* cost of storing SSE registers ++ in 32,64,128,256 and 512-bit. */ ++ 8, 8, /* SSE->integer and integer->SSE moves. */ ++ 8, 8, /* mask->integer and integer->mask moves. */ ++ {8, 8, 8}, /* cost of loading mask register ++ in QImode, HImode, SImode. */ ++ {8, 8, 8}, /* cost if storing mask register ++ in QImode, HImode, SImode. */ ++ 2, /* cost of moving mask register. */ ++ /* End of register allocator costs. */ ++ }, ++ ++ COSTS_N_INSNS (1), /* cost of an add instruction. */ ++ COSTS_N_INSNS (1), /* cost of a lea instruction. */ ++ COSTS_N_INSNS (1), /* variable shift costs. 
*/ ++ COSTS_N_INSNS (1), /* constant shift costs. */ ++ {COSTS_N_INSNS (2), /* cost of starting multiply for QI. */ ++ COSTS_N_INSNS (3), /* HI. */ ++ COSTS_N_INSNS (2), /* SI. */ ++ COSTS_N_INSNS (2), /* DI. */ ++ COSTS_N_INSNS (3)}, /* other. */ ++ 0, /* cost of multiply per each bit set. */ ++ {COSTS_N_INSNS (9), /* cost of a divide/mod for QI. */ ++ COSTS_N_INSNS (10), /* HI. */ ++ COSTS_N_INSNS (9), /* SI. */ ++ COSTS_N_INSNS (50), /* DI. */ ++ COSTS_N_INSNS (50)}, /* other. */ ++ COSTS_N_INSNS (1), /* cost of movsx. */ ++ COSTS_N_INSNS (1), /* cost of movzx. */ ++ 8, /* "large" insn. */ ++ 17, /* MOVE_RATIO. */ ++ 6, /* CLEAR_RATIO. */ ++ {8, 8, 8}, /* cost of loading integer registers ++ in QImode, HImode and SImode. ++ Relative to reg-reg move (2). */ ++ {8, 8, 8}, /* cost of storing integer registers. */ ++ {8, 8, 8, 12, 15}, /* cost of loading SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit. */ ++ {8, 8, 8, 12, 15}, /* cost of storing SSE register ++ in 32bit, 64bit, 128bit, 256bit and 512bit. */ ++ {8, 8, 8, 12, 15}, /* cost of unaligned loads. */ ++ {8, 8, 8, 12, 15}, /* cost of unaligned storess. */ ++ 2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */ ++ 8, /* cost of moving SSE register to integer. */ ++ 18, 6, /* Gather load static, per_elt. */ ++ 18, 6, /* Gather store static, per_elt. */ ++ 32, /* size of l1 cache. */ ++ 256, /* size of l2 cache. */ ++ 64, /* size of prefetch block. */ ++ 12, /* number of parallel prefetches. */ ++ 3, /* Branch cost. */ ++ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */ ++ COSTS_N_INSNS (3), /* cost of FMUL instruction. */ ++ COSTS_N_INSNS (13), /* cost of FDIV instruction. */ ++ COSTS_N_INSNS (2), /* cost of FABS instruction. */ ++ COSTS_N_INSNS (2), /* cost of FCHS instruction. */ ++ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */ ++ ++ COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */ ++ COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. 
*/ ++ COSTS_N_INSNS (3), /* cost of MULSS instruction. */ ++ COSTS_N_INSNS (3), /* cost of MULSD instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SS instruction. */ ++ COSTS_N_INSNS (5), /* cost of FMA SD instruction. */ ++ COSTS_N_INSNS (11), /* cost of DIVSS instruction. */ ++ COSTS_N_INSNS (14), /* cost of DIVSD instruction. */ ++ COSTS_N_INSNS (11), /* cost of SQRTSS instruction. */ ++ COSTS_N_INSNS (18), /* cost of SQRTSD instruction. */ ++ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ ++ shijidadao_memcpy, ++ shijidadao_memset, ++ COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ ++ COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */ ++ "16:11:8", /* Loop alignment. */ ++ "16:11:8", /* Jump alignment. */ ++ "0:0:8", /* Label alignment. */ ++ "16", /* Func alignment. */ ++}; ++ + /* Generic should produce code tuned for Core-i7 (and newer chips) + and btver1 (and newer chips). */ + +diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc +index 2ead7ac55..13b1ba43f 100644 +--- a/gcc/config/i386/x86-tune-sched.cc ++++ b/gcc/config/i386/x86-tune-sched.cc +@@ -58,6 +58,7 @@ ix86_issue_rate (void) + case PROCESSOR_K8: + case PROCESSOR_AMDFAM10: + case PROCESSOR_BTVER1: ++ case PROCESSOR_LUJIAZUI: + return 3; + + case PROCESSOR_BDVER1: +@@ -74,6 +75,8 @@ ix86_issue_rate (void) + case PROCESSOR_HASWELL: + case PROCESSOR_TREMONT: + case PROCESSOR_ALDERLAKE: ++ case PROCESSOR_YONGFENG: ++ case PROCESSOR_SHIJIDADAO: + case PROCESSOR_GENERIC: + return 4; + +@@ -429,6 +432,32 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + } + break; + ++ case PROCESSOR_YONGFENG: ++ case PROCESSOR_SHIJIDADAO: ++ /* Stack engine allows to execute push&pop instructions in parallel. 
*/ ++ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) ++ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) ++ return 0; ++ /* FALLTHRU */ ++ ++ case PROCESSOR_LUJIAZUI: ++ memory = get_attr_memory (insn); ++ ++ /* Show ability of reorder buffer to hide latency of load by executing ++ in parallel with previous instruction in case ++ previous instruction is not needed to compute the address. */ ++ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) ++ && !ix86_agi_dependent (dep_insn, insn)) ++ { ++ int loadcost = 4; ++ ++ if (cost >= loadcost) ++ cost -= loadcost; ++ else ++ cost = 0; ++ } ++ break; ++ + case PROCESSOR_CORE2: + case PROCESSOR_NEHALEM: + case PROCESSOR_SANDYBRIDGE: +diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def +index fd095f3ec..34eff2712 100644 +--- a/gcc/config/i386/x86-tune.def ++++ b/gcc/config/i386/x86-tune.def +@@ -42,7 +42,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + DEF_TUNE (X86_TUNE_SCHEDULE, "schedule", + m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_PARTIAL_REG_DEPENDENCY: Enable more register renaming + on modern chips. 
Prefer stores affecting whole integer register +@@ -52,7 +52,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 + | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL + | m_KNL | m_KNM | m_AMD_MULTIPLE | m_TREMONT | m_ALDERLAKE +- | m_GENERIC) ++ | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store + destinations to be 128bit to allow register renaming on 128bit SSE units, +@@ -62,7 +62,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency", + that can be partly masked by careful scheduling of moves. */ + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids + partial write to the destination in scalar SSE conversion from FP +@@ -70,14 +70,14 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY, + "sse_partial_reg_fp_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial + write to the destination in scalar SSE conversion from integer to FP. 
*/ + DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, + "sse_partial_reg_converts_dependency", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10 +- | m_BDVER | m_ZNVER | m_ALDERLAKE | m_GENERIC) ++ | m_BDVER | m_ZNVER | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts zero-idiom before + several insns to break false dependency on the dest register for GLC +@@ -108,39 +108,39 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall", + DEF_TUNE (X86_TUNE_MOVX, "movx", + m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE + | m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL +- | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE +- | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_CORE_AVX2 ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by + full sized loads. */ + DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall", + m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL + | m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent + conditional jump instruction for 32 bit TARGET. */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32", +- m_CORE_ALL | m_BDVER | m_ZNVER | m_GENERIC) ++ m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent + conditional jump instruction for TARGET_64BIT. 
*/ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER +- | m_ZNVER | m_GENERIC) ++ | m_ZNVER | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a + subsequent conditional jump instruction when the condition jump + check sign flag (SF) or overflow flag (OF). */ + DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER +- | m_ZNVER | m_GENERIC) ++ | m_ZNVER | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional + jump instruction when the alu instruction produces the CCFLAG consumed by + the conditional jump instruction. */ + DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", +- m_SANDYBRIDGE | m_CORE_AVX2 | m_GENERIC) ++ m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC) + + + /*****************************************************************************/ +@@ -157,7 +157,7 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", + + DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args", + m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL +- | m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8) ++ | m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8 | m_ZHAOXIN) + + /* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are + considered on critical path. */ +@@ -172,14 +172,14 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move", + /* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */ + DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave", + m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_TREMONT +- | m_ALDERLAKE | m_GENERIC) ++ | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions. + Some chips, like 486 and Pentium works faster with separate load + and push instructions. 
*/ + DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory", + m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred + over esp subtraction. */ +@@ -234,7 +234,7 @@ DEF_TUNE (X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, "software_prefetching_benefi + + /* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall + on 16-bit immediate moves into memory on Core2 and Corei7. */ +-DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_GENERIC) ++DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_READ_MODIFY: Enable use of read-modify instructions such + as "add mem, reg". */ +@@ -249,19 +249,19 @@ DEF_TUNE (X86_TUNE_READ_MODIFY, "read_modify", ~(m_PENT | m_LAKEMONT | m_PPRO)) + DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec", + ~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE + | m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC)) + + /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred + for DFmode copies */ + DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves", + ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC)) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC)) + + /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag + will impact LEA instruction selection. 
*/ + DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL +- | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL) ++ | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_ZHAOXIN) + + /* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */ + DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr", +@@ -294,7 +294,7 @@ DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA) + move/set sequences of bytes with known size. */ + DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, + "prefer_known_rep_movsb_stosb", +- m_SKYLAKE | m_ALDERLAKE | m_TREMONT | m_CORE_AVX512) ++ m_SKYLAKE | m_ALDERLAKE | m_TREMONT | m_CORE_AVX512 | m_ZHAOXIN) + + /* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of + compact prologues and epilogues by issuing a misaligned moves. This +@@ -303,15 +303,15 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB, + FIXME: This may actualy be a win on more targets than listed here. */ + DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES, + "misaligned_move_string_pro_epilogues", +- m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT +- | m_ALDERLAKE | m_GENERIC) ++ m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_TREMONT ++ | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_USE_SAHF: Controls use of SAHF. */ + DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER + | m_BTVER | m_ZNVER | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT +- | m_ALDERLAKE | m_GENERIC) ++ | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_USE_CLTD: Controls use of CLTD and CTQO instructions. 
*/ + DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", +@@ -322,12 +322,12 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd", + DEF_TUNE (X86_TUNE_USE_BT, "use_bt", + m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL + | m_LAKEMONT | m_AMD_MULTIPLE | m_GOLDMONT | m_GOLDMONT_PLUS +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency + for bit-manipulation instructions. */ + DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi", +- m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ m_SANDYBRIDGE | m_CORE_AVX2 | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based + on hardware capabilities. Bdver3 hardware has a loop buffer which makes +@@ -339,18 +339,18 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4) + if-converted sequence to one. */ + DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn", + m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */ + DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence", +- m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ m_CORE_ALL | m_BDVER | m_ZNVER | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by + generating instructions for abs (x) = (((signed) x >> (W-1) ^ x) - + (signed) x >> (W-1)) instead of cmove or SSE max/abs instructions. 
*/ + DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs", + m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT +- | m_GOLDMONT_PLUS) ++ | m_GOLDMONT_PLUS | m_ZHAOXIN) + + /*****************************************************************************/ + /* 387 instruction selection tuning */ +@@ -368,16 +368,16 @@ DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop", + ~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL + | m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE + | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE +- | m_GENERIC)) ++ | m_ZHAOXIN | m_GENERIC)) + + /* X86_TUNE_USE_FFREEP: Use freep instruction instead of fstp. */ +-DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE) ++DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN) + + /* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */ + DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT + | m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_GOLDMONT +- | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /*****************************************************************************/ + /* SSE instruction selection tuning */ +@@ -393,14 +393,14 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill", + DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_ALDERLAKE +- | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_GENERIC) ++ | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores + instead of a sequence loading registers by parts. 
*/ + DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", + m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM + | m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS +- | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single + precision 128bit instructions instead of double where possible. */ +@@ -409,13 +409,14 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim + + /* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */ + DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores", +- m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ m_AMD_MULTIPLE | m_CORE_ALL | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN ++ | m_GENERIC) + + /* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to + xorps/xorpd and other variants. */ + DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor", + m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER +- | m_TREMONT | m_ALDERLAKE | m_GENERIC) ++ | m_TREMONT | m_ALDERLAKE | m_ZHAOXIN | m_GENERIC) + + /* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer + to SSE registers. If disabled, the moves will be done by storing +@@ -468,7 +469,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes", + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE +- | m_GENERIC | m_GDS)) ++ | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) + + /* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2 + elements. */ +@@ -479,7 +480,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts", + elements. 
*/ + DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ALDERLAKE +- | m_GENERIC | m_GDS)) ++ | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) + + /* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4 + elements. */ +@@ -490,7 +491,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts", + elements. */ + DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts", + ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_ALDERLAKE +- | m_GENERIC | m_GDS)) ++ | m_GENERIC | m_GDS | m_YONGFENG | m_SHIJIDADAO)) + + /* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more + elements. */ +@@ -500,7 +501,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts", + /* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or + smaller FMA chain. */ + DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 +- | m_ZNVER3 | m_ZNVER4 | m_GENERIC) ++ | m_ZNVER3 | m_ZNVER4 | m_GENERIC | m_YONGFENG | m_SHIJIDADAO) + + /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or + smaller FMA chain. */ +diff --git a/gcc/config/i386/yongfeng.md b/gcc/config/i386/yongfeng.md +new file mode 100644 +index 000000000..a225b409c +--- /dev/null ++++ b/gcc/config/i386/yongfeng.md +@@ -0,0 +1,848 @@ ++;; Copyright (C) 2012-2023 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. 
++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; <http://www.gnu.org/licenses/>. ++;; ++ ++;; ZHAOXIN yongfeng processor Scheduling ++;; Modeling automatons for yongfeng decoders, integer execution pipes, ++;; FP execution pipes, AGU pipes, and dividers. ++(define_automaton "yongfeng_decoder,yongfeng_ieu,yongfeng_fp,yongfeng_agu,yongfeng_idiv,yongfeng_fdiv") ++ ++;; The rules for the decoder are simple: ++;; - an instruction with 1 uop can be decoded by any of the four ++;; decoders in one cycle. ++;; - an instruction with 2 uops can be decoded by decoder 0 or decoder 1 ++;; or decoder 2 but still in only one cycle. ++;; - a complex (microcode) instruction can only be decoded by ++;; decoder 0, and this takes an unspecified number of cycles. ++;; ++;; The goal is to schedule such that we have a few-one-two uops sequence ++;; in each cycle, to decode as many instructions per cycle as possible. ++(define_cpu_unit "yf_decoder0" "yongfeng_decoder") ++(define_cpu_unit "yf_decoder1" "yongfeng_decoder") ++(define_cpu_unit "yf_decoder2" "yongfeng_decoder") ++(define_cpu_unit "yf_decoder3" "yongfeng_decoder") ++ ++;; We first wish to find an instruction for yf_decoder0, so exclude ++;; other decoders from being reserved until yf_decoder0 is ++;; reserved ++(presence_set "yf_decoder1" "yf_decoder0") ++(presence_set "yf_decoder2" "yf_decoder0") ++(presence_set "yf_decoder3" "yf_decoder0") ++ ++;; Most instructions can be decoded on any of the four decoders. ++(define_reservation "yf_decodern" "yf_decoder0|yf_decoder1|yf_decoder2|yf_decoder3") ++(define_reservation "yf_decoder012" "yf_decoder0|yf_decoder1|yf_decoder2") ++ ++;; The out-of-order core has ten pipelines. Port 0,1,2,3 are integer execution ++;; pipelines, port 4, 5 are responsible for address calculation, load and store, ++;; port 6,7,8,9 are FP pipelines. 
++(define_cpu_unit "yf_p0,yf_p1,yf_p2,yf_p3" "yongfeng_ieu") ++(define_cpu_unit "yf_p4,yf_p5" "yongfeng_agu") ++(define_cpu_unit "yf_p6,yf_p7,yf_p8,yf_p9" "yongfeng_fp") ++ ++(define_cpu_unit "yf_idiv" "yongfeng_idiv") ++(define_cpu_unit "yf_fdiv" "yongfeng_fdiv") ++ ++(define_reservation "yf_ieu" "yf_p0|yf_p1|yf_p2|yf_p3") ++(define_reservation "yf_p01" "yf_p0|yf_p1") ++(define_reservation "yf_agu" "yf_p4|yf_p5") ++(define_reservation "yf_feu" "yf_p6|yf_p7|yf_p8|yf_p9") ++ ++;; Only the irregular instructions have to be modeled here. ++ ++;; Complex instruction. ++(define_insn_reservation "yongfeng_complex_insn" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (eq_attr "type" "other,multi,str")) ++ "yf_decoder0") ++ ++;; Call instruction. ++(define_insn_reservation "yongfeng_call" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (eq_attr "type" "call,callv")) ++ "yf_decoder012,yf_agu,yf_ieu*3") ++;; Push and pop. ++(define_insn_reservation "yongfeng_push_reg" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "push"))) ++ "yf_decodern,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_push_mem" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "push"))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_pop_reg" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "pop"))) ++ "yf_decoder012,yf_p01,yf_agu") ++ ++(define_insn_reservation "yongfeng_pop_mem" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "pop"))) ++ "yf_decoder0,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_leave" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (eq_attr "type" "leave")) ++ "yf_decoder0,yf_agu,yf_p01*3") ++ ++;; MOV - integer moves. 
++(define_insn_reservation "yongfeng_imov" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "imov,imovx"))) ++ "yf_decodern,yf_ieu") ++ ++(define_insn_reservation "yongfeng_imov_load" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "imov"))) ++ "yf_decodern,yf_agu") ++ ++(define_insn_reservation "yongfeng_imovx_load" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "imovx"))) ++ "yf_decoder012,yf_agu,yf_ieu|yf_ieu") ++ ++(define_insn_reservation "yongfeng_imov_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "imov"))) ++ "yf_decodern,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_int_insn" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "alu,alu1,icmov,icmp,test,lea,ishift1,rotate,rotate1,setcc,incdec"))) ++ "yf_decodern,yf_ieu") ++ ++(define_insn_reservation "yongfeng_int_insn_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "alu,alu1,icmov,icmp,test,ishift1,rotate,rotate1,setcc"))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_int_insn_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "alu,alu1,icmp,test,ishift1,rotate,rotate1,setcc"))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_int_insn_both" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "both") ++ (eq_attr "type" "alu,alu1,icmp,test,ishift1,rotate,rotate1,setcc,incdec"))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_shift_HI" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "ishift")))) ++ "yf_decoder0,yf_ieu") ++ ++(define_insn_reservation "yongfeng_shift_SIDI" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and 
(eq_attr "memory" "none,unknown") ++ (and (eq_attr "mode" "SI,DI") ++ (eq_attr "type" "ishift")))) ++ "yf_decoder0,yf_ieu") ++ ++(define_insn_reservation "yongfeng_shift_HI_mem" 9 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "ishift")))) ++ "yf_decoder0,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_shift_SIDI_mem" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "SI,DI") ++ (eq_attr "type" "ishift")))) ++ "yf_decoder0,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_negnot_QIHI" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "mode" "QI,HI") ++ (eq_attr "type" "negnot")))) ++ "yf_decoder012,yf_ieu|yf_ieu") ++ ++(define_insn_reservation "yongfeng_negnot_SIDI" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none,unknown") ++ (and (eq_attr "mode" "SI,DI") ++ (eq_attr "type" "negnot")))) ++ "yf_decodern,yf_ieu") ++ ++(define_insn_reservation "yongfeng_negnot_QIHI_mem" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "QI,HI") ++ (eq_attr "type" "negnot")))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_negnot_SIDI_mem" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "SI,DI") ++ (eq_attr "type" "negnot")))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++;; branch instruction ++(define_insn_reservation "yongfeng_branch" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ibr"))) ++ "yf_decodern,yf_p2*3") ++ ++(define_insn_reservation "yongfeng_branch_mem" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (eq_attr "type" "ibr"))) ++ "yf_decodern,yf_agu,yf_p2") ++ ++;; Integer Multiplication instructions. 
++ ++(define_insn_reservation "yongfeng_imul_QI" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "type" "imul")))) ++ "yf_decodern,yf_ieu|yf_ieu") ++ ++(define_insn_reservation "yongfeng_imul_HI" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "imul")))) ++ "yf_decoder0,yf_ieu") ++ ++(define_insn_reservation "yongfeng_imul_SIDI" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "SI,DI") ++ (eq_attr "type" "imul")))) ++ "yf_decoder0,yf_ieu|yf_ieu") ++ ++(define_insn_reservation "yongfeng_imul_QI_mem" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "QI") ++ (eq_attr "type" "imul")))) ++ "yf_decoder012,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_imul_SIDI_mem" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "SI,DI") ++ (eq_attr "type" "imul")))) ++ "yf_decoder0,yf_agu,yf_ieu") ++ ++(define_insn_reservation "yongfeng_imul_HI_mem" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "!none") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "imul")))) ++ "yf_decoder0,yf_agu,yf_ieu") ++ ++;; Integer Division instructions. 
++ ++(define_insn_reservation "yongfeng_idiv_DI" 41 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "type" "idiv")))) ++ "yf_decoder0,yf_ieu,yf_feu,yf_idiv*41") ++ ++(define_insn_reservation "yongfeng_idiv_HI" 9 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "idiv")))) ++ "yf_decoder0,yf_ieu,yf_feu,yf_idiv*3") ++ ++(define_insn_reservation "yongfeng_idiv_QISI" 8 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (and (eq_attr "mode" "QI,SI") ++ (eq_attr "type" "idiv")))) ++ "yf_decoder0,yf_ieu,yf_feu,yf_idiv*3") ++ ++ ++(define_insn_reservation "yongfeng_idiv_mem_DI" 45 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "DI") ++ (eq_attr "type" "idiv")))) ++ "yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*41") ++ ++(define_insn_reservation "yongfeng_idiv_HI_mem" 13 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "HI") ++ (eq_attr "type" "idiv")))) ++ "yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*3") ++ ++ ++(define_insn_reservation "yongfeng_idiv_QISI_mem" 12 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (and (eq_attr "mode" "QI,SI") ++ (eq_attr "type" "idiv")))) ++ "yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*3") ++ ++;; MMX,SSE,AVX,AVX2 instructions ++;; sse moves ++ ++(define_insn_reservation "yongfeng_sse_mov" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssemov")))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_mov_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "ssemov")))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_mov_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and 
(eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssemov")))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_mov" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssemov")))) ++ "yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_mov_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "ssemov")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_mov_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssemov")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++;;sse general instructions ++(define_insn_reservation "yongfeng_sse_insns" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp")))) ++ "yf_decodern,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_insns_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp")))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_insns" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp")))) ++ "yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_insns_load" 8 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_iadd" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DI,TI") ++ 
(and (eq_attr "memory" "none") ++ (eq_attr "type" "sseiadd")))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_iadd_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DI,TI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseiadd")))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_iadd" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseiadd")))) ++ "yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_iadd_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseiadd")))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_iadd1" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseiadd1"))) ++ "yf_decoder0,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_iadd1_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseiadd1"))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++;;sse imul ++(define_insn_reservation "yongfeng_sse_imul" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DI,TI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseimul")))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_imul_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DI,TI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseimul")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_imul" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseimul")))) ++ "yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_imul_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "OI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseimul")))) ++ 
"yf_decoder0,yf_agu,yf_feu") ++ ++;; sse FMA ++(define_insn_reservation "yongfeng_sse_fma" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssemuladd")))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_fma_load" 9 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,DF,V4SF,V2DF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssemuladd")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_fma" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssemuladd")))) ++ "yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_fma_load" 10 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssemuladd")))) ++ "yf_decoder0,yf_agu,yf_feu") ++;; sse div ++(define_insn_reservation "yongfeng_ssediv_s" 10 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,V4SF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssediv")))) ++ "yf_decodern,yf_fdiv*2") ++ ++(define_insn_reservation "yongfeng_ssediv_s_load" 14 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SF,V4SF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssediv")))) ++ "yf_decodern,yf_agu,yf_fdiv*2") ++ ++(define_insn_reservation "yongfeng_ssediv_d" 14 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DF,V2DF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssediv")))) ++ "yf_decodern,yf_fdiv*3") ++ ++(define_insn_reservation "yongfeng_ssediv_d_load" 18 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DF,V2DF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssediv")))) ++ "yf_decodern,yf_agu,yf_fdiv*3") ++ ++(define_insn_reservation "yongfeng_ssediv_avx256_s" 10 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF") ++ (and 
(eq_attr "memory" "none") ++ (eq_attr "type" "ssediv")))) ++ "yf_decoder012,yf_fdiv*10") ++ ++(define_insn_reservation "yongfeng_ssediv_avx256_s_load" 15 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssediv")))) ++ "yf_decoder012,yf_agu,yf_fdiv*10") ++ ++(define_insn_reservation "yongfeng_ssediv_avx256_d" 14 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssediv")))) ++ "yf_decoder012,yf_fdiv*14") ++ ++(define_insn_reservation "yongfeng_ssediv_avx256_d_load" 19 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssediv")))) ++ "yf_decoder012,yf_agu,yf_fdiv*14") ++ ++;;sse logical and shuffle instructions ++(define_insn_reservation "yongfeng_avx256_log_shuf" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")))) ++ "yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_log_shuf_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_log_shuf" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_log_shuf_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))) ++ "yf_decodern,yf_agu,yf_feu") ++;;sse shift ++ ++(define_insn_reservation "yongfeng_avx256_shift" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseishft,sseishft1")))) ++ 
"yf_decoder012,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx256_shift_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseishft,sseishft1")))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_shift" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseishft,sseishft1"))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_shift_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseishft,sseishft1"))) ++ "yf_decodern,yf_agu,yf_feu") ++;;sse comi ++(define_insn_reservation "yongfeng_avx256_test" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecomi"))))) ++ "yf_decoder012,yf_ieu*3") ++ ++(define_insn_reservation "yongfeng_avx256_test_load" 9 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V8SF,V4DF,OI") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecomi"))))) ++ "yf_decoder012,yf_agu,yf_ieu,yf_p6*3") ++ ++(define_insn_reservation "yongfeng_sse_test" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecomi")))) ++ "yf_decodern,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_test_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "prefix_extra" "1") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecomi")))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_comi" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "prefix_extra" "0") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecomi")))) ++ "yf_decodern,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_comi_load" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and 
(eq_attr "prefix_extra" "0") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecomi")))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++;;sse conversion ++(define_insn_reservation "yongfeng_avx_cvt_ps" 4 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V4SF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecvt")))) ++ "yf_decoder0,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx_cvt_ps_load" 8 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V4SF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecvt")))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx_cvt_pd" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecvt")))) ++ "yf_decoder0,yf_feu") ++ ++(define_insn_reservation "yongfeng_avx_cvt_pd_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "V4DF") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecvt")))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_cvt" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "ssecvt"))) ++ "yf_decodern,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_cvt_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "ssecvt"))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_icvt" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseicvt"))) ++ "yf_decodern,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_icvt_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseicvt"))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_icvt_SI" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseicvt")))) ++ "yf_decoder012,yf_feu") ++ 
++(define_insn_reservation "yongfeng_sse_icvt_SI_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "SI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseicvt")))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_icvt_DI" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DI") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "sseicvt")))) ++ "yf_decoder0,yf_feu") ++ ++(define_insn_reservation "yongfeng_sse_icvt_DI_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "mode" "DI") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "sseicvt")))) ++ "yf_decoder0,yf_agu,yf_feu") ++;; MMX ++(define_insn_reservation "yongfeng_mmx_move" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "mmxmov"))) ++ "yf_decodern,yf_p0") ++ ++(define_insn_reservation "yongfeng_mmx_move_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "mmxmov"))) ++ "yf_decodern,yf_agu,yf_p0") ++ ++(define_insn_reservation "yongfeng_mmx_move_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "mmxmov"))) ++ "yf_decodern,yf_agu,yf_p0") ++ ++(define_insn_reservation "yongfeng_mmx_mul" 2 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "mmxmul"))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_mmx_mul_load" 6 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "mmxmul"))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++;; MMX general instructions ++(define_insn_reservation "yongfeng_mmx_insns" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt"))) ++ "yf_decodern,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_mmx_insns_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" 
"mmxadd,mmxshft,mmxcmp,mmx,mmxcvt"))) ++ "yf_decodern,yf_agu,yf_feu|yf_feu") ++ ++(define_insn_reservation "yongfeng_mmx_insns_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt"))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++;; x87 floating point operations. ++ ++(define_insn_reservation "yongfeng_fxch" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (eq_attr "type" "fxch")) ++ "yf_decodern,yf_p0|yf_p1") ++ ++(define_insn_reservation "yongfeng_fcmov_sgn" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (eq_attr "type" "fcmov,fsgn")) ++ "yf_decodern,yf_p0|yf_p1,yf_feu") ++ ++(define_insn_reservation "yongfeng_fcmp" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fcmp"))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_fcmp_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fcmp"))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_fmov" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fmov"))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_fmov_store" 1 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "fmov"))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_fmov_load" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fmov"))) ++ "yf_decoder0,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_fistp" 5 ++ (and (eq_attr "cpu" "yongfeng") ++ (eq_attr "type" "fistp,fisttp")) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_fop_mul" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none,unknown") ++ (eq_attr "type" "fop,fmul"))) ++ "yf_decodern,yf_feu") ++ ++(define_insn_reservation "yongfeng_fop_mul_load" 7 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load,both") ++ 
(eq_attr "type" "fop,fmul"))) ++ "yf_decoder012,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_fop_store" 3 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "store") ++ (eq_attr "type" "fop"))) ++ "yf_decodern,yf_agu,yf_feu") ++ ++(define_insn_reservation "yongfeng_fdiv_fpspc" 14 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "none") ++ (eq_attr "type" "fdiv,fpspc"))) ++ "yf_decodern,yf_fdiv*7") ++ ++(define_insn_reservation "yongfeng_fdiv_fpspc_load" 18 ++ (and (eq_attr "cpu" "yongfeng") ++ (and (eq_attr "memory" "load") ++ (eq_attr "type" "fdiv,fpspc"))) ++ "yf_decoder012,yf_agu,yf_fdiv*7") +diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi +index 3cfecee53..929d69581 100644 +--- a/gcc/doc/extend.texi ++++ b/gcc/doc/extend.texi +@@ -21880,6 +21880,15 @@ Intel Knights Landing CPU. + @item knm + Intel Knights Mill CPU. + ++@item lujiazui ++ZHAOXIN lujiazui CPU. ++ ++@item yongfeng ++ZHAOXIN yongfeng CPU. ++ ++@item shijidadao ++ZHAOXIN shijidadao CPU. ++ + @item amdfam10h + AMD Family 10h CPU. + +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index d91eb44e3..3f20cd3b4 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -31803,6 +31803,23 @@ VIA Nano Quad Core CPU with x86-64, MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 + instruction set support. + (No scheduling is implemented for this chip.) + ++@item lujiazui ++ZHAOXIN lujiazui CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, ++SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16, ++ABM, BMI, BMI2, F16C, FXSR, RDSEED instruction set support. ++ ++@item yongfeng ++ZHAOXIN yongfeng CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, ++SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16, ++ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT ++instruction set support.
++ ++@item shijidadao ++ZHAOXIN shijidadao CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, ++SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16, ++ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT ++instruction set support. ++ + @item geode + AMD Geode embedded processor with MMX and 3DNow!@: instruction set support. + @end table +diff --git a/gcc/testsuite/g++.target/i386/mv32.C b/gcc/testsuite/g++.target/i386/mv32.C +new file mode 100644 +index 000000000..b311c35ba +--- /dev/null ++++ b/gcc/testsuite/g++.target/i386/mv32.C +@@ -0,0 +1,42 @@ ++// Test that dispatching can choose the right multiversion ++// for ZHAOXIN CPU with the same internal GCC processor id ++ ++// { dg-do run } ++// { dg-require-ifunc "" } ++// { dg-options "-O2" } ++ ++#include <assert.h> ++ ++int __attribute__ ((target("default"))) ++foo () ++{ ++ return 0; ++} ++ ++int __attribute__ ((target("arch=lujiazui"))) foo () { ++ return 1; ++} ++ ++int __attribute__ ((target("arch=yongfeng"))) foo () { ++ return 2; ++} ++ ++int __attribute__ ((target("arch=shijidadao"))) foo () { ++ return 3; ++} ++ ++int main () ++{ ++ int val = foo (); ++ ++ if (__builtin_cpu_is ("lujiazui")) ++ assert (val == 1); ++ else if (__builtin_cpu_is ("yongfeng")) ++ assert (val == 2); ++ else if (__builtin_cpu_is ("shijidadao")) ++ assert (val == 3); ++ else ++ assert (val == 0); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +index f0f3397a7..db406f427 100644 +--- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc ++++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc +@@ -192,6 +192,9 @@ extern void test_arch_alderlake (void) __attribute__((__target__("arch= + extern void test_arch_rocketlake (void) __attribute__((__target__("arch=rocketlake"))); + extern void test_arch_graniterapids (void) __attribute__((__target__("arch=graniterapids"))); + extern void test_arch_graniterapids_d (void)
__attribute__((__target__("arch=graniterapids-d"))); ++extern void test_arch_lujiazui (void) __attribute__((__target__("arch=lujiazui"))); ++extern void test_arch_yongfeng (void) __attribute__((__target__("arch=yongfeng"))); ++extern void test_arch_shijidadao (void) __attribute__((__target__("arch=shijidadao"))); + extern void test_arch_k8 (void) __attribute__((__target__("arch=k8"))); + extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3"))); + extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron"))); +@@ -214,6 +217,9 @@ extern void test_tune_core2 (void) __attribute__((__target__("tune=core2"))); + extern void test_tune_corei7 (void) __attribute__((__target__("tune=corei7"))); + extern void test_tune_corei7_avx (void) __attribute__((__target__("tune=corei7-avx"))); + extern void test_tune_core_avx2 (void) __attribute__((__target__("tune=core-avx2"))); ++extern void test_tune_lujiazui (void) __attribute__((__target__("tune=lujiazui"))); ++extern void test_tune_yongfeng (void) __attribute__((__target__("tune=yongfeng"))); ++extern void test_tune_shijidadao (void) __attribute__((__target__("tune=shijidadao"))); + extern void test_tune_k8 (void) __attribute__((__target__("tune=k8"))); + extern void test_tune_k8_sse3 (void) __attribute__((__target__("tune=k8-sse3"))); + extern void test_tune_opteron (void) __attribute__((__target__("tune=opteron"))); +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index f424b3d7aa3e719726343162a9c04ac80c34cb26..ad72a890e0894d02ca238c49109b85e8c2ed5d08 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 98 +%global gcc_release 99 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -507,6 +507,7 @@ Patch394: 0394-SVE-Add-SVE-constraint.patch Patch395: 0395-update-ai-model.patch Patch396: 0396-Add-pattern-for-cmlt-and-change-hip12.patch Patch397: 0397-SVE-Fix-cross-compile-error.patch +Patch398: 0398-Backport-zhaoxin-lujiazui-yongfeng-shijidadao-enable.patch # Part 1001-1999 %ifarch sw_64 @@ -1690,6 +1691,7 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch -P395 -p1 %patch -P396 -p1 %patch -P397 -p1 +%patch -P398 -p1 %ifarch sw_64 %patch -P1001 -p1 @@ -4317,6 +4319,10 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Fri Sep 12 2025 timhu_806d - 12.3.1-99 +- Type: Sync +- DESC: Sync patches from openeuler/gcc. + * Mon Sep 1 2025 zhangjingwang - 12.3.1-98 - Type: Sync - DESC: Sync patches from openeuler/gcc.