diff --git a/GCC14-1001-libstdc++-compat.patch b/GCC14-1001-libstdc++-compat.patch index 1bbdae5bb4348d6c3366e3992f2f769d81f174b8..b188d09bfb72e05f97e189c2e60936ca85cb77fe 100644 --- a/GCC14-1001-libstdc++-compat.patch +++ b/GCC14-1001-libstdc++-compat.patch @@ -3,6 +3,197 @@ From: zhaoshujian Date: Tue, 27 Aug 2024 15:56:36 +0800 Subject: [PATCH] add libstdc++_nonshared patch +Tested-by: jchzhou +--- + libstdc++-v3/acinclude.m4 | 2 +- + libstdc++-v3/config/locale/gnu/c_locale.cc | 4 + + libstdc++-v3/configure | 81 +- + libstdc++-v3/configure.ac | 5 + + libstdc++-v3/libsupc++/eh_exception.cc | 6 +- + libstdc++-v3/scripts/check_compile | 0 + libstdc++-v3/scripts/check_performance | 0 + libstdc++-v3/scripts/check_simd | 0 + libstdc++-v3/scripts/create_testsuite_files | 0 + libstdc++-v3/scripts/extract_symvers.in | 0 + .../scripts/gen_text_encoding_data.py | 0 + libstdc++-v3/scripts/make_graph.py | 0 + libstdc++-v3/scripts/testsuite_flags.in | 0 + libstdc++-v3/src/Makefile.am | 33 +- + libstdc++-v3/src/Makefile.in | 110 ++- + libstdc++-v3/src/c++11/codecvt.cc | 6 + + libstdc++-v3/src/c++11/condition_variable.cc | 4 + + libstdc++-v3/src/c++11/cxx11-ios_failure.cc | 2 + + libstdc++-v3/src/c++11/futex.cc | 6 + + libstdc++-v3/src/c++11/ios.cc | 2 + + libstdc++-v3/src/c++11/shared_ptr.cc | 2 + + libstdc++-v3/src/c++11/system_error.cc | 4 + + libstdc++-v3/src/c++17/floating_from_chars.cc | 4 + + libstdc++-v3/src/c++17/floating_to_chars.cc | 4 + + libstdc++-v3/src/c++98/ios_failure.cc | 2 + + libstdc++-v3/src/c++98/locale_facets.cc | 6 + + libstdc++-v3/src/nonshared11/Makefile.am | 139 +++ + libstdc++-v3/src/nonshared11/Makefile.in | 816 +++++++++++++++++ + libstdc++-v3/src/nonshared11/basic_file.cc | 82 ++ + libstdc++-v3/src/nonshared11/codecvt80.cc | 38 + + .../src/nonshared11/condition_variable80.cc | 44 + + .../src/nonshared11/cow-sstream-inst80.cc | 53 ++ + .../src/nonshared11/cow-stdexcept80.cc | 76 ++ + .../src/nonshared11/cow-string-inst80.cc | 52 ++ + .../src/nonshared11/cow-wstring-inst80.cc | 35 + + .../src/nonshared11/cxx11-ios_failure80.cc | 50 ++ + .../src/nonshared11/cxx11-locale-inst110.cc | 33 + + .../src/nonshared11/cxx11-locale-inst80.cc | 69 ++ + .../src/nonshared11/cxx11-wlocale-inst110.cc | 30 + + .../src/nonshared11/cxx11-wlocale-inst80.cc | 30 + + libstdc++-v3/src/nonshared11/debug.cc | 131 +++ + libstdc++-v3/src/nonshared11/debug110.cc | 30 + + libstdc++-v3/src/nonshared11/eh_ptr80.cc | 64 ++ + libstdc++-v3/src/nonshared11/functexcept80.cc | 48 + + libstdc++-v3/src/nonshared11/futex80.cc | 24 + + .../src/nonshared11/istream-inst80.cc | 121 +++ + libstdc++-v3/src/nonshared11/limits.cc | 57 ++ + .../src/nonshared11/locale-inst110.cc | 24 + + libstdc++-v3/src/nonshared11/locale-inst80.cc | 99 +++ + libstdc++-v3/src/nonshared11/shared_ptr80.cc | 25 + + .../src/nonshared11/sstream-inst80.cc | 55 ++ + .../src/nonshared11/streambuf-inst.cc | 42 + + .../src/nonshared11/string-inst110.cc | 26 + + libstdc++-v3/src/nonshared11/string-inst80.cc | 68 ++ + .../src/nonshared11/wlocale-inst110.cc | 32 + + .../src/nonshared11/wlocale-inst80.cc | 31 + + .../src/nonshared11/wstring-inst110.cc | 26 + + .../src/nonshared11/wstring-inst80.cc | 29 + + libstdc++-v3/src/nonshared17/Makefile.am | 146 ++++ + libstdc++-v3/src/nonshared17/Makefile.in | 817 ++++++++++++++++++ + libstdc++-v3/src/nonshared17/cow-fs_dir.cc | 103 +++ + libstdc++-v3/src/nonshared17/cow-fs_dir110.cc | 26 + + libstdc++-v3/src/nonshared17/cow-fs_ops.cc | 81 ++ + libstdc++-v3/src/nonshared17/cow-fs_path.cc | 131 +++ + .../src/nonshared17/cow-string-inst.cc | 23 + + .../src/nonshared17/cow-string-inst110.cc | 37 + + libstdc++-v3/src/nonshared17/eh_call.cc | 23 + + libstdc++-v3/src/nonshared17/eh_terminate.cc | 46 + + .../src/nonshared17/floating_from_chars.cc | 49 ++ + .../src/nonshared17/floating_from_chars110.cc | 36 + + .../src/nonshared17/floating_to_chars.cc | 100 +++ + .../src/nonshared17/floating_to_chars110.cc | 30 + + libstdc++-v3/src/nonshared17/fs_dir.cc | 105 +++ + libstdc++-v3/src/nonshared17/fs_dir110.cc | 41 + + libstdc++-v3/src/nonshared17/fs_ops80.cc | 103 +++ + libstdc++-v3/src/nonshared17/fs_path80.cc | 155 ++++ + .../src/nonshared17/memory_resource.cc | 75 ++ + libstdc++-v3/src/nonshared17/ostream-inst.cc | 23 + + .../src/nonshared17/ostream-inst110.cc | 25 + + libstdc++-v3/src/nonshared17/string-inst.cc | 23 + + .../src/nonshared17/string-inst110.cc | 37 + + libstdc++-v3/src/nonshared20/Makefile.am | 124 +++ + libstdc++-v3/src/nonshared20/Makefile.in | 777 +++++++++++++++++ + .../src/nonshared20/sstream-inst80.cc | 34 + + libstdc++-v3/src/nonshared20/tzdb110.cc | 27 + + libstdc++-v3/src/nonshared20/tzdb80.cc | 127 +++ + libstdc++-v3/src/nonshared23/Makefile.am | 112 +++ + libstdc++-v3/src/nonshared23/Makefile.in | 767 ++++++++++++++++ + libstdc++-v3/src/nonshared23/print.cc | 23 + + libstdc++-v3/src/nonshared23/stacktrace.cc | 23 + + libstdc++-v3/src/nonshared26/Makefile.am | 106 +++ + libstdc++-v3/src/nonshared26/Makefile.in | 761 ++++++++++++++++ + libstdc++-v3/src/nonshared26/text_encoding.cc | 23 + + libstdc++-v3/src/nonshared98/Makefile.am | 102 +++ + libstdc++-v3/src/nonshared98/Makefile.in | 802 +++++++++++++++++ + libstdc++-v3/src/nonshared98/char8_t-rtti.S | 166 ++++ + libstdc++-v3/src/nonshared98/extfloat.S | 547 ++++++++++++ + libstdc++-v3/src/nonshared98/ios_failure.cc | 27 + + libstdc++-v3/src/nonshared98/ios_init.cc | 38 + + libstdc++-v3/src/nonshared98/locale_facets.cc | 24 + + libstdc++-v3/src/shared/hashtable-aux.cc | 5 + + .../testsuite/experimental/simd/driver.sh | 0 + .../experimental/simd/generate_makefile.sh | 0 + 103 files changed, 9588 insertions(+), 24 deletions(-) + mode change 100755 => 100644 libstdc++-v3/configure + mode change 100755 => 100644 libstdc++-v3/scripts/check_compile + mode change 100755 => 100644 libstdc++-v3/scripts/check_performance + mode change 100755 => 100644 libstdc++-v3/scripts/check_simd + mode change 100755 => 100644 libstdc++-v3/scripts/create_testsuite_files + mode change 100755 => 100644 libstdc++-v3/scripts/extract_symvers.in + mode change 100755 => 100644 libstdc++-v3/scripts/gen_text_encoding_data.py + mode change 100755 => 100644 libstdc++-v3/scripts/make_graph.py + mode change 100755 => 100644 libstdc++-v3/scripts/testsuite_flags.in + create mode 100644 libstdc++-v3/src/nonshared11/Makefile.am + create mode 100644 libstdc++-v3/src/nonshared11/Makefile.in + create mode 100644 libstdc++-v3/src/nonshared11/basic_file.cc + create mode 100644 libstdc++-v3/src/nonshared11/codecvt80.cc + create mode 100644 libstdc++-v3/src/nonshared11/condition_variable80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cow-sstream-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cow-stdexcept80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cow-string-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cow-wstring-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cxx11-ios_failure80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cxx11-locale-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared11/cxx11-locale-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/cxx11-wlocale-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared11/cxx11-wlocale-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/debug.cc + create mode 100644 libstdc++-v3/src/nonshared11/debug110.cc + create mode 100644 libstdc++-v3/src/nonshared11/eh_ptr80.cc + create mode 100644 libstdc++-v3/src/nonshared11/functexcept80.cc + create mode 100644 libstdc++-v3/src/nonshared11/futex80.cc + create mode 100644 libstdc++-v3/src/nonshared11/istream-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/limits.cc + create mode 100644 libstdc++-v3/src/nonshared11/locale-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared11/locale-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/shared_ptr80.cc + create mode 100644 libstdc++-v3/src/nonshared11/sstream-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/streambuf-inst.cc + create mode 100644 libstdc++-v3/src/nonshared11/string-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared11/string-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/wlocale-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared11/wlocale-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared11/wstring-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared11/wstring-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared17/Makefile.am + create mode 100644 libstdc++-v3/src/nonshared17/Makefile.in + create mode 100644 libstdc++-v3/src/nonshared17/cow-fs_dir.cc + create mode 100644 libstdc++-v3/src/nonshared17/cow-fs_dir110.cc + create mode 100644 libstdc++-v3/src/nonshared17/cow-fs_ops.cc + create mode 100644 libstdc++-v3/src/nonshared17/cow-fs_path.cc + create mode 100644 libstdc++-v3/src/nonshared17/cow-string-inst.cc + create mode 100644 libstdc++-v3/src/nonshared17/cow-string-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared17/eh_call.cc + create mode 100644 libstdc++-v3/src/nonshared17/eh_terminate.cc + create mode 100644 libstdc++-v3/src/nonshared17/floating_from_chars.cc + create mode 100644 libstdc++-v3/src/nonshared17/floating_from_chars110.cc + create mode 100644 libstdc++-v3/src/nonshared17/floating_to_chars.cc + create mode 100644 libstdc++-v3/src/nonshared17/floating_to_chars110.cc + create mode 100644 libstdc++-v3/src/nonshared17/fs_dir.cc + create mode 100644 libstdc++-v3/src/nonshared17/fs_dir110.cc + create mode 100644 libstdc++-v3/src/nonshared17/fs_ops80.cc + create mode 100644 libstdc++-v3/src/nonshared17/fs_path80.cc + create mode 100644 libstdc++-v3/src/nonshared17/memory_resource.cc + create mode 100644 libstdc++-v3/src/nonshared17/ostream-inst.cc + create mode 100644 libstdc++-v3/src/nonshared17/ostream-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared17/string-inst.cc + create mode 100644 libstdc++-v3/src/nonshared17/string-inst110.cc + create mode 100644 libstdc++-v3/src/nonshared20/Makefile.am + create mode 100644 libstdc++-v3/src/nonshared20/Makefile.in + create mode 100644 libstdc++-v3/src/nonshared20/sstream-inst80.cc + create mode 100644 libstdc++-v3/src/nonshared20/tzdb110.cc + create mode 100644 libstdc++-v3/src/nonshared20/tzdb80.cc + create mode 100644 libstdc++-v3/src/nonshared23/Makefile.am + create mode 100644 libstdc++-v3/src/nonshared23/Makefile.in + create mode 100644 libstdc++-v3/src/nonshared23/print.cc + create mode 100644 libstdc++-v3/src/nonshared23/stacktrace.cc + create mode 100644 libstdc++-v3/src/nonshared26/Makefile.am + create mode 100644 libstdc++-v3/src/nonshared26/Makefile.in + create mode 100644 libstdc++-v3/src/nonshared26/text_encoding.cc + create mode 100644 libstdc++-v3/src/nonshared98/Makefile.am + create mode 100644 libstdc++-v3/src/nonshared98/Makefile.in + create mode 100644 libstdc++-v3/src/nonshared98/char8_t-rtti.S + create mode 100644 libstdc++-v3/src/nonshared98/extfloat.S + create mode 100644 libstdc++-v3/src/nonshared98/ios_failure.cc + create mode 100644 libstdc++-v3/src/nonshared98/ios_init.cc + create mode 100644 libstdc++-v3/src/nonshared98/locale_facets.cc + mode change 100755 => 100644 libstdc++-v3/testsuite/experimental/simd/driver.sh + mode change 100755 => 100644 libstdc++-v3/testsuite/experimental/simd/generate_makefile.sh diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 index 51a08bcc8..c75dda506 100644 @@ -53,7 +244,7 @@ index 3c1cb0c85..a86b7b082 100644 diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure old mode 100755 new mode 100644 -index 21abaeb07..0a09feeae +index 18053ab7e..db78e1ebb --- a/libstdc++-v3/configure +++ b/libstdc++-v3/configure @@ -633,6 +633,8 @@ ac_subst_vars='am__EXEEXT_FALSE @@ -74,7 +265,7 @@ index 21abaeb07..0a09feeae # These need to be absolute paths, yet at the same time need to # canonicalize only relative paths, because then amd will not unmount -@@ -55688,7 +55690,8 @@ if test "${with_gcc_major_version_only+set}" = set; then : +@@ -55690,7 +55692,8 @@ if test "${with_gcc_major_version_only+set}" = set; then : fi @@ -84,7 +275,7 @@ index 21abaeb07..0a09feeae ac_config_files="$ac_config_files Makefile" -@@ -55709,7 +55712,7 @@ ac_config_files="$ac_config_files src/libbacktrace/backtrace-supported.h" +@@ -55711,7 +55714,7 @@ ac_config_files="$ac_config_files src/libbacktrace/backtrace-supported.h" # append it here. Only modify Makefiles that have just been created. # # Also, get rid of this simulated-VPATH thing that automake does. @@ -93,7 +284,7 @@ index 21abaeb07..0a09feeae ac_config_commands="$ac_config_commands generate-headers" -@@ -56970,6 +56973,12 @@ do +@@ -56972,6 +56975,12 @@ do "src/c++20/Makefile") CONFIG_FILES="$CONFIG_FILES src/c++20/Makefile" ;; "src/c++23/Makefile") CONFIG_FILES="$CONFIG_FILES src/c++23/Makefile" ;; "src/c++26/Makefile") CONFIG_FILES="$CONFIG_FILES src/c++26/Makefile" ;; @@ -106,7 +297,7 @@ index 21abaeb07..0a09feeae "src/filesystem/Makefile") CONFIG_FILES="$CONFIG_FILES src/filesystem/Makefile" ;; "src/libbacktrace/Makefile") CONFIG_FILES="$CONFIG_FILES src/libbacktrace/Makefile" ;; "src/experimental/Makefile") CONFIG_FILES="$CONFIG_FILES src/experimental/Makefile" ;; -@@ -58483,6 +58492,72 @@ _EOF +@@ -58485,6 +58494,72 @@ _EOF ;; "src/c++26/Makefile":F) cat > vpsed$$ << \_EOF s!`test -f '$<' || echo '$(srcdir)/'`!! @@ -180,10 +371,10 @@ index 21abaeb07..0a09feeae sed -f vpsed$$ $ac_file > tmp$$ mv tmp$$ $ac_file diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac -index 37396bd6e..907b8534f 100644 +index a6525a9d3..32a2210df 100644 --- a/libstdc++-v3/configure.ac +++ b/libstdc++-v3/configure.ac -@@ -685,6 +685,11 @@ GLIBCXX_EXPORT_FLAGS +@@ -688,6 +688,11 @@ GLIBCXX_EXPORT_FLAGS # Determine what GCC version number to use in filesystem paths. GCC_BASE_VER @@ -591,10 +782,10 @@ index cd39a96b8..e7b4d39df 100644 extern void __at_thread_exit(__at_thread_exit_elt*); diff --git a/libstdc++-v3/src/c++11/cxx11-ios_failure.cc b/libstdc++-v3/src/c++11/cxx11-ios_failure.cc -index bd3fd556b..a409fd116 100644 +index 70dddc823..ecb0f001d 100644 --- a/libstdc++-v3/src/c++11/cxx11-ios_failure.cc +++ b/libstdc++-v3/src/c++11/cxx11-ios_failure.cc -@@ -122,9 +122,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION +@@ -124,9 +124,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION using __ios_failure = ios::failure; #endif @@ -607,7 +798,7 @@ index bd3fd556b..a409fd116 100644 void __throw_ios_failure(const char* str __attribute__((unused)), diff --git a/libstdc++-v3/src/c++11/futex.cc b/libstdc++-v3/src/c++11/futex.cc -index ada2fdf3c..d70625254 100644 +index 6139fc354..1249f8f7d 100644 --- a/libstdc++-v3/src/c++11/futex.cc +++ b/libstdc++-v3/src/c++11/futex.cc @@ -55,7 +55,9 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION @@ -4497,7 +4688,7 @@ index 000000000..3c5ce4aaa +.NOEXPORT: diff --git a/libstdc++-v3/src/nonshared17/cow-fs_dir.cc b/libstdc++-v3/src/nonshared17/cow-fs_dir.cc new file mode 100644 -index 000000000..1af9c4b97 +index 000000000..59538ebfa --- /dev/null +++ b/libstdc++-v3/src/nonshared17/cow-fs_dir.cc @@ -0,0 +1,103 @@ @@ -5254,7 +5445,7 @@ index 000000000..ff2c57c5c +#endif diff --git a/libstdc++-v3/src/nonshared17/fs_dir.cc b/libstdc++-v3/src/nonshared17/fs_dir.cc new file mode 100644 -index 000000000..616b849c8 +index 000000000..e73ae6bee --- /dev/null +++ b/libstdc++-v3/src/nonshared17/fs_dir.cc @@ -0,0 +1,105 @@ @@ -5521,10 +5712,10 @@ index 000000000..eed139f91 +#endif diff --git a/libstdc++-v3/src/nonshared17/fs_path80.cc b/libstdc++-v3/src/nonshared17/fs_path80.cc new file mode 100644 -index 000000000..6ea0bac35 +index 000000000..894af5d5c --- /dev/null +++ b/libstdc++-v3/src/nonshared17/fs_path80.cc -@@ -0,0 +1,157 @@ +@@ -0,0 +1,155 @@ +// Copyright (C) 2019-2024 Free Software Foundation, Inc. +// +// This file is part of the GNU ISO C++ Library. This library is free @@ -5590,8 +5781,6 @@ index 000000000..6ea0bac35 +asm (".hidden _ZTVSt23_Sp_counted_ptr_inplaceINSt10filesystem7__cxx1116filesystem_error5_ImplESaIS3_ELN9__gnu_cxx12_Lock_policyE2EE"); +asm (".hidden _ZZNSt19_Sp_make_shared_tag5_S_tiEvE5__tag"); +asm (".hidden _ZNSt10filesystem7__cxx1116filesystem_error5_Impl9make_whatESt17basic_string_viewIcSt11char_traitsIcEEPKNS0_4pathES9_"); -+asm (".hidden _ZNSt10filesystem7__cxx114path5_CmptD1Ev"); -+asm (".hidden _ZNSt10filesystem7__cxx114path5_CmptD2Ev"); +asm (".hidden _ZNSt10unique_ptrINSt10filesystem7__cxx114path5_List5_ImplENS3_13_Impl_deleterEED1Ev"); +asm (".hidden _ZNSt10unique_ptrINSt10filesystem7__cxx114path5_List5_ImplENS3_13_Impl_deleterEED2Ev"); +asm (".hidden _ZNSt15__allocated_ptrISaISt23_Sp_counted_ptr_inplaceINSt10filesystem7__cxx1116filesystem_error5_ImplESaIS4_ELN9__gnu_cxx12_Lock_policyE2EEEED1Ev"); @@ -10646,5 +10835,5 @@ diff --git a/libstdc++-v3/testsuite/experimental/simd/generate_makefile.sh b/lib old mode 100755 new mode 100644 -- -2.43.0 +2.50.0 diff --git a/GCC14-1002-change-gcc-version.patch b/GCC14-1002-change-gcc-version.patch index 7e8c991704c3ca87794ca7c8ecce202f11188967..3d90dee02f2e5eb69a5def5ca2fe912476200b36 100644 --- a/GCC14-1002-change-gcc-version.patch +++ b/GCC14-1002-change-gcc-version.patch @@ -3,8 +3,8 @@ index 07ea9fa43..dab6a80f4 100644 --- a/gcc/BASE-VER +++ b/gcc/BASE-VER @@ -1 +1 @@ --14.2.0 -+14.2.1 +-14.3.0 ++14.3.1 -- 2.43.0 diff --git a/GCC14-1003-i386-Add-non-optimize-prefetchi-intrins.patch b/GCC14-1003-i386-Add-non-optimize-prefetchi-intrins.patch deleted file mode 100644 index 195c31175b3dd8572ca3914c8ce66b460486d85f..0000000000000000000000000000000000000000 --- a/GCC14-1003-i386-Add-non-optimize-prefetchi-intrins.patch +++ /dev/null @@ -1,88 +0,0 @@ -From b4524c4430ba9771265bd9fc31e69a3f35dfe117 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Thu, 25 Jul 2024 16:16:05 +0800 -Subject: [PATCH] i386: Add non-optimize prefetchi intrins - -Under -O0, with the "newly" introduced intrins, the variable will be -transformed as mem instead of the origin symbol_ref. The compiler will -then treat the operand as invalid and turn the operation into nop, which -is not expected. Use macro for non-optimize to keep the variable as -symbol_ref just as how prefetch intrin does. - -gcc/ChangeLog: - - * config/i386/prfchiintrin.h - (_m_prefetchit0): Add macro for non-optimized option. - (_m_prefetchit1): Ditto. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/prefetchi-1b.c: New test. ---- - gcc/config/i386/prfchiintrin.h | 9 +++++++ - gcc/testsuite/gcc.target/i386/prefetchi-1b.c | 26 ++++++++++++++++++++ - 2 files changed, 35 insertions(+) - create mode 100644 gcc/testsuite/gcc.target/i386/prefetchi-1b.c - -diff --git a/gcc/config/i386/prfchiintrin.h b/gcc/config/i386/prfchiintrin.h -index dfca89c7d16..d6580e504c0 100644 ---- a/gcc/config/i386/prfchiintrin.h -+++ b/gcc/config/i386/prfchiintrin.h -@@ -37,6 +37,7 @@ - #define __DISABLE_PREFETCHI__ - #endif /* __PREFETCHI__ */ - -+#ifdef __OPTIMIZE__ - extern __inline void - __attribute__((__gnu_inline__, __always_inline__, __artificial__)) - _m_prefetchit0 (void* __P) -@@ -50,6 +51,14 @@ _m_prefetchit1 (void* __P) - { - __builtin_ia32_prefetchi (__P, 2); - } -+#else -+#define _m_prefetchit0(P) \ -+ __builtin_ia32_prefetchi(P, 3) -+ -+#define _m_prefetchit1(P) \ -+ __builtin_ia32_prefetchi(P, 2) -+ -+#endif - - #ifdef __DISABLE_PREFETCHI__ - #undef __DISABLE_PREFETCHI__ -diff --git a/gcc/testsuite/gcc.target/i386/prefetchi-1b.c b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c -new file mode 100644 -index 00000000000..93139554d3c ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/prefetchi-1b.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-mprefetchi -O0" } */ -+/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit0\[ \\t\]+bar\\(%rip\\)" 1 } } */ -+/* { dg-final { scan-assembler-times "\[ \\t\]+prefetchit1\[ \\t\]+bar\\(%rip\\)" 1 } } */ -+ -+#include -+ -+int -+bar (int a) -+{ -+ return a + 1; -+} -+ -+int -+foo1 (int b) -+{ -+ _m_prefetchit0 (bar); -+ return bar (b) + 1; -+} -+ -+int -+foo2 (int b) -+{ -+ _m_prefetchit1 (bar); -+ return bar (b) + 1; -+} --- -2.31.1 - diff --git a/GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch b/GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch index 5be830ef9ac99e0f92049f8efdc24e29d84cfca3..74693044d31446ac88ab4c6d9fba319d37761f9f 100644 --- a/GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch +++ b/GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch @@ -3,6 +3,7 @@ From: Your Name Date: Mon, 9 Dec 2024 15:16:01 +0800 Subject: [PATCH 2/2] libstdc++-compat: Update symbol list for RISC-V 64 +Tested-by: jchzhou --- libstdc++-v3/src/nonshared11/codecvt80.cc | 2 + .../src/nonshared11/cow-sstream-inst80.cc | 4 ++ @@ -17,13 +18,13 @@ Subject: [PATCH 2/2] libstdc++-compat: Update symbol list for RISC-V 64 .../src/nonshared17/floating_to_chars110.cc | 2 + libstdc++-v3/src/nonshared17/fs_dir.cc | 16 +++++++- libstdc++-v3/src/nonshared17/fs_ops80.cc | 10 +++++ - libstdc++-v3/src/nonshared17/fs_path80.cc | 26 ++++++++++++ + libstdc++-v3/src/nonshared17/fs_path80.cc | 24 +++++++++++ .../src/nonshared17/memory_resource.cc | 10 +++++ .../src/nonshared17/string-inst110.cc | 8 ++++ libstdc++-v3/src/nonshared20/tzdb110.cc | 2 + libstdc++-v3/src/nonshared20/tzdb80.cc | 40 +++++++++++++++++++ libstdc++-v3/src/nonshared98/extfloat.S | 5 ++- - 19 files changed, 190 insertions(+), 3 deletions(-) + 19 files changed, 188 insertions(+), 3 deletions(-) diff --git a/libstdc++-v3/src/nonshared11/codecvt80.cc b/libstdc++-v3/src/nonshared11/codecvt80.cc index c903548a8..fb42c0451 100644 @@ -487,7 +488,7 @@ index eed139f91..9035d3f77 100644 asm (".hidden _ZNSt11_Deque_baseINSt10filesystem7__cxx114pathESaIS2_EE17_M_initialize_mapEm"); asm (".hidden _ZNSt5dequeINSt10filesystem7__cxx114pathESaIS2_EE13_M_insert_auxINS2_8iteratorEEEvSt15_Deque_iteratorIS2_RS2_PS2_ET_SB_m"); diff --git a/libstdc++-v3/src/nonshared17/fs_path80.cc b/libstdc++-v3/src/nonshared17/fs_path80.cc -index 0100106a3..16576bbd3 100644 +index 894af5d5c..3469d9ff8 100644 --- a/libstdc++-v3/src/nonshared17/fs_path80.cc +++ b/libstdc++-v3/src/nonshared17/fs_path80.cc @@ -21,18 +21,23 @@ @@ -514,16 +515,12 @@ index 0100106a3..16576bbd3 100644 asm (".hidden _ZNSt10filesystem7__cxx114path5_ListC2ERKS2_"); asm (".hidden _ZNSt10filesystem7__cxx114path5_ListC2Ev"); asm (".hidden _ZNSt12__shared_ptrIKNSt10filesystem7__cxx1116filesystem_error5_ImplELN9__gnu_cxx12_Lock_policyE2EE4swapERS7_"); -@@ -62,13 +67,18 @@ asm (".hidden _ZTSSt19_Sp_make_shared_tag"); +@@ -62,11 +67,14 @@ asm (".hidden _ZTSSt19_Sp_make_shared_tag"); asm (".hidden _ZTSSt23_Sp_counted_ptr_inplaceINSt10filesystem7__cxx1116filesystem_error5_ImplESaIS3_ELN9__gnu_cxx12_Lock_policyE2EE"); asm (".hidden _ZTVSt23_Sp_counted_ptr_inplaceINSt10filesystem7__cxx1116filesystem_error5_ImplESaIS3_ELN9__gnu_cxx12_Lock_policyE2EE"); asm (".hidden _ZZNSt19_Sp_make_shared_tag5_S_tiEvE5__tag"); +#endif asm (".hidden _ZNSt10filesystem7__cxx1116filesystem_error5_Impl9make_whatESt17basic_string_viewIcSt11char_traitsIcEEPKNS0_4pathES9_"); - asm (".hidden _ZNSt10filesystem7__cxx114path5_CmptD1Ev"); -+#ifndef __riscv - asm (".hidden _ZNSt10filesystem7__cxx114path5_CmptD2Ev"); -+#endif asm (".hidden _ZNSt10unique_ptrINSt10filesystem7__cxx114path5_List5_ImplENS3_13_Impl_deleterEED1Ev"); +#ifndef __riscv asm (".hidden _ZNSt10unique_ptrINSt10filesystem7__cxx114path5_List5_ImplENS3_13_Impl_deleterEED2Ev"); @@ -533,7 +530,7 @@ index 0100106a3..16576bbd3 100644 #if !defined (__aarch64__) && !defined (__x86_64__) #ifndef __i386__ //asm (".hidden _ZNSt10filesystem7__cxx114pathaSISt17basic_string_viewIcSt11char_traitsIcEEEERNSt9enable_ifIXsrSt6__and_IISt6__not_ISt7is_sameINSt9remove_cvIT_E4typeES1_EES9_ISt7is_voidINSt14remove_pointerISC_E4typeEEENS0_8__detail20__constructible_fromISC_vEEEE5valueES1_E4typeERKSC_"); -@@ -91,15 +101,21 @@ asm (".hidden _ZNKSt10filesystem7__cxx114path5_List5_Impl4copyEv"); +@@ -89,15 +97,21 @@ asm (".hidden _ZNKSt10filesystem7__cxx114path5_List5_Impl4copyEv"); asm (".hidden _ZNSt10filesystem7__cxx114path7_Parser4nextEv"); asm (".hidden _ZNSt10filesystem7__cxx114path10_S_convertIwEEDaPKT_S5_"); // bad ppc64le #endif @@ -555,7 +552,7 @@ index 0100106a3..16576bbd3 100644 asm (".hidden _ZNSt10filesystem7__cxx118__detail24__throw_conversion_errorEv"); //asm (".hidden _ZTIZNSt10filesystem7__cxx114path10_S_convertIwEEDaPKT_S5_E5_UCvt"); //asm (".hidden _ZTSZNSt10filesystem7__cxx114path10_S_convertIwEEDaPKT_S5_E5_UCvt"); -@@ -108,7 +124,9 @@ asm (".hidden _ZNSt10filesystem7__cxx118__detail24__throw_conversion_errorEv"); +@@ -106,7 +120,9 @@ asm (".hidden _ZNSt10filesystem7__cxx118__detail24__throw_conversion_errorEv"); //asm (".hidden _ZZNSt10filesystem7__cxx114path10_S_convertIwEEDaPKT_S5_EN5_UCvtD1Ev"); //asm (".hidden _ZZNSt10filesystem7__cxx114path10_S_convertIwEEDaPKT_S5_EN5_UCvtD2Ev"); asm (".hidden _ZNSt12system_errorC1ESt10error_codeRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE"); @@ -565,7 +562,7 @@ index 0100106a3..16576bbd3 100644 //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_assignERKS4_"); asm (".hidden _ZNSt10filesystem7__cxx114path5_List5beginEv"); #ifndef __i386__ -@@ -118,7 +136,9 @@ asm (".hidden _ZNSt10filesystem7__cxx114path5_List5beginEv"); +@@ -116,7 +132,9 @@ asm (".hidden _ZNSt10filesystem7__cxx114path5_List5beginEv"); asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE6resizeEmc"); //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE7reserveEm"); //asm (".hidden _ZNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEE9_M_mutateEmmPKcm"); @@ -575,7 +572,7 @@ index 0100106a3..16576bbd3 100644 //asm (".hidden _ZNSt12_Destroy_auxILb0EE9__destroyIPNSt10filesystem7__cxx114path5_CmptEEEvT_S7_"); asm (".hidden _ZNKSt10filesystem7__cxx114path5_List5_Impl4copyEv"); #endif -@@ -142,9 +162,12 @@ asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE6resizeEjw"); +@@ -140,9 +158,12 @@ asm (".hidden _ZNSt7__cxx1112basic_stringIwSt11char_traitsIwESaIwEE6resizeEjw"); #endif asm (".hidden _ZNSt10filesystem7__cxx114path8_CodecvtIwED0Ev"); asm (".hidden _ZNSt10filesystem7__cxx114path8_CodecvtIwED1Ev"); @@ -588,7 +585,7 @@ index 0100106a3..16576bbd3 100644 //asm (".hidden _ZNSt12codecvt_utf8IwLm1114111ELSt12codecvt_mode0EED2Ev"); asm (".hidden _ZTINSt10filesystem7__cxx114path8_CodecvtIwEE"); //asm (".hidden _ZTISt12codecvt_utf8IwLm1114111ELSt12codecvt_mode0EE"); -@@ -152,6 +175,9 @@ asm (".hidden _ZTSNSt10filesystem7__cxx114path8_CodecvtIwEE"); +@@ -150,6 +171,9 @@ asm (".hidden _ZTSNSt10filesystem7__cxx114path8_CodecvtIwEE"); //asm (".hidden _ZTSSt12codecvt_utf8IwLm1114111ELSt12codecvt_mode0EE"); asm (".hidden _ZTVNSt10filesystem7__cxx114path8_CodecvtIwEE"); //asm (".hidden _ZTVSt12codecvt_utf8IwLm1114111ELSt12codecvt_mode0EE"); @@ -822,5 +819,5 @@ index c6270e618..b6e4164b5 100644 #define SIZE2 16 #define OFF 16 -- -2.43.0 +2.50.0 diff --git a/GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch b/GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch deleted file mode 100644 index c68c3122359eecc3fae752f2d6f5428b46d900c4..0000000000000000000000000000000000000000 --- a/GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch +++ /dev/null @@ -1,107 +0,0 @@ -From d77237154f3f79ac83af459a0517a4472a35fb24 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Wed, 24 Jul 2024 11:29:23 +0800 -Subject: [PATCH 01/21] Refine constraint "Bk" to - define_special_memory_constraint. - -For below pattern, RA may still allocate r162 as v/k register, try to -reload for address with leaq __libc_tsd_CTYPE_B@gottpoff(%rip), %rsi -which result a linker error. - -(set (reg:DI 162) - (mem/u/c:DI - (const:DI (unspec:DI - [(symbol_ref:DI ("a") [flags 0x60] )] - UNSPEC_GOTNTPOFF)) - -Quote from H.J for why linker issue an error. ->What do these do: -> -> leaq __libc_tsd_CTYPE_B@gottpoff(%rip), %rax -> vmovq (%rax), %xmm0 -> ->From x86-64 TLS psABI: -> ->The assembler generates for the x@gottpoff(%rip) expressions a R X86 ->64 GOTTPOFF relocation for the symbol x which requests the linker to ->generate a GOT entry with a R X86 64 TPOFF64 relocation. The offset of ->the GOT entry relative to the end of the instruction is then used in ->the instruction. The R X86 64 TPOFF64 relocation is pro- cessed at ->program startup time by the dynamic linker by looking up the symbol x ->in the modules loaded at that point. The offset is written in the GOT ->entry and later loaded by the addq instruction. -> ->The above code sequence looks wrong to me. - -gcc/ChangeLog: - - PR target/116043 - * config/i386/constraints.md (Bk): Refine to - define_special_memory_constraint. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr116043.c: New test. - -(cherry picked from commit a295076bee293aa3112c615f9af7a27231816a36) ---- - gcc/config/i386/constraints.md | 2 +- - gcc/testsuite/gcc.target/i386/pr116043.c | 33 ++++++++++++++++++++++++ - 2 files changed, 34 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr116043.c - -diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md -index 7508d7a58bd..b760e7c221a 100644 ---- a/gcc/config/i386/constraints.md -+++ b/gcc/config/i386/constraints.md -@@ -187,7 +187,7 @@ - "@internal Vector memory operand." - (match_operand 0 "vector_memory_operand")) - --(define_memory_constraint "Bk" -+(define_special_memory_constraint "Bk" - "@internal TLS address that allows insn using non-integer registers." - (and (match_operand 0 "memory_operand") - (not (match_test "ix86_gpr_tls_address_pattern_p (op)")))) -diff --git a/gcc/testsuite/gcc.target/i386/pr116043.c b/gcc/testsuite/gcc.target/i386/pr116043.c -new file mode 100644 -index 00000000000..76553496c10 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr116043.c -@@ -0,0 +1,33 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mavx512bf16 -O3" } */ -+/* { dg-final { scan-assembler-not {(?n)lea.*@gottpoff} } } */ -+ -+extern __thread int a, c, i, j, k, l; -+int *b; -+struct d { -+ int e; -+} f, g; -+char *h; -+ -+void m(struct d *n) { -+ b = &k; -+ for (; n->e; b++, n--) { -+ i = b && a; -+ if (i) -+ j = c; -+ } -+} -+ -+char *o(struct d *n) { -+ for (; n->e;) -+ return h; -+} -+ -+int q() { -+ if (l) -+ return 1; -+ int p = *o(&g); -+ m(&f); -+ m(&g); -+ l = p; -+} --- -2.31.1 - diff --git a/GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch b/GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch deleted file mode 100644 index be5843eba13b8bd553a047f375f18349d39a5e0d..0000000000000000000000000000000000000000 --- a/GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch +++ /dev/null @@ -1,263 +0,0 @@ -From b1d999fd3609fb4649540952396131370769da65 Mon Sep 17 00:00:00 2001 -From: Lingling Kong -Date: Fri, 16 Aug 2024 15:52:27 +0800 -Subject: [PATCH 02/21] i386: Fix some vex insns that prohibit egpr - -Although these vex insn have evex counterpart, but when it -uses the displayed vex prefix should not support APX EGPR. -Like TARGET_AVXVNNI, TARGET_IFMA and TARGET_AVXNECONVERT. -TARGET_AVXVNNIINT8 and TARGET_AVXVNNITINT16 also are vex -insn should not support egpr. - -gcc/ChangeLog: - - * config/i386/sse.md (vpmadd52): - Prohibit egpr for vex version. - (vpdpbusd_): Ditto. - (vpdpbusds_): Ditto. - (vpdpwssd_): Ditto. - (vpdpwssds_): Ditto. - (*vcvtneps2bf16_v4sf): Ditto. - (*vcvtneps2bf16_v8sf): Ditto. - (vpdp_): Ditto. - (vbcstnebf162ps_): Ditto. - (vbcstnesh2ps_): Ditto. - (vcvtnee2ps_): Ditto. - (vcvtneo2ps_): Ditto. - (vpdp_): Ditto. - -(cherry picked from commit 45a771d22e3090c42a4934a49da9924165e080af) ---- - gcc/config/i386/sse.md | 49 +++++++++++++++++++++++++++--------------- - 1 file changed, 32 insertions(+), 17 deletions(-) - -diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md -index 1bf50726e83..218aa412c33 100644 ---- a/gcc/config/i386/sse.md -+++ b/gcc/config/i386/sse.md -@@ -29364,7 +29364,7 @@ - (unspec:VI8_AVX2 - [(match_operand:VI8_AVX2 1 "register_operand" "0,0") - (match_operand:VI8_AVX2 2 "register_operand" "x,v") -- (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm,vm")] -+ (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xjm,vm")] - VPMADD52))] - "TARGET_AVXIFMA || (TARGET_AVX512IFMA && TARGET_AVX512VL)" - "@ -@@ -29372,6 +29372,7 @@ - vpmadd52\t{%3, %2, %0|%0, %2, %3}" - [(set_attr "isa" "avxifma,avx512ifmavl") - (set_attr "type" "ssemuladd") -+ (set_attr "addr" "gpr16,*") - (set_attr "prefix" "vex,evex") - (set_attr "mode" "")]) - -@@ -29989,13 +29990,14 @@ - (unspec:VI4_AVX2 - [(match_operand:VI4_AVX2 1 "register_operand" "0,0") - (match_operand:VI4_AVX2 2 "register_operand" "x,v") -- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] -+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] - UNSPEC_VPDPBUSD))] - "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" - "@ - %{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3} - vpdpbusd\t{%3, %2, %0|%0, %2, %3}" - [(set_attr ("prefix") ("vex,evex")) -+ (set_attr "addr" "gpr16,*") - (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) - - (define_insn "vpdpbusd__mask" -@@ -30057,13 +30059,14 @@ - (unspec:VI4_AVX2 - [(match_operand:VI4_AVX2 1 "register_operand" "0,0") - (match_operand:VI4_AVX2 2 "register_operand" "x,v") -- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] -+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] - UNSPEC_VPDPBUSDS))] - "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" - "@ - %{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3} - vpdpbusds\t{%3, %2, %0|%0, %2, %3}" - [(set_attr ("prefix") ("vex,evex")) -+ (set_attr "addr" "gpr16,*") - (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) - - (define_insn "vpdpbusds__mask" -@@ -30125,13 +30128,14 @@ - (unspec:VI4_AVX2 - [(match_operand:VI4_AVX2 1 "register_operand" "0,0") - (match_operand:VI4_AVX2 2 "register_operand" "x,v") -- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] -+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] - UNSPEC_VPDPWSSD))] - "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" - "@ - %{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3} - vpdpwssd\t{%3, %2, %0|%0, %2, %3}" - [(set_attr ("prefix") ("vex,evex")) -+ (set_attr "addr" "gpr16,*") - (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) - - (define_insn "vpdpwssd__mask" -@@ -30193,13 +30197,14 @@ - (unspec:VI4_AVX2 - [(match_operand:VI4_AVX2 1 "register_operand" "0,0") - (match_operand:VI4_AVX2 2 "register_operand" "x,v") -- (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")] -+ (match_operand:VI4_AVX2 3 "nonimmediate_operand" "xjm,vm")] - UNSPEC_VPDPWSSDS))] - "TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)" - "@ - %{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3} - vpdpwssds\t{%3, %2, %0|%0, %2, %3}" - [(set_attr ("prefix") ("vex,evex")) -+ (set_attr "addr" "gpr16,*") - (set_attr ("isa") ("avxvnni,avx512vnnivl"))]) - - (define_insn "vpdpwssds__mask" -@@ -30417,13 +30422,14 @@ - [(set (match_operand:V8BF 0 "register_operand" "=x,v") - (vec_concat:V8BF - (float_truncate:V4BF -- (match_operand:V4SF 1 "nonimmediate_operand" "xm,vm")) -+ (match_operand:V4SF 1 "nonimmediate_operand" "xjm,vm")) - (match_operand:V4BF 2 "const0_operand")))] - "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)" - "@ - %{vex%} vcvtneps2bf16{x}\t{%1, %0|%0, %1} - vcvtneps2bf16{x}\t{%1, %0|%0, %1}" - [(set_attr "isa" "avxneconvert,avx512bf16vl") -+ (set_attr "addr" "gpr16,*") - (set_attr "prefix" "vex,evex")]) - - (define_expand "avx512f_cvtneps2bf16_v4sf_maskz" -@@ -30481,12 +30487,13 @@ - (define_insn "vcvtneps2bf16_v8sf" - [(set (match_operand:V8BF 0 "register_operand" "=x,v") - (float_truncate:V8BF -- (match_operand:V8SF 1 "nonimmediate_operand" "xm,vm")))] -+ (match_operand:V8SF 1 "nonimmediate_operand" "xjm,vm")))] - "TARGET_AVXNECONVERT || (TARGET_AVX512BF16 && TARGET_AVX512VL)" - "@ - %{vex%} vcvtneps2bf16{y}\t{%1, %0|%0, %1} - vcvtneps2bf16{y}\t{%1, %0|%0, %1}" - [(set_attr "isa" "avxneconvert,avx512bf16vl") -+ (set_attr "addr" "gpr16,*") - (set_attr "prefix" "vex,evex")]) - - -@@ -30942,30 +30949,33 @@ - (unspec:VI4_AVX - [(match_operand:VI4_AVX 1 "register_operand" "0") - (match_operand:VI4_AVX 2 "register_operand" "x") -- (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")] -+ (match_operand:VI4_AVX 3 "nonimmediate_operand" "xjm")] - VPDOTPROD))] - "TARGET_AVXVNNIINT8" - "vpdp\t{%3, %2, %0|%0, %2, %3}" -- [(set_attr "prefix" "vex")]) -+ [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16")]) - - (define_insn "vbcstnebf162ps_" - [(set (match_operand:VF1_128_256 0 "register_operand" "=x") - (vec_duplicate:VF1_128_256 - (float_extend:SF -- (match_operand:BF 1 "memory_operand" "m"))))] -+ (match_operand:BF 1 "memory_operand" "jm"))))] - "TARGET_AVXNECONVERT" - "vbcstnebf162ps\t{%1, %0|%0, %1}" - [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16") - (set_attr "mode" "")]) - - (define_insn "vbcstnesh2ps_" - [(set (match_operand:VF1_128_256 0 "register_operand" "=x") - (vec_duplicate:VF1_128_256 - (float_extend:SF -- (match_operand:HF 1 "memory_operand" "m"))))] -+ (match_operand:HF 1 "memory_operand" "jm"))))] - "TARGET_AVXNECONVERT" - "vbcstnesh2ps\t{%1, %0|%0, %1}" - [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16") - (set_attr "mode" "")]) - - (define_mode_attr bf16_ph -@@ -30976,19 +30986,20 @@ - [(set (match_operand:V4SF 0 "register_operand" "=x") - (float_extend:V4SF - (vec_select: -- (match_operand:VHFBF_128 1 "memory_operand" "m") -+ (match_operand:VHFBF_128 1 "memory_operand" "jm") - (parallel [(const_int 0) (const_int 2) - (const_int 4) (const_int 6)]))))] - "TARGET_AVXNECONVERT" - "vcvtnee2ps\t{%1, %0|%0, %1}" - [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16") - (set_attr "mode" "")]) - - (define_insn "vcvtnee2ps_" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (float_extend:V8SF - (vec_select: -- (match_operand:VHFBF_256 1 "memory_operand" "m") -+ (match_operand:VHFBF_256 1 "memory_operand" "jm") - (parallel [(const_int 0) (const_int 2) - (const_int 4) (const_int 6) - (const_int 8) (const_int 10) -@@ -30996,25 +31007,27 @@ - "TARGET_AVXNECONVERT" - "vcvtnee2ps\t{%1, %0|%0, %1}" - [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16") - (set_attr "mode" "")]) - - (define_insn "vcvtneo2ps_" - [(set (match_operand:V4SF 0 "register_operand" "=x") - (float_extend:V4SF - (vec_select: -- (match_operand:VHFBF_128 1 "memory_operand" "m") -+ (match_operand:VHFBF_128 1 "memory_operand" "jm") - (parallel [(const_int 1) (const_int 3) - (const_int 5) (const_int 7)]))))] - "TARGET_AVXNECONVERT" - "vcvtneo2ps\t{%1, %0|%0, %1}" - [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16") - (set_attr "mode" "")]) - - (define_insn "vcvtneo2ps_" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (float_extend:V8SF - (vec_select: -- (match_operand:VHFBF_256 1 "memory_operand" "m") -+ (match_operand:VHFBF_256 1 "memory_operand" "jm") - (parallel [(const_int 1) (const_int 3) - (const_int 5) (const_int 7) - (const_int 9) (const_int 11) -@@ -31022,6 +31035,7 @@ - "TARGET_AVXNECONVERT" - "vcvtneo2ps\t{%1, %0|%0, %1}" - [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16") - (set_attr "mode" "")]) - - (define_int_iterator VPDPWPROD -@@ -31078,8 +31092,9 @@ - (unspec:VI4_AVX - [(match_operand:VI4_AVX 1 "register_operand" "0") - (match_operand:VI4_AVX 2 "register_operand" "x") -- (match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")] -+ (match_operand:VI4_AVX 3 "nonimmediate_operand" "xjm")] - VPDPWPROD))] - "TARGET_AVXVNNIINT16" - "vpdp\t{%3, %2, %0|%0, %2, %3}" -- [(set_attr "prefix" "vex")]) -+ [(set_attr "prefix" "vex") -+ (set_attr "addr" "gpr16")]) --- -2.31.1 - diff --git a/GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch b/GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch deleted file mode 100644 index ee2a5b02d0ede3c325f0d4412eaaf480643c56d6..0000000000000000000000000000000000000000 --- a/GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch +++ /dev/null @@ -1,232 +0,0 @@ -From dbd597e8083884f9f1d15d8b641b1da244a1ee95 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Thu, 15 Aug 2024 12:54:07 +0800 -Subject: [PATCH 03/21] Align ix86_{move_max,store_max} with vectorizer. - -When none of mprefer-vector-width, avx256_optimal/avx128_optimal, -avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will -set ix86_{move_max,store_max} as max available vector length except -for AVX part. - - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) - opts->x_ix86_move_max = PVW_AVX512; - else - opts->x_ix86_move_max = PVW_AVX128; - -So for -mavx2, vectorizer will choose 256-bit for vectorization, but -128-bit is used for struct copy, there could be a potential STLF issue -due to this "misalign". - -The patch fixes that. - -gcc/ChangeLog: - - * config/i386/i386-options.cc (ix86_option_override_internal): - set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX - instead of PVW_AVX128. - -gcc/testsuite/ChangeLog: - * gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128. - * gcc.target/i386/pieces-memcpy-6.c: Ditto. - * gcc.target/i386/pieces-memset-38.c: Ditto. - * gcc.target/i386/pieces-memset-40.c: Ditto. - * gcc.target/i386/pieces-memset-41.c: Ditto. - * gcc.target/i386/pieces-memset-42.c: Ditto. - * gcc.target/i386/pieces-memset-43.c: Ditto. - * gcc.target/i386/pieces-strcpy-2.c: Ditto. - * gcc.target/i386/pieces-memcpy-22.c: New test. - * gcc.target/i386/pieces-memset-51.c: New test. - * gcc.target/i386/pieces-strcpy-3.c: New test. - -(cherry picked from commit 27dc1533b6dfc49f3912c524db51d6c372a5ac3d) ---- - gcc/config/i386/i386-options.cc | 6 ++++++ - gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++ - gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++ - gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 +- - gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 +++++++++++++++ - 12 files changed, 53 insertions(+), 8 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c - create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c - create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c - -diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc -index 6c212a8edeb..f6c450cc871 100644 ---- a/gcc/config/i386/i386-options.cc -+++ b/gcc/config/i386/i386-options.cc -@@ -3062,6 +3062,9 @@ ix86_option_override_internal (bool main_args_p, - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) - opts->x_ix86_move_max = PVW_AVX512; -+ /* Align with vectorizer to avoid potential STLF issue. */ -+ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_move_max = PVW_AVX256; - else - opts->x_ix86_move_max = PVW_AVX128; - } -@@ -3086,6 +3089,9 @@ ix86_option_override_internal (bool main_args_p, - if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) - && TARGET_EVEX512_P (opts->x_ix86_isa_flags2)) - opts->x_ix86_store_max = PVW_AVX512; -+ /* Align with vectorizer to avoid potential STLF issue. */ -+ else if (TARGET_AVX_P (opts->x_ix86_isa_flags)) -+ opts->x_ix86_store_max = PVW_AVX256; - else - opts->x_ix86_store_max = PVW_AVX128; - } -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c -index 5faee21f9b9..53ad0b3be44 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ - - extern char *dst, *src; - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c -new file mode 100644 -index 00000000000..605b3623ffc ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ -+ -+extern char *dst, *src; -+ -+void -+foo (void) -+{ -+ __builtin_memcpy (dst, src, 33); -+} -+ -+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c -index 5f99cc98c47..cfd2a86cf33 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c -@@ -1,5 +1,5 @@ - /* { dg-do compile { target { ! ia32 } } } */ --/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ - - extern char *dst, *src; - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c -index ed4a24a54fd..ddd194debd5 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ - - extern char *dst; - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c -index 86358c99a83..5878876550c 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */ - /* Cope with --enable-frame-pointer, Solaris/x86 -mstackrealign default. */ - /* { dg-additional-options "-fomit-frame-pointer -mno-stackrealign" } */ - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c -index d7a27f52983..27a6c8ad139 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */ - /* Cope with --enable-frame-pointer. */ - /* { dg-additional-options "-fomit-frame-pointer" } */ - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c -index df0c122aae7..103da699ae5 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ - - extern char *dst; - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c -index 2f2179c2df9..f1494e17610 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ - - extern char *dst; - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c -new file mode 100644 -index 00000000000..192ec0d1647 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ -+ -+extern char *dst; -+ -+void -+foo (int x) -+{ -+ __builtin_memset (dst, x, 64); -+} -+ -+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ -diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c -index 90446edb4f3..9bb94b7419b 100644 ---- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c -+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c -@@ -1,5 +1,5 @@ - /* { dg-do compile { target { ! ia32 } } } */ --/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */ - - extern char *strcpy (char *, const char *); - -diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c -new file mode 100644 -index 00000000000..df7571b547f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */ -+ -+extern char *strcpy (char *, const char *); -+ -+void -+foo (char *s) -+{ -+ strcpy (s, -+ "1234567890abcdef123456abcdef5678123456abcdef567abcdef678" -+ "1234567"); -+} -+ -+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */ -+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */ --- -2.31.1 - diff --git a/GCC14-1009-Check-avx-upper-register-for-parallel.patch b/GCC14-1009-Check-avx-upper-register-for-parallel.patch deleted file mode 100644 index 7fca35e810b53e785c8d6766a5a3d7efddcd445c..0000000000000000000000000000000000000000 --- a/GCC14-1009-Check-avx-upper-register-for-parallel.patch +++ /dev/null @@ -1,148 +0,0 @@ -From 61495359a1dc4ec724261559e765170ea7c1ba31 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Thu, 29 Aug 2024 11:39:20 +0800 -Subject: [PATCH 04/21] Check avx upper register for parallel. - -For function arguments/return, when it's BLK mode, it's put in a -parallel with an expr_list, and the expr_list contains the real mode -and registers. -Current ix86_check_avx_upper_register only checked for SSE_REG_P, and -failed to handle that. The patch extend the handle to each subrtx. - -gcc/ChangeLog: - - PR target/116512 - * config/i386/i386.cc (ix86_check_avx_upper_register): Iterate - subrtx to scan for avx upper register. - (ix86_check_avx_upper_stores): Inline old - ix86_check_avx_upper_register. - (ix86_avx_u128_mode_needed): Ditto, and replace - FOR_EACH_SUBRTX with call to new - ix86_check_avx_upper_register. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr116512.c: New test. - -(cherry picked from commit ba9a3f105ea552a22d08f2d54dfdbef16af7c99e) ---- - gcc/config/i386/i386.cc | 36 +++++++++++++++--------- - gcc/testsuite/gcc.target/i386/pr116512.c | 26 +++++++++++++++++ - 2 files changed, 49 insertions(+), 13 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr116512.c - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 35a28243389..a6b7e8b395e 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -15027,9 +15027,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn) - static bool - ix86_check_avx_upper_register (const_rtx exp) - { -- return (SSE_REG_P (exp) -- && !EXT_REX_SSE_REG_P (exp) -- && GET_MODE_BITSIZE (GET_MODE (exp)) > 128); -+ /* construct_container may return a parallel with expr_list -+ which contains the real reg and mode */ -+ subrtx_iterator::array_type array; -+ FOR_EACH_SUBRTX (iter, array, exp, NONCONST) -+ { -+ const_rtx x = *iter; -+ if (SSE_REG_P (x) -+ && !EXT_REX_SSE_REG_P (x) -+ && GET_MODE_BITSIZE (GET_MODE (x)) > 128) -+ return true; -+ } -+ -+ return false; - } - - /* Check if a 256bit or 512bit AVX register is referenced in stores. */ -@@ -15037,7 +15047,9 @@ ix86_check_avx_upper_register (const_rtx exp) - static void - ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data) - { -- if (ix86_check_avx_upper_register (dest)) -+ if (SSE_REG_P (dest) -+ && !EXT_REX_SSE_REG_P (dest) -+ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) - { - bool *used = (bool *) data; - *used = true; -@@ -15096,14 +15108,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) - return AVX_U128_CLEAN; - } - -- subrtx_iterator::array_type array; -- - rtx set = single_set (insn); - if (set) - { - rtx dest = SET_DEST (set); - rtx src = SET_SRC (set); -- if (ix86_check_avx_upper_register (dest)) -+ if (SSE_REG_P (dest) -+ && !EXT_REX_SSE_REG_P (dest) -+ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128) - { - /* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the - source isn't zero. */ -@@ -15114,9 +15126,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) - } - else - { -- FOR_EACH_SUBRTX (iter, array, src, NONCONST) -- if (ix86_check_avx_upper_register (*iter)) -- return AVX_U128_DIRTY; -+ if (ix86_check_avx_upper_register (src)) -+ return AVX_U128_DIRTY; - } - - /* This isn't YMM/ZMM load/store. */ -@@ -15127,9 +15138,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn) - Hardware changes state only when a 256bit register is written to, - but we need to prevent the compiler from moving optimal insertion - point above eventual read from 256bit or 512 bit register. */ -- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) -- if (ix86_check_avx_upper_register (*iter)) -- return AVX_U128_DIRTY; -+ if (ix86_check_avx_upper_register (PATTERN (insn))) -+ return AVX_U128_DIRTY; - - return AVX_U128_ANY; - } -diff --git a/gcc/testsuite/gcc.target/i386/pr116512.c b/gcc/testsuite/gcc.target/i386/pr116512.c -new file mode 100644 -index 00000000000..c2bc6c91b64 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr116512.c -@@ -0,0 +1,26 @@ -+/* { dg-do compile } */ -+/* { dg-options "-march=x86-64-v4 -O2" } */ -+/* { dg-final { scan-assembler-not "vzeroupper" { target { ! ia32 } } } } */ -+ -+#include -+ -+struct B { -+ union { -+ __m512 f; -+ __m512i s; -+ }; -+}; -+ -+struct B foo(int n) { -+ struct B res; -+ res.s = _mm512_set1_epi32(n); -+ -+ return res; -+} -+ -+__m512i bar(int n) { -+ struct B res; -+ res.s = _mm512_set1_epi32(n); -+ -+ return res.s; -+} --- -2.31.1 - diff --git a/GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch b/GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch deleted file mode 100644 index 25dd95e41df033749450809d9d6c7b6ce896185d..0000000000000000000000000000000000000000 --- a/GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch +++ /dev/null @@ -1,134 +0,0 @@ -From 7f3fcf3d6a95ab7f5abb6c9fd591783c930be081 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Mon, 2 Sep 2024 15:00:22 +0800 -Subject: [PATCH 05/21] i386: Fix vfpclassph non-optimizied intrin - -The intrin for non-optimized got a typo in mask type, which will cause -the high bits of __mmask32 being unexpectedly zeroed. - -The test does not fail under O0 with current 1b since the testcase is -wrong. We need to include avx512-mask-type.h after SIZE is defined, or -it will always be __mmask8. I will write a seperate patch to fix that -on trunk ONLY. - -gcc/ChangeLog: - - * config/i386/avx512fp16intrin.h - (_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32. - (_mm512_fpclass_ph_mask): Ditto. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test. - -(cherry picked from commit 59157c038d683e91c419a1fadd5f91f15218f57b) ---- - gcc/config/i386/avx512fp16intrin.h | 4 +- - .../i386/avx512fp16-vfpclassph-1c.c | 77 +++++++++++++++++++ - 2 files changed, 79 insertions(+), 2 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c - -diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h -index f86050b2087..e8baebd41d3 100644 ---- a/gcc/config/i386/avx512fp16intrin.h -+++ b/gcc/config/i386/avx512fp16intrin.h -@@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm) - #else - #define _mm512_mask_fpclass_ph_mask(u, x, c) \ - ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ -- (int) (c),(__mmask8)(u))) -+ (int) (c),(__mmask32)(u))) - - #define _mm512_fpclass_ph_mask(x, c) \ - ((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \ -- (int) (c),(__mmask8)-1)) -+ (int) (c),(__mmask32)-1)) - #endif /* __OPIMTIZE__ */ - - /* Intrinsics vgetexpph. */ -diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c -new file mode 100644 -index 00000000000..4739f1228e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c -@@ -0,0 +1,77 @@ -+/* { dg-do run } */ -+/* { dg-options "-O0 -mavx512fp16" } */ -+/* { dg-require-effective-target avx512fp16 } */ -+ -+#define AVX512FP16 -+#include "avx512f-helper.h" -+ -+#include -+#include -+#include -+#define SIZE (AVX512F_LEN / 16) -+#include "avx512f-mask-type.h" -+ -+#ifndef __FPCLASSPH__ -+#define __FPCLASSPH__ -+int check_fp_class_hp (_Float16 src, int imm) -+{ -+ int qNaN_res = isnan (src); -+ int sNaN_res = isnan (src); -+ int Pzero_res = (src == 0.0); -+ int Nzero_res = (src == -0.0); -+ int PInf_res = (isinf (src) == 1); -+ int NInf_res = (isinf (src) == -1); -+ int Denorm_res = (fpclassify (src) == FP_SUBNORMAL); -+ int FinNeg_res = __builtin_finite (src) && (src < 0); -+ -+ int result = (((imm & 1) && qNaN_res) -+ || (((imm >> 1) & 1) && Pzero_res) -+ || (((imm >> 2) & 1) && Nzero_res) -+ || (((imm >> 3) & 1) && PInf_res) -+ || (((imm >> 4) & 1) && NInf_res) -+ || (((imm >> 5) & 1) && Denorm_res) -+ || (((imm >> 6) & 1) && FinNeg_res) -+ || (((imm >> 7) & 1) && sNaN_res)); -+ return result; -+} -+#endif -+ -+MASK_TYPE -+CALC (_Float16 *s1, int imm) -+{ -+ int i; -+ MASK_TYPE res = 0; -+ -+ for (i = 0; i < SIZE; i++) -+ if (check_fp_class_hp(s1[i], imm)) -+ res = res | (1 << i); -+ -+ return res; -+} -+ -+void -+TEST (void) -+{ -+ int i; -+ UNION_TYPE (AVX512F_LEN, h) src; -+ MASK_TYPE res1, res2, res_ref = 0; -+ MASK_TYPE mask = MASK_VALUE; -+ -+ src.a[SIZE - 1] = NAN; -+ src.a[SIZE - 2] = 1.0 / 0.0; -+ for (i = 0; i < SIZE - 2; i++) -+ { -+ src.a[i] = -24.43 + 0.6 * i; -+ } -+ -+ res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); -+ res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF); -+ -+ res_ref = CALC (src.a, 0xFF); -+ -+ if (res_ref != res1) -+ abort (); -+ -+ if ((mask & res_ref) != res2) -+ abort (); -+} --- -2.31.1 - diff --git a/GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch b/GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch deleted file mode 100644 index e8c0947984f121898bffca7dd9ab7917aae3ad69..0000000000000000000000000000000000000000 --- a/GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch +++ /dev/null @@ -1,64 +0,0 @@ -From 6b0ea4cec9cc15d00bf72efdc86295dc5b5e75d5 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Fri, 6 Sep 2024 11:19:26 +0800 -Subject: [PATCH 06/21] doc: Enhance Intel CPU documentation - -This patch will add those recent aliased CPU names into documentation -for clearness. - -gcc/ChangeLog: - - PR target/116617 - * doc/invoke.texi: Add meteorlake, raptorlake and lunarlake. - -(cherry picked from commit 3951efed1cce970a5c61eacbad7e5f5314a9fc17) ---- - gcc/doc/invoke.texi | 25 ++++++++++++++----------- - 1 file changed, 14 insertions(+), 11 deletions(-) - -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index f82f7d2817b..cf7ae3d39e6 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -34484,12 +34484,14 @@ UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 and AVX512BF16 - instruction set support. - - @item alderlake --Intel Alderlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, --SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, --XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, --CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, --VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and AVX-VNNI instruction set --support. -+@itemx raptorlake -+@itemx meteorlake -+Intel Alderlake/Raptorlake/Meteorlake CPU with 64-bit extensions, MOVBE, MMX, -+SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, -+XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, -+MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, -+LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and -+AVX-VNNI instruction set support. - - @item rocketlake - Intel Rocketlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3 -@@ -34531,11 +34533,12 @@ UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set - support. - - @item arrowlake-s --Intel Arrow Lake S CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, --XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, --MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, --PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -+@itemx lunarlake -+Intel Arrow Lake S/Lunarlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, -+SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, -+XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, -+MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, -+LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, - UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, - SM3 and SM4 instruction set support. - --- -2.31.1 - diff --git a/GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch b/GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch deleted file mode 100644 index b1fd47ea799894a3662c68a2d8f2599feaaaf12e..0000000000000000000000000000000000000000 --- a/GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch +++ /dev/null @@ -1,404 +0,0 @@ -From dfa4557ce2dda34c0d8bd5dd5a384eca2cdf5d51 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Wed, 18 Sep 2024 11:20:15 +0800 -Subject: [PATCH 07/21] doc: Add more alias option and reorder Intel CPU -march - documentation - -This patch is backported from GCC15 with some tweaks. - -Since r15-3539, there are requests coming in to add other alias option -documentation. This patch will add all ot them, including corei7, corei7-avx, -core-avx-i, core-avx2, atom, slm, gracemont and emerarldrapids. - -Also in the patch, I reordered that part of documentation, currently all -the CPUs/products are just all over the place. I regrouped them by -date-to-now products (since the very first CPU to latest Panther Lake), P-core -(since the clients become hybrid cores, starting from Sapphire Rapids) and -E-core (since Bonnell to latest Clearwater Forest). In GCC14 and -eariler GCC, Xeon Phi CPUs are still there, I put them after E-core -CPUs. - -And in the patch, I refined the product names in documentation. - -gcc/ChangeLog: - - * doc/invoke.texi: Add corei7, corei7-avx, core-avx-i, - core-avx2, atom, slm, gracemont and emerarldrapids. Reorder - the -march documentation by splitting them into date-to-now - products, P-core, E-core and Xeon Phi. Refine the product names in - documentation. - -(cherry picked from commit a3efd2ff9db0545d0f504153a6a0195e1c92e5cf) ---- - gcc/doc/invoke.texi | 260 +++++++++++++++++++++++--------------------- - 1 file changed, 134 insertions(+), 126 deletions(-) - -diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi -index cf7ae3d39e6..b175bac6715 100644 ---- a/gcc/doc/invoke.texi -+++ b/gcc/doc/invoke.texi -@@ -34315,6 +34315,7 @@ Intel Core 2 CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, CX16, - SAHF and FXSR instruction set support. - - @item nehalem -+@itemx corei7 - Intel Nehalem CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, - SSE4.1, SSE4.2, POPCNT, CX16, SAHF and FXSR instruction set support. - -@@ -34323,17 +34324,20 @@ Intel Westmere CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, - SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR and PCLMUL instruction set support. - - @item sandybridge -+@itemx corei7-avx - Intel Sandy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, - SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE and PCLMUL instruction set - support. - - @item ivybridge -+@itemx core-avx-i - Intel Ivy Bridge CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, - SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND - and F16C instruction set support. - - @item haswell --Intel Haswell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -+@itemx core-avx2 -+Intel Haswell CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, - SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, - F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE and HLE instruction set support. - -@@ -34349,74 +34353,6 @@ SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, - F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, - CLFLUSHOPT, XSAVEC, XSAVES and SGX instruction set support. - --@item bonnell --Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 --instruction set support. -- --@item silvermont --Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, --SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND --instruction set support. -- --@item goldmont --Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, --SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, --RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction --set support. -- --@item goldmont-plus --Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, --SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, --RDPID and SGX instruction set support. -- --@item tremont --Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, --SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, --RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, --SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set --support. -- --@item sierraforest --Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, --XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, --MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, --PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, --AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set --support. -- --@item grandridge --Intel Grand Ridge CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, --XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, --MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, --PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, --AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set --support. -- --@item clearwaterforest --Intel Clearwater Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, --SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, --XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, --MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, --LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, --ENQCMD, UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, --SHA512, SM3, SM4, USER_MSR and PREFETCHI instruction set support. -- --@item knl --Intel Knight's Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, --RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, --AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. -- --@item knm --Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, --RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, --AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW, --AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. -- - @item skylake-avx512 - Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, - SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -@@ -34424,16 +34360,30 @@ RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, - AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, - AVX512DQ and AVX512CD instruction set support. - -+@item cascadelake -+Intel Cascade Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, -+SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, -+F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -+CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, -+AVX512CD and AVX512VNNI instruction set support. -+ - @item cannonlake --Intel Cannonlake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, -+Intel Cannon Lake Server CPU with 64-bit extensions, MMX, SSE, SSE2, - SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, - FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, - PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, - AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA and SHA instruction set - support. - -+@item cooperlake -+Intel Cooper Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, -+SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, -+F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -+CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, -+AVX512CD, AVX512VNNI and AVX512BF16 instruction set support. -+ - @item icelake-client --Intel Icelake Client CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+Intel Ice Lake Client CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, - SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, - RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, - AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, -@@ -34441,7 +34391,7 @@ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 - , VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. - - @item icelake-server --Intel Icelake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+Intel Ice Lake Server CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, - SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, - RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, - AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, -@@ -34449,22 +34399,8 @@ AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2 - , VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD and CLWB - instruction set support. - --@item cascadelake --Intel Cascadelake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, --SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, --F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, --CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, --AVX512CD and AVX512VNNI instruction set support. -- --@item cooperlake --Intel cooperlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, --SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, --F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, --CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, CLWB, AVX512VL, AVX512BW, AVX512DQ, --AVX512CD, AVX512VNNI and AVX512BF16 instruction set support. -- - @item tigerlake --Intel Tigerlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -+Intel Tiger Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, - SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, - F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, - CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD -@@ -34472,37 +34408,67 @@ PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, - VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, MOVDIRI, MOVDIR64B, CLWB, - AVX512VP2INTERSECT and KEYLOCKER instruction set support. - --@item sapphirerapids --Intel sapphirerapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, --RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, --AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, --AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, --VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, WBNOINVD, CLWB, --MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, --UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 and AVX512BF16 --instruction set support. -+@item rocketlake -+Intel Rocket Lake CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, SSSE3, -+SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, -+F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, -+CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD -+PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, -+VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. - - @item alderlake - @itemx raptorlake - @itemx meteorlake --Intel Alderlake/Raptorlake/Meteorlake CPU with 64-bit extensions, MOVBE, MMX, --SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, -+@itemx gracemont -+Intel Alder Lake/Raptor Lake/Meteor Lake/Gracemont CPU with 64-bit extensions, -+MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, -+PCLMUL, RDRND, XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, -+GFNI-SSE, CLWB, MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, -+BMI2, F16C, FMA, LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, -+WIDEKL and AVX-VNNI instruction set support. -+ -+@item arrowlake -+Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, -+XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -+MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -+PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -+UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set -+support. -+ -+@item arrowlake-s -+@itemx lunarlake -+Intel Arrow Lake S/Lunar Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, -+SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, - XSAVE, XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, - MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, --LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL and --AVX-VNNI instruction set support. -+LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -+UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, -+SM3 and SM4 instruction set support. - --@item rocketlake --Intel Rocketlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3 --, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, RDRND, --F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, AES, --CLFLUSHOPT, XSAVEC, XSAVES, AVX512F, AVX512VL, AVX512BW, AVX512DQ, AVX512CD --PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, AVX512VBMI2, --VPCLMULQDQ, AVX512BITALG, RDPID and AVX512VPOPCNTDQ instruction set support. -+@item pantherlake -+Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, -+XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -+MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -+PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -+UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, -+SM3, SM4 and PREFETCHI instruction set support. -+ -+@item sapphirerapids -+@itemx emeraldrapids -+Intel Sapphire Rapids/Emerald Rapids CPU with 64-bit extensions, MMX, SSE, -+SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, -+FSGSBASE, RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, -+PREFETCHW, AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, -+AVX512DQ, AVX512CD, PKU, AVX512VBMI, AVX512IFMA, SHA, AVX512VNNI, GFNI, VAES, -+AVX512VBMI2, VPCLMULQDQ, AVX512BITALG, RDPID, AVX512VPOPCNTDQ, PCONFIG, -+WBNOINVD, CLWB, MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, -+SERIALIZE, TSXLDTRK, UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16 -+and AVX512BF16 instruction set support. - - @item graniterapids --Intel graniterapids CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+Intel Granite Rapids CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, - SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, - RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, - AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, -@@ -34513,7 +34479,7 @@ UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512-FP16, AVX512BF16, AMX-FP16 - and PREFETCHI instruction set support. - - @item graniterapids-d --Intel graniterapids D CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+Intel Granite Rapids D CPU with 64-bit extensions, MMX, SSE, SSE2, SSE3, - SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, - RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, - AES, CLFLUSHOPT, XSAVEC, XSAVES, SGX, AVX512F, AVX512VL, AVX512BW, AVX512DQ, -@@ -34523,33 +34489,75 @@ MOVDIRI, MOVDIR64B, ENQCMD, CLDEMOTE, PTWRITE, WAITPKG, SERIALIZE, TSXLDTRK, - UINTR, AMX-BF16, AMX-TILE, AMX-INT8, AVX-VNNI, AVX512FP16, AVX512BF16, AMX-FP16, - PREFETCHI and AMX-COMPLEX instruction set support. - --@item arrowlake --Intel Arrow Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+@item bonnell -+@itemx atom -+Intel Bonnell CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3 and SSSE3 -+instruction set support. -+ -+@item silvermont -+@itemx slm -+Intel Silvermont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -+SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW and RDRND -+instruction set support. -+ -+@item goldmont -+Intel Goldmont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -+SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -+RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT and FSGSBASE instruction -+set support. -+ -+@item goldmont-plus -+Intel Goldmont Plus CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, -+SHA, RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, -+RDPID and SGX instruction set support. -+ -+@item tremont -+Intel Tremont CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, -+SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, PCLMUL, PREFETCHW, RDRND, AES, SHA, -+RDSEED, XSAVE, XSAVEC, XSAVES, XSAVEOPT, CLFLUSHOPT, FSGSBASE, PTWRITE, RDPID, -+SGX, CLWB, GFNI-SSE, MOVDIRI, MOVDIR64B, CLDEMOTE and WAITPKG instruction set -+support. -+ -+@item sierraforest -+Intel Sierra Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, - SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, - XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, - MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, - PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, --UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT and CMPCCXADD instruction set -+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set - support. - --@item arrowlake-s --@itemx lunarlake --Intel Arrow Lake S/Lunarlake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, -+@item grandridge -+Intel Grand Ridge CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, -+XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, -+MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, -+PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, -+AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, ENQCMD and UINTR instruction set -+support. -+ -+@item clearwaterforest -+Intel Clearwater Forest CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, - SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, - XSAVEC, XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, - MOVDIRI, MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, - LZCNT, PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, --UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, --SM3 and SM4 instruction set support. -+ENQCMD, UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, -+SHA512, SM3, SM4, USER_MSR and PREFETCHI instruction set support. - --@item pantherlake --Intel Panther Lake CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, --SSSE3, SSE4.1, SSE4.2, POPCNT, AES, PREFETCHW, PCLMUL, RDRND, XSAVE, XSAVEC, --XSAVES, XSAVEOPT, FSGSBASE, PTWRITE, RDPID, SGX, GFNI-SSE, CLWB, MOVDIRI, --MOVDIR64B, CLDEMOTE, WAITPKG, ADCX, AVX, AVX2, BMI, BMI2, F16C, FMA, LZCNT, --PCONFIG, PKU, VAES, VPCLMULQDQ, SERIALIZE, HRESET, KL, WIDEKL, AVX-VNNI, --UINTR, AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD, AVXVNNIINT16, SHA512, --SM3, SM4 and PREFETCHI instruction set support. -+@item knl -+Intel Knights Landing CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -+AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1 instruction set support. -+ -+@item knm -+Intel Knights Mill CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3, -+SSSE3, SSE4.1, SSE4.2, POPCNT, CX16, SAHF, FXSR, AVX, XSAVE, PCLMUL, FSGSBASE, -+RDRND, F16C, AVX2, BMI, BMI2, LZCNT, FMA, MOVBE, HLE, RDSEED, ADCX, PREFETCHW, -+AVX512PF, AVX512ER, AVX512F, AVX512CD and PREFETCHWT1, AVX5124VNNIW, -+AVX5124FMAPS and AVX512VPOPCNTDQ instruction set support. - - @item k6 - AMD K6 CPU with MMX instruction set support. --- -2.31.1 - diff --git a/GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch b/GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch deleted file mode 100644 index ac4f937fda8d53c4d22dc1c8cb4963a28d606c1c..0000000000000000000000000000000000000000 --- a/GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch +++ /dev/null @@ -1,173 +0,0 @@ -From 5ecb6cc6761adb80e104a7a8e4b946cb74adca25 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Tue, 24 Sep 2024 15:53:14 +0800 -Subject: [PATCH 08/21] Add new microarchitecture tune for SRF/GRR/CWF. - -For Crestmont, 4-operand vex blendv instructions come from MSROM and -is slower than 3-instructions sequence (op1 & mask) | (op2 & ~mask). -legacy blendv instruction can still be handled by the decoder. - -The patch add a new tune which is enabled for all processors except -for SRF/CWF. It will use vpand + vpandn + vpor instead of -vpblendvb(similar for vblendvps/vblendvpd) for SRF/CWF. - -gcc/ChangeLog: - - * config/i386/i386-expand.cc (ix86_expand_sse_movcc): Guard - instruction blendv generation under new tune. - * config/i386/i386.h (TARGET_SSE_MOVCC_USE_BLENDV): New Macro. - * config/i386/x86-tune.def (X86_TUNE_SSE_MOVCC_USE_BLENDV): - New tune. - -(cherry picked from commit fe0692f689a18c432d6f59f404d4cd020cbebef2) ---- - gcc/config/i386/i386-expand.cc | 24 +++++++++---------- - gcc/config/i386/i386.h | 2 ++ - gcc/config/i386/x86-tune.def | 8 +++++++ - .../gcc.target/i386/sse_movcc_use_blendv.c | 12 ++++++++++ - 4 files changed, 34 insertions(+), 12 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c - -diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc -index 51efe6fdd7d..0a24a46fbf8 100644 ---- a/gcc/config/i386/i386-expand.cc -+++ b/gcc/config/i386/i386-expand.cc -@@ -4220,23 +4220,23 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) - switch (mode) - { - case E_V2SFmode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - gen = gen_mmx_blendvps; - break; - case E_V4SFmode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - gen = gen_sse4_1_blendvps; - break; - case E_V2DFmode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - gen = gen_sse4_1_blendvpd; - break; - case E_SFmode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - gen = gen_sse4_1_blendvss; - break; - case E_DFmode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - gen = gen_sse4_1_blendvsd; - break; - case E_V8QImode: -@@ -4244,7 +4244,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) - case E_V4HFmode: - case E_V4BFmode: - case E_V2SImode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - { - gen = gen_mmx_pblendvb_v8qi; - blend_mode = V8QImode; -@@ -4254,14 +4254,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) - case E_V2HImode: - case E_V2HFmode: - case E_V2BFmode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - { - gen = gen_mmx_pblendvb_v4qi; - blend_mode = V4QImode; - } - break; - case E_V2QImode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - gen = gen_mmx_pblendvb_v2qi; - break; - case E_V16QImode: -@@ -4271,18 +4271,18 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) - case E_V4SImode: - case E_V2DImode: - case E_V1TImode: -- if (TARGET_SSE4_1) -+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1) - { - gen = gen_sse4_1_pblendvb; - blend_mode = V16QImode; - } - break; - case E_V8SFmode: -- if (TARGET_AVX) -+ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV) - gen = gen_avx_blendvps256; - break; - case E_V4DFmode: -- if (TARGET_AVX) -+ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV) - gen = gen_avx_blendvpd256; - break; - case E_V32QImode: -@@ -4291,7 +4291,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) - case E_V16BFmode: - case E_V8SImode: - case E_V4DImode: -- if (TARGET_AVX2) -+ if (TARGET_AVX2 && TARGET_SSE_MOVCC_USE_BLENDV) - { - gen = gen_avx2_pblendvb; - blend_mode = V32QImode; -diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h -index 26e15d2677f..d78e554ec4b 100644 ---- a/gcc/config/i386/i386.h -+++ b/gcc/config/i386/i386.h -@@ -459,6 +459,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; - ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC] - #define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC] - #define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR] -+#define TARGET_SSE_MOVCC_USE_BLENDV \ -+ ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV] - - /* Feature tests against the various architecture variations. */ - enum ix86_arch_indices { -diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def -index 1ab2f444b56..46e847589f9 100644 ---- a/gcc/config/i386/x86-tune.def -+++ b/gcc/config/i386/x86-tune.def -@@ -532,6 +532,14 @@ DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, "avoid_fma512_chains", m_NONE) - DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD, - "v2df_reduction_prefer_haddpd", m_NONE) - -+/* X86_TUNE_SSE_MOVCC_USE_BLENDV: Prefer blendv instructions to -+ 3-instruction sequence (op1 & mask) | (op2 & ~mask) -+ for vector condition move. -+ For Crestmont, 4-operand vex blendv instructions come from MSROM -+ which is slow. */ -+DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV, -+ "sse_movcc_use_blendv", ~m_CORE_ATOM) -+ - /*****************************************************************************/ - /* AVX instruction selection tuning (some of SSE flags affects AVX, too) */ - /*****************************************************************************/ -diff --git a/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c b/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c -new file mode 100644 -index 00000000000..ac9f1524949 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/sse_movcc_use_blendv.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-march=sierraforest -O2" } */ -+/* { dg-final { scan-assembler-not {(?n)vp?blendv(b|ps|pd)} } } */ -+ -+void -+foo (int* a, int* b, int* __restrict c) -+{ -+ for (int i = 0; i != 200; i++) -+ { -+ c[i] += a[i] > b[i] ? 1 : -1; -+ } -+} --- -2.31.1 - diff --git a/GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch b/GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch deleted file mode 100644 index 54b0ebd100ed5f3215f5de0840852f45e44a8e0f..0000000000000000000000000000000000000000 --- a/GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch +++ /dev/null @@ -1,417 +0,0 @@ -From dd5d6d20ffeaf3ab272521e53f6afa6e3a8ceb03 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Wed, 16 Oct 2024 13:43:48 +0800 -Subject: [PATCH 09/21] Refine splitters related to "combine vpcmpuw + - zero_extend to vpcmpuw" - -r12-6103-g1a7ce8570997eb combines vpcmpuw + zero_extend to vpcmpuw -with the pre_reload splitter, but the splitter transforms the -zero_extend into a subreg which make reload think the upper part is -garbage, it's not correct. - -The patch adjusts the zero_extend define_insn_and_split to -define_insn to keep zero_extend. - -gcc/ChangeLog: - - PR target/117159 - * config/i386/sse.md - (*_cmp3_zero_extend): - Change from define_insn_and_split to define_insn. - (*_cmp3_zero_extend): - Ditto. - (*_ucmp3_zero_extend): - Ditto. - (*_ucmp3_zero_extend): - Ditto. - (*_cmp3_zero_extend_2): - Split to the zero_extend pattern. - (*_cmp3_zero_extend_2): - Ditto. - (*_ucmp3_zero_extend_2): - Ditto. - (*_ucmp3_zero_extend_2): - Ditto. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr117159.c: New test. - * gcc.target/i386/avx512bw-pr103750-1.c: Remove xfail. - * gcc.target/i386/avx512bw-pr103750-2.c: Remove xfail. - -(cherry picked from commit 79e7e02b7cc578d03eab2b50c029f44409ef8e26) ---- - gcc/config/i386/sse.md | 198 +++++++----------- - .../gcc.target/i386/avx512bw-pr103750-1.c | 3 +- - .../gcc.target/i386/avx512bw-pr103750-2.c | 3 +- - gcc/testsuite/gcc.target/i386/pr117159.c | 42 ++++ - 4 files changed, 125 insertions(+), 121 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr117159.c - -diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md -index 218aa412c33..6591547159f 100644 ---- a/gcc/config/i386/sse.md -+++ b/gcc/config/i386/sse.md -@@ -3967,32 +3967,19 @@ - - ;; Since vpcmpd implicitly clear the upper bits of dest, transform - ;; vpcmpd + zero_extend to vpcmpd since the instruction --(define_insn_and_split "*_cmp3_zero_extend" -- [(set (match_operand:SWI248x 0 "register_operand") -+(define_insn "*_cmp3_zero_extend" -+ [(set (match_operand:SWI248x 0 "register_operand" "=k") - (zero_extend:SWI248x - (unspec: -- [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand") -- (match_operand:V48H_AVX512VL 2 "nonimmediate_operand") -- (match_operand:SI 3 "const_0_to_7_operand")] -+ [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v") -+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm") -+ (match_operand:SI 3 "const_0_to_7_operand" "n")] - UNSPEC_PCMP)))] - "TARGET_AVX512F - && (!VALID_MASK_AVX512BW_MODE (mode) || TARGET_AVX512BW) -- && ix86_pre_reload_split () - && (GET_MODE_NUNITS (mode) - < GET_MODE_PRECISION (mode))" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_PCMP))] --{ -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -- operands[0], mode); --} -+ "vcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") - (set_attr "prefix" "evex") -@@ -4020,21 +4007,22 @@ - "#" - "&& 1" - [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_PCMP)) -- (set (match_dup 4) (match_dup 0))] -+ (zero_extend:SWI248x -+ (unspec: -+ [(match_dup 1) -+ (match_dup 2) -+ (match_dup 3)] -+ UNSPEC_PCMP))) -+ (set (match_dup 4) (match_dup 5))] - { -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -+ operands[5] = lowpart_subreg (mode, - operands[0], mode); --} -- [(set_attr "type" "ssecmp") -- (set_attr "length_immediate" "1") -- (set_attr "prefix" "evex") -- (set_attr "mode" "")]) -+ if (SUBREG_P (operands[5])) -+ { -+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; -+ SUBREG_PROMOTED_SET (operands[5], 1); -+ } -+}) - - (define_insn_and_split "*_cmp3" - [(set (match_operand: 0 "register_operand") -@@ -4069,31 +4057,18 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - --(define_insn_and_split "*_cmp3_zero_extend" -- [(set (match_operand:SWI248x 0 "register_operand") -+(define_insn "*_cmp3_zero_extend" -+ [(set (match_operand:SWI248x 0 "register_operand" "=k") - (zero_extend:SWI248x - (unspec: -- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") -- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") -- (match_operand:SI 3 "const_0_to_7_operand")] -+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") -+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") -+ (match_operand:SI 3 "const_0_to_7_operand" "n")] - UNSPEC_PCMP)))] - "TARGET_AVX512BW -- && ix86_pre_reload_split () -- && (GET_MODE_NUNITS (mode) -- < GET_MODE_PRECISION (mode))" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_PCMP))] --{ -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -- operands[0], mode); --} -+ && (GET_MODE_NUNITS (mode) -+ < GET_MODE_PRECISION (mode))" -+ "vpcmp\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") - (set_attr "prefix" "evex") -@@ -4120,16 +4095,21 @@ - "#" - "&& 1" - [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_PCMP)) -- (set (match_dup 4) (match_dup 0))] -+ (zero_extend:SWI248x -+ (unspec: -+ [(match_dup 1) -+ (match_dup 2) -+ (match_dup 3)] -+ UNSPEC_PCMP))) -+ (set (match_dup 4) (match_dup 5))] - { -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -+ operands[5] = lowpart_subreg (mode, - operands[0], mode); -+ if (SUBREG_P (operands[5])) -+ { -+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; -+ SUBREG_PROMOTED_SET (operands[5], 1); -+ } - } - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") -@@ -4187,31 +4167,18 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - --(define_insn_and_split "*_ucmp3_zero_extend" -- [(set (match_operand:SWI248x 0 "register_operand") -+(define_insn "*_ucmp3_zero_extend" -+ [(set (match_operand:SWI248x 0 "register_operand" "=k") - (zero_extend:SWI248x - (unspec: -- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand") -- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand") -- (match_operand:SI 3 "const_0_to_7_operand")] -+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v") -+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm") -+ (match_operand:SI 3 "const_0_to_7_operand" "n")] - UNSPEC_UNSIGNED_PCMP)))] - "TARGET_AVX512BW -- && ix86_pre_reload_split () - && (GET_MODE_NUNITS (mode) - < GET_MODE_PRECISION (mode))" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_UNSIGNED_PCMP))] --{ -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -- operands[0], mode); --} -+ "vpcmpu\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") - (set_attr "prefix" "evex") -@@ -4239,16 +4206,21 @@ - "#" - "&& 1" - [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_UNSIGNED_PCMP)) -- (set (match_dup 4) (match_dup 0))] --{ -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -+ (zero_extend:SWI248x -+ (unspec: -+ [(match_dup 1) -+ (match_dup 2) -+ (match_dup 3)] -+ UNSPEC_UNSIGNED_PCMP))) -+ (set (match_dup 4) (match_dup 5))] -+{ -+ operands[5] = lowpart_subreg (mode, - operands[0], mode); -+ if (SUBREG_P (operands[5])) -+ { -+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; -+ SUBREG_PROMOTED_SET (operands[5], 1); -+ } - } - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") -@@ -4284,32 +4256,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "")]) - --(define_insn_and_split "*_ucmp3_zero_extend" -- [(set (match_operand:SWI248x 0 "register_operand") -+(define_insn "*_ucmp3_zero_extend" -+ [(set (match_operand:SWI248x 0 "register_operand" "=k") - (zero_extend:SWI248x - (unspec: -- [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand") -- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand") -- (match_operand:SI 3 "const_0_to_7_operand")] -+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v") -+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm") -+ (match_operand:SI 3 "const_0_to_7_operand" "n")] - UNSPEC_UNSIGNED_PCMP)))] - "TARGET_AVX512F - && (!VALID_MASK_AVX512BW_MODE (mode) || TARGET_AVX512BW) -- && ix86_pre_reload_split () - && (GET_MODE_NUNITS (mode) - < GET_MODE_PRECISION (mode))" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_UNSIGNED_PCMP))] --{ -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -- operands[0], mode); --} -+ "vpcmpu\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") - (set_attr "prefix" "evex") -@@ -4337,16 +4296,21 @@ - "#" - "&& 1" - [(set (match_dup 0) -- (unspec: -- [(match_dup 1) -- (match_dup 2) -- (match_dup 3)] -- UNSPEC_UNSIGNED_PCMP)) -- (set (match_dup 4) (match_dup 0))] --{ -- operands[1] = force_reg (mode, operands[1]); -- operands[0] = lowpart_subreg (mode, -+ (zero_extend:SWI248x -+ (unspec: -+ [(match_dup 1) -+ (match_dup 2) -+ (match_dup 3)] -+ UNSPEC_UNSIGNED_PCMP))) -+ (set (match_dup 4) (match_dup 5))] -+{ -+ operands[5] = lowpart_subreg (mode, - operands[0], mode); -+ if (SUBREG_P (operands[5])) -+ { -+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1; -+ SUBREG_PROMOTED_SET (operands[5], 1); -+ } - } - [(set_attr "type" "ssecmp") - (set_attr "length_immediate" "1") -diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c -index b1165f069bb..e7d6183232b 100644 ---- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c -+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c -@@ -1,8 +1,7 @@ - /* PR target/103750 */ - /* { dg-do compile } */ - /* { dg-options "-O2 -mavx512bw -mavx512vl" } */ --/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ --/* xfail need to be fixed. */ -+/* { dg-final { scan-assembler-not "kmov" } } */ - - #include - extern __m128i* pi128; -diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c -index 7303f5403ba..3392e193222 100644 ---- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c -+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c -@@ -1,8 +1,7 @@ - /* PR target/103750 */ - /* { dg-do compile } */ - /* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */ --/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */ --/* xfail need to be fixed. */ -+/* { dg-final { scan-assembler-not "kmov" } } */ - - #include - extern __m128i* pi128; -diff --git a/gcc/testsuite/gcc.target/i386/pr117159.c b/gcc/testsuite/gcc.target/i386/pr117159.c -new file mode 100644 -index 00000000000..b67d682ecef ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr117159.c -@@ -0,0 +1,42 @@ -+/* { dg-do run } */ -+/* { dg-options "-Os -mavx512bw" } */ -+/* { dg-require-effective-target avx512bw } */ -+ -+typedef __attribute__((__vector_size__ (4))) unsigned char W; -+typedef __attribute__((__vector_size__ (64))) int V; -+typedef __attribute__((__vector_size__ (64))) long long Vq; -+ -+W w; -+V v; -+Vq vq; -+ -+static inline W -+foo (short m) -+{ -+ unsigned k = __builtin_ia32_pcmpgtq512_mask ((Vq) { }, vq, m); -+ W r = (W) k + w; -+ return r; -+} -+ -+static inline W -+foo1 (short m) -+{ -+ unsigned k = __builtin_ia32_pcmpgtd512_mask ((V) {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, v, m); -+ W r = (W) k + w; -+ return r; -+} -+ -+int -+main () -+{ -+ if (!__builtin_cpu_supports ("avx512bw")) -+ return 0; -+ W y = foo1 (65535); -+ if (!y[0] || !y[1] || y[2] || y[3]) -+ __builtin_abort(); -+ W x = foo (65535); -+ if (x[0] || x[1] || x[2] || x[3]) -+ __builtin_abort(); -+ -+ return 0; -+} --- -2.31.1 - diff --git a/GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch b/GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch deleted file mode 100644 index 7873948dfcc141215611494b7bcfd3160b601e8c..0000000000000000000000000000000000000000 --- a/GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch +++ /dev/null @@ -1,90 +0,0 @@ -From 234baf5dbb10c6af25fc2f74b8c725e61cdb0238 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Tue, 22 Oct 2024 01:54:40 -0700 -Subject: [PATCH 10/21] Fix ICE due to isa mismatch for the builtins. - -gcc/ChangeLog: - - PR target/117240 - * config/i386/i386-builtin.def: Add avx/avx512f to vaes - ymm/zmm builtins. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr117240_avx.c: New test. - * gcc.target/i386/pr117240_avx512f.c: New test. - -(cherry picked from commit b718f6ec1674c0db30f26c65b7a9215e9388dd6c) ---- - gcc/config/i386/i386-builtin.def | 16 ++++++++-------- - gcc/testsuite/gcc.target/i386/pr117240_avx.c | 10 ++++++++++ - gcc/testsuite/gcc.target/i386/pr117240_avx512f.c | 10 ++++++++++ - 3 files changed, 28 insertions(+), 8 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr117240_avx.c - create mode 100644 gcc/testsuite/gcc.target/i386/pr117240_avx512f.c - -diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def -index ab73e20121a..fdd9dba6e54 100644 ---- a/gcc/config/i386/i386-builtin.def -+++ b/gcc/config/i386/i386-builtin.def -@@ -2832,17 +2832,17 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B - - /* VAES. */ - BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) --BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) --BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) - BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) --BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) --BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) - BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) --BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) --BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) - BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI) --BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) --BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) -+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI) - - /* BF16 */ - BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF) -diff --git a/gcc/testsuite/gcc.target/i386/pr117240_avx.c b/gcc/testsuite/gcc.target/i386/pr117240_avx.c -new file mode 100644 -index 00000000000..88e83085315 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr117240_avx.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mvaes -mno-xsave -Wno-psabi" } */ -+ -+typedef __attribute__((__vector_size__(32))) char V; -+ -+V -+foo(V v) -+{ -+ return __builtin_ia32_vaesenc_v32qi(v, v);/* { dg-error "needs isa option" } */ -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c b/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c -new file mode 100644 -index 00000000000..c2d616a05e3 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr117240_avx512f.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mvaes -mevex512 -mno-xsave -Wno-psabi" } */ -+ -+typedef __attribute__((__vector_size__(64))) char V; -+ -+V -+foo(V v) -+{ -+ return __builtin_ia32_vaesenc_v64qi(v, v);/* { dg-error "needs isa option" } */ -+} --- -2.31.1 - diff --git a/GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch b/GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch deleted file mode 100644 index 0edd3c363c0af7e3051059c3e71f99c190fa1049..0000000000000000000000000000000000000000 --- a/GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch +++ /dev/null @@ -1,444 +0,0 @@ -From a586970df40f57d0208b514dcca8b9aa2de911c7 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Tue, 29 Oct 2024 02:09:39 -0700 -Subject: [PATCH 11/21] Fix ICE due to subreg:us_truncate. - -Force_operand issues an ICE when input -is (subreg:DI (us_truncate:V8QI)), it's probably because it's an -invalid rtx, So refine backend patterns for that. - -gcc/ChangeLog: - - PR target/117318 - * config/i386/sse.md (*avx512vl_v2div2qi2_mask_store_1): - Rename to .. - (avx512vl_v2div2qi2_mask_store_1): .. this. - (avx512vl_v2div2qi2_mask_store_2): Change to - define_expand. - (*avx512vl_v4qi2_mask_store_1): Rename to .. - (avx512vl_v4qi2_mask_store_1): .. this. - (avx512vl_v4qi2_mask_store_2): Change to - define_expand. - (*avx512vl_v8qi2_mask_store_1): Rename to .. - (avx512vl_v8qi2_mask_store_1): .. this. - (avx512vl_v8qi2_mask_store_2): Change to - define_expand. - (*avx512vl_v4hi2_mask_store_1): Rename to .. - (avx512vl_v4hi2_mask_store_1): .. this. - (avx512vl_v4hi2_mask_store_2): Change to - define_expand. - (*avx512vl_v2div2hi2_mask_store_1): Rename to .. - (avx512vl_v2div2hi2_mask_store_1): .. this. - (avx512vl_v2div2hi2_mask_store_2): Change to - define_expand. - (*avx512vl_v2div2si2_mask_store_1): Rename to .. - (avx512vl_v2div2si2_mask_store_1): .. this. - (avx512vl_v2div2si2_mask_store_2): Change to - define_expand. - (*avx512f_v8div16qi2_mask_store_1): Rename to .. - (avx512f_v8div16qi2_mask_store_1): .. this. - (avx512f_v8div16qi2_mask_store_2): Change to - define_expand. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr117318.c: New test. - -(cherry picked from commit 71a0cf699b6a2dc03abec53aeafab8b70db2bb07) ---- - gcc/config/i386/sse.md | 268 +++++++++-------------- - gcc/testsuite/gcc.target/i386/pr117318.c | 12 + - 2 files changed, 110 insertions(+), 170 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr117318.c - -diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md -index 6591547159f..771c5cd01d2 100644 ---- a/gcc/config/i386/sse.md -+++ b/gcc/config/i386/sse.md -@@ -14551,7 +14551,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512vl_v2div2qi2_mask_store_1" -+(define_insn "avx512vl_v2div2qi2_mask_store_1" - [(set (match_operand:V2QI 0 "memory_operand" "=m") - (vec_merge:V2QI - (any_truncate:V2QI -@@ -14565,28 +14565,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512vl_v2div2qi2_mask_store_2" -- [(set (match_operand:HI 0 "memory_operand") -- (subreg:HI -- (vec_merge:V2QI -- (any_truncate:V2QI -- (match_operand:V2DI 1 "register_operand")) -- (vec_select:V2QI -- (subreg:V4QI -- (vec_concat:V2HI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512VL && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V2QI -- (any_truncate:V2QI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);") -+(define_expand "avx512vl_v2div2qi2_mask_store_2" -+ [(match_operand:HI 0 "memory_operand") -+ (any_truncate:V2QI -+ (match_operand:V2DI 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512VL" -+{ -+ operands[0] = adjust_address_nv (operands[0], V2QImode, 0); -+ emit_insn (gen_avx512vl_v2div2qi2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - (define_insn "*avx512vl_v4qi2_store_1" - [(set (match_operand:V4QI 0 "memory_operand" "=m") -@@ -14655,7 +14646,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512vl_v4qi2_mask_store_1" -+(define_insn "avx512vl_v4qi2_mask_store_1" - [(set (match_operand:V4QI 0 "memory_operand" "=m") - (vec_merge:V4QI - (any_truncate:V4QI -@@ -14669,29 +14660,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512vl_v4qi2_mask_store_2" -- [(set (match_operand:SI 0 "memory_operand") -- (subreg:SI -- (vec_merge:V4QI -- (any_truncate:V4QI -- (match_operand:VI4_128_8_256 1 "register_operand")) -- (vec_select:V4QI -- (subreg:V8QI -- (vec_concat:V2SI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1) -- (const_int 2) (const_int 3)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512VL && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V4QI -- (any_truncate:V4QI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);") -+(define_expand "avx512vl_v4qi2_mask_store_2" -+ [(match_operand:SI 0 "memory_operand") -+ (any_truncate:V4QI -+ (match_operand:VI4_128_8_256 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512VL" -+{ -+ operands[0] = adjust_address_nv (operands[0], V4QImode, 0); -+ emit_insn (gen_avx512vl_v4qi2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - (define_mode_iterator VI2_128_BW_4_256 - [(V8HI "TARGET_AVX512BW") V8SI]) -@@ -14763,7 +14744,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512vl_v8qi2_mask_store_1" -+(define_insn "avx512vl_v8qi2_mask_store_1" - [(set (match_operand:V8QI 0 "memory_operand" "=m") - (vec_merge:V8QI - (any_truncate:V8QI -@@ -14777,31 +14758,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512vl_v8qi2_mask_store_2" -- [(set (match_operand:DI 0 "memory_operand") -- (subreg:DI -- (vec_merge:V8QI -- (any_truncate:V8QI -- (match_operand:VI2_128_BW_4_256 1 "register_operand")) -- (vec_select:V8QI -- (subreg:V16QI -- (vec_concat:V2DI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1) -- (const_int 2) (const_int 3) -- (const_int 4) (const_int 5) -- (const_int 6) (const_int 7)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512VL && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V8QI -- (any_truncate:V8QI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") -+(define_expand "avx512vl_v8qi2_mask_store_2" -+ [(match_operand:DI 0 "memory_operand") -+ (any_truncate:V8QI -+ (match_operand:VI2_128_BW_4_256 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512VL" -+{ -+ operands[0] = adjust_address_nv (operands[0], V8QImode, 0); -+ emit_insn (gen_avx512vl_v8qi2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - (define_mode_iterator PMOV_SRC_MODE_4 [V4DI V2DI V4SI]) - (define_mode_attr pmov_dst_4 -@@ -14923,7 +14892,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512vl_v4hi2_mask_store_1" -+(define_insn "avx512vl_v4hi2_mask_store_1" - [(set (match_operand:V4HI 0 "memory_operand" "=m") - (vec_merge:V4HI - (any_truncate:V4HI -@@ -14941,30 +14910,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512vl_v4hi2_mask_store_2" -- [(set (match_operand:DI 0 "memory_operand") -- (subreg:DI -- (vec_merge:V4HI -- (any_truncate:V4HI -- (match_operand:VI4_128_8_256 1 "register_operand")) -- (vec_select:V4HI -- (subreg:V8HI -- (vec_concat:V2DI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1) -- (const_int 2) (const_int 3)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512VL && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V4HI -- (any_truncate:V4HI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);") -- -+(define_expand "avx512vl_v4hi2_mask_store_2" -+ [(match_operand:DI 0 "memory_operand") -+ (any_truncate:V4HI -+ (match_operand:VI4_128_8_256 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512VL" -+{ -+ operands[0] = adjust_address_nv (operands[0], V4HImode, 0); -+ emit_insn (gen_avx512vl_v4hi2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - (define_insn "*avx512vl_v2div2hi2_store_1" - [(set (match_operand:V2HI 0 "memory_operand" "=m") -@@ -15025,7 +14983,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512vl_v2div2hi2_mask_store_1" -+(define_insn "avx512vl_v2div2hi2_mask_store_1" - [(set (match_operand:V2HI 0 "memory_operand" "=m") - (vec_merge:V2HI - (any_truncate:V2HI -@@ -15039,28 +14997,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512vl_v2div2hi2_mask_store_2" -- [(set (match_operand:SI 0 "memory_operand") -- (subreg:SI -- (vec_merge:V2HI -- (any_truncate:V2HI -- (match_operand:V2DI 1 "register_operand")) -- (vec_select:V2HI -- (subreg:V4HI -- (vec_concat:V2SI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512VL && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V2HI -- (any_truncate:V2HI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);") -+(define_expand "avx512vl_v2div2hi2_mask_store_2" -+ [(match_operand:SI 0 "memory_operand") -+ (any_truncate:V2HI -+ (match_operand:V2DI 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512VL" -+{ -+ operands[0] = adjust_address_nv (operands[0], V2HImode, 0); -+ emit_insn (gen_avx512vl_v2div2hi2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - (define_expand "truncv2div2si2" - [(set (match_operand:V2SI 0 "register_operand") -@@ -15168,7 +15117,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512vl_v2div2si2_mask_store_1" -+(define_insn "avx512vl_v2div2si2_mask_store_1" - [(set (match_operand:V2SI 0 "memory_operand" "=m") - (vec_merge:V2SI - (any_truncate:V2SI -@@ -15182,28 +15131,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512vl_v2div2si2_mask_store_2" -- [(set (match_operand:DI 0 "memory_operand") -- (subreg:DI -- (vec_merge:V2SI -- (any_truncate:V2SI -- (match_operand:V2DI 1 "register_operand")) -- (vec_select:V2SI -- (subreg:V4SI -- (vec_concat:V2DI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512VL && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V2SI -- (any_truncate:V2SI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);") -+(define_expand "avx512vl_v2div2si2_mask_store_2" -+ [(match_operand:DI 0 "memory_operand") -+ (any_truncate:V2SI -+ (match_operand:V2DI 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512VL" -+{ -+ operands[0] = adjust_address_nv (operands[0], V2SImode, 0); -+ emit_insn (gen_avx512vl_v2div2si2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - (define_expand "truncv8div8qi2" - [(set (match_operand:V8QI 0 "register_operand") -@@ -15302,7 +15242,7 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn "*avx512f_v8div16qi2_mask_store_1" -+(define_insn "avx512f_v8div16qi2_mask_store_1" - [(set (match_operand:V8QI 0 "memory_operand" "=m") - (vec_merge:V8QI - (any_truncate:V8QI -@@ -15316,31 +15256,19 @@ - (set_attr "prefix" "evex") - (set_attr "mode" "TI")]) - --(define_insn_and_split "avx512f_v8div16qi2_mask_store_2" -- [(set (match_operand:DI 0 "memory_operand") -- (subreg:DI -- (vec_merge:V8QI -- (any_truncate:V8QI -- (match_operand:V8DI 1 "register_operand")) -- (vec_select:V8QI -- (subreg:V16QI -- (vec_concat:V2DI -- (match_dup 0) -- (const_int 0)) 0) -- (parallel [(const_int 0) (const_int 1) -- (const_int 2) (const_int 3) -- (const_int 4) (const_int 5) -- (const_int 6) (const_int 7)])) -- (match_operand:QI 2 "register_operand")) 0))] -- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()" -- "#" -- "&& 1" -- [(set (match_dup 0) -- (vec_merge:V8QI -- (any_truncate:V8QI (match_dup 1)) -- (match_dup 0) -- (match_dup 2)))] -- "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);") -+(define_expand "avx512f_v8div16qi2_mask_store_2" -+ [(match_operand:DI 0 "memory_operand") -+ (any_truncate:V8QI -+ (match_operand:V8DI 1 "register_operand")) -+ (match_operand:QI 2 "register_operand")] -+ "TARGET_AVX512F && TARGET_EVEX512" -+{ -+ operands[0] = adjust_address_nv (operands[0], V8QImode, 0); -+ emit_insn (gen_avx512f_v8div16qi2_mask_store_1 (operands[0], -+ operands[1], -+ operands[2])); -+ DONE; -+}) - - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; -diff --git a/gcc/testsuite/gcc.target/i386/pr117318.c b/gcc/testsuite/gcc.target/i386/pr117318.c -new file mode 100644 -index 00000000000..3d316ad04cf ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr117318.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile } */ -+/* { dg-options "-mavx512f -O" } */ -+ -+typedef __attribute__((__vector_size__ (64))) long long V; -+unsigned long long x; -+ -+unsigned long long -+foo() -+{ -+ __builtin_ia32_pmovusqb512mem_mask (&x, (V){8000000000000000}, 255); -+ return x; -+} --- -2.31.1 - diff --git a/GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch b/GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch deleted file mode 100644 index 3ed62f3a4f8a28749cd898026d380a9b96280ea4..0000000000000000000000000000000000000000 --- a/GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch +++ /dev/null @@ -1,157 +0,0 @@ -From 317928fc26b9720fb8da54f2735901f28b9d6f65 Mon Sep 17 00:00:00 2001 -From: Hongyu Wang -Date: Wed, 7 Feb 2024 14:42:58 +0800 -Subject: [PATCH 12/21] [APX PPX] Avoid generating unmatched pushp/popp in - pro/epilogue - -According to APX spec, the pushp/popp pairs should be matched, -otherwise the PPX hint cannot take effect and cause performance loss. - -In the ix86_expand_epilogue, there are several optimizations that may -cause the epilogue using mov to restore the regs. Check if PPX applied -and prevent usage of mov/leave in the epilogue. Also do not use PPX -for eh_return. - -gcc/ChangeLog: - - * config/i386/i386.cc (ix86_expand_prologue): Set apx_ppx_used - flag in m.fs with TARGET_APX_PPX && !crtl->calls_eh_return. - (ix86_emit_save_regs): Emit ppx is available only when - TARGET_APX_PPX && !crtl->calls_eh_return. - (ix86_expand_epilogue): Don't restore reg using mov when - apx_ppx_used flag is true. - * config/i386/i386.h (struct machine_frame_state): - Add apx_ppx_used flag. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/apx-ppx-2.c: New test. - * gcc.target/i386/apx-ppx-3.c: Likewise. - -(cherry picked from commit df542909224a7ff88b204534ad035a0b216a98bf) ---- - gcc/config/i386/i386.cc | 13 +++++++++---- - gcc/config/i386/i386.h | 4 ++++ - gcc/testsuite/gcc.target/i386/apx-ppx-2.c | 14 ++++++++++++++ - gcc/testsuite/gcc.target/i386/apx-ppx-3.c | 7 +++++++ - 4 files changed, 34 insertions(+), 4 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-2.c - create mode 100644 gcc/testsuite/gcc.target/i386/apx-ppx-3.c - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index a6b7e8b395e..32780f69151 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -7417,6 +7417,7 @@ ix86_emit_save_regs (void) - { - int regno; - rtx_insn *insn; -+ bool use_ppx = TARGET_APX_PPX && !crtl->calls_eh_return; - - if (!TARGET_APX_PUSH2POP2 - || !ix86_can_use_push2pop2 () -@@ -7426,7 +7427,7 @@ ix86_emit_save_regs (void) - if (GENERAL_REGNO_P (regno) && ix86_save_reg (regno, true, true)) - { - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), -- TARGET_APX_PPX)); -+ use_ppx)); - RTX_FRAME_RELATED_P (insn) = 1; - } - } -@@ -7457,7 +7458,7 @@ ix86_emit_save_regs (void) - regno_list[0]), - gen_rtx_REG (word_mode, - regno_list[1]), -- TARGET_APX_PPX)); -+ use_ppx)); - RTX_FRAME_RELATED_P (insn) = 1; - rtx dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (3)); - -@@ -7490,7 +7491,7 @@ ix86_emit_save_regs (void) - else - { - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, regno), -- TARGET_APX_PPX)); -+ use_ppx)); - RTX_FRAME_RELATED_P (insn) = 1; - aligned = true; - } -@@ -7499,7 +7500,7 @@ ix86_emit_save_regs (void) - { - insn = emit_insn (gen_push (gen_rtx_REG (word_mode, - regno_list[0]), -- TARGET_APX_PPX)); -+ use_ppx)); - RTX_FRAME_RELATED_P (insn) = 1; - } - } -@@ -8973,6 +8974,7 @@ ix86_expand_prologue (void) - if (!frame.save_regs_using_mov) - { - ix86_emit_save_regs (); -+ m->fs.apx_ppx_used = TARGET_APX_PPX && !crtl->calls_eh_return; - int_registers_saved = true; - gcc_assert (m->fs.sp_offset == frame.reg_save_offset); - } -@@ -9858,6 +9860,9 @@ ix86_expand_epilogue (int style) - /* SEH requires the use of pops to identify the epilogue. */ - else if (TARGET_SEH) - restore_regs_via_mov = false; -+ /* If we already save reg with pushp, don't use move at epilogue. */ -+ else if (m->fs.apx_ppx_used) -+ restore_regs_via_mov = false; - /* If we're only restoring one register and sp cannot be used then - using a move instruction to restore the register since it's - less work than reloading sp and popping the register. */ -diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h -index d78e554ec4b..3056f8b4c10 100644 ---- a/gcc/config/i386/i386.h -+++ b/gcc/config/i386/i386.h -@@ -2703,6 +2703,10 @@ struct GTY(()) machine_frame_state - The flags realigned and sp_realigned are mutually exclusive. */ - BOOL_BITFIELD sp_realigned : 1; - -+ /* When APX_PPX used in prologue, force epilogue to emit -+ popp instead of move and leave. */ -+ BOOL_BITFIELD apx_ppx_used : 1; -+ - /* If sp_realigned is set, this is the last valid offset from the CFA - that can be used for access with the frame pointer. */ - HOST_WIDE_INT sp_realigned_fp_last; -diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-2.c b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c -new file mode 100644 -index 00000000000..42a95340b55 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-2.c -@@ -0,0 +1,14 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-O1 -mapx-features=ppx -fno-omit-frame-pointer" } */ -+ -+/* { dg-final { scan-assembler "pushp" } } */ -+/* { dg-final { scan-assembler "popp" } } */ -+/* { dg-final { scan-assembler-not "leave" } } */ -+ -+extern int bar (int a); -+extern int *q; -+ -+void foo (int *a) -+{ -+ q[2] = bar (q[1]); -+} -diff --git a/gcc/testsuite/gcc.target/i386/apx-ppx-3.c b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c -new file mode 100644 -index 00000000000..76931fbe294 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/apx-ppx-3.c -@@ -0,0 +1,7 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-O2 -mapx-features=ppx" } */ -+ -+/* { dg-final { scan-assembler-not "pushp" } } */ -+/* { dg-final { scan-assembler-not "popp" } } */ -+ -+#include "eh_return-2.c" --- -2.31.1 - diff --git a/GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch b/GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch deleted file mode 100644 index 844899460ef451fd39a747d5d101b646eefea4b8..0000000000000000000000000000000000000000 --- a/GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch +++ /dev/null @@ -1,73 +0,0 @@ -From ccc5b723882eeb512b5b0fa2c3d29555822367f7 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Fri, 1 Nov 2024 15:59:47 +0800 -Subject: [PATCH 13/21] i386: Do not allow pointer conversion for CMPccXADD - intrin under -O0 - -The pointer conversion to wider type under macro would not consider -whether the higher bit is cleaned or not. It will lead to unexpected -cmp result. - -After this change, it will throw an incompatible pointer type error just -like -O2 does currently. - -gcc/ChangeLog: - - * config/i386/cmpccxaddintrin.h (_cmpccxadd_epi32): Do not do - type conversion for pointer. - (_cmpccxadd_epi64): Ditto. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/cmpccxadd-1b.c: New test. - -(cherry picked from commit 82bfb6c5ba6d1f84472271f367221988cd50f478) ---- - gcc/config/i386/cmpccxaddintrin.h | 6 +++--- - gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c | 15 +++++++++++++++ - 2 files changed, 18 insertions(+), 3 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c - -diff --git a/gcc/config/i386/cmpccxaddintrin.h b/gcc/config/i386/cmpccxaddintrin.h -index 39f368ffc08..9349fb00c1b 100644 ---- a/gcc/config/i386/cmpccxaddintrin.h -+++ b/gcc/config/i386/cmpccxaddintrin.h -@@ -72,11 +72,11 @@ _cmpccxadd_epi64 (long long *__A, long long __B, long long __C, - } - #else - #define _cmpccxadd_epi32(A,B,C,D) \ -- __builtin_ia32_cmpccxadd ((int *) (A), (int) (B), (int) (C), \ -+ __builtin_ia32_cmpccxadd ((A), (int) (B), (int) (C), \ - (_CMPCCX_ENUM) (D)) - #define _cmpccxadd_epi64(A,B,C,D) \ -- __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \ -- (long long) (C), (_CMPCCX_ENUM) (D)) -+ __builtin_ia32_cmpccxadd64 ((A), (long long) (B), (long long) (C), \ -+ (_CMPCCX_ENUM) (D)) - #endif - - #ifdef __DISABLE_CMPCCXADD__ -diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c -new file mode 100644 -index 00000000000..7d20325da50 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-1b.c -@@ -0,0 +1,15 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-O0 -mcmpccxadd" } */ -+#include -+ -+short *a; -+int b, c; -+int *d; -+long long e, f; -+ -+void extern -+cmpccxadd_test(void) -+{ -+ b = _cmpccxadd_epi32 (a, b, c, _CMPCCX_O); /* { dg-error "incompatible pointer type" } */ -+ e = _cmpccxadd_epi64 (d, e, f, _CMPCCX_O); /* { dg-error "incompatible pointer type" } */ -+} --- -2.31.1 - diff --git a/GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch b/GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch deleted file mode 100644 index 0942ec613768643b827429459fb4aa3f063173fc..0000000000000000000000000000000000000000 --- a/GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch +++ /dev/null @@ -1,82 +0,0 @@ -From a39abdaaec03d7506c9c5de258b1b4740540e8d4 Mon Sep 17 00:00:00 2001 -From: "Hu, Lin1" -Date: Tue, 5 Nov 2024 15:49:57 +0800 -Subject: [PATCH 14/21] i386: Add OPTION_MASK_ISA2_EVEX512 for some AVX512 - instructions. - -gcc/ChangeLog: - - PR target/117304 - * config/i386/i386-builtin.def: Add OPTION_MASK_ISA2_EVEX512 for some - AVX512 512-bits instructions. - -gcc/testsuite/ChangeLog: - - PR target/117304 - * gcc.target/i386/pr117304-1.c: New test. - -(cherry picked from commit 05fd99e3d5e9f00e4e23596ed15a3cec2aaba128) ---- - gcc/config/i386/i386-builtin.def | 10 ++++---- - gcc/testsuite/gcc.target/i386/pr117304-1.c | 28 ++++++++++++++++++++++ - 2 files changed, 33 insertions(+), 5 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr117304-1.c - -diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def -index fdd9dba6e54..ee34e0a1497 100644 ---- a/gcc/config/i386/i386-builtin.def -+++ b/gcc/config/i386/i386-builtin.def -@@ -3065,11 +3065,11 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse_cvtsi2ss_round, "__builtin_ia32_ - BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_sse_cvtsi2ssq_round, "__builtin_ia32_cvtsi2ss64", IX86_BUILTIN_CVTSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT64_INT) - BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_round, "__builtin_ia32_cvtss2sd_round", IX86_BUILTIN_CVTSS2SD_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_INT) - BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_sse2_cvtss2sd_mask_round, "__builtin_ia32_cvtss2sd_mask_round", IX86_BUILTIN_CVTSS2SD_MASK_ROUND, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF_V2DF_UQI_INT) --BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) --BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) --BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) --BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) --BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2dq512_mask", IX86_BUILTIN_CVTTPD2DQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_truncv8dfv8si2_mask_round, "__builtin_ia32_cvttpd2udq512_mask", IX86_BUILTIN_CVTTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fix_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2dq512_mask", IX86_BUILTIN_CVTTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_fixuns_truncv16sfv16si2_mask_round, "__builtin_ia32_cvttps2udq512_mask", IX86_BUILTIN_CVTTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT) -+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_EVEX512, CODE_FOR_floatunsv16siv16sf2_mask_round, "__builtin_ia32_cvtudq2ps512_mask", IX86_BUILTIN_CVTUDQ2PS512, UNKNOWN, (int) V16SF_FTYPE_V16SI_V16SF_HI_INT) - BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2sd64_round, "__builtin_ia32_cvtusi2sd64", IX86_BUILTIN_CVTUSI2SD64, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT64_INT) - BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2ss32_round, "__builtin_ia32_cvtusi2ss32", IX86_BUILTIN_CVTUSI2SS32, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT_INT) - BDESC (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_cvtusi2ss64_round, "__builtin_ia32_cvtusi2ss64", IX86_BUILTIN_CVTUSI2SS64, UNKNOWN, (int) V4SF_FTYPE_V4SF_UINT64_INT) -diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c -new file mode 100644 -index 00000000000..da26f4bd1b7 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c -@@ -0,0 +1,28 @@ -+/* PR target/117304 */ -+/* { dg-do compile } */ -+/* { dg-options "-O2 -mavx512f -mno-evex512 -mavx512vl" } */ -+ -+typedef __attribute__((__vector_size__(32))) int __v8si; -+typedef __attribute__((__vector_size__(32))) unsigned int __v8su; -+typedef __attribute__((__vector_size__(64))) double __v8df; -+typedef __attribute__((__vector_size__(64))) int __v16si; -+typedef __attribute__((__vector_size__(64))) unsigned int __v16su; -+typedef __attribute__((__vector_size__(64))) float __v16sf; -+typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); -+ -+volatile __v8df df; -+volatile __v16sf sf; -+volatile __v8si hi; -+volatile __v8su hui; -+volatile __v16si i; -+volatile __v16su ui; -+ -+void -+foo() -+{ -+ hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_cvttpd2dq128_mask'?" } */ -+ hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_cvttpd2udq128_mask'?" } */ -+ ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_cvttps2dq128_mask'?" } */ -+ ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_cvttps2udq128_mask'?" } */ -+ __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_cvtudq2ps128_mask'?" } */ -+} --- -2.31.1 - diff --git a/GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch b/GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch deleted file mode 100644 index 7f974150eaf82453d0485188a1bda42976dee6c0..0000000000000000000000000000000000000000 --- a/GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch +++ /dev/null @@ -1,40 +0,0 @@ -From d0c96acf46e0073a037d8693e4c8c25b3978fb47 Mon Sep 17 00:00:00 2001 -From: "Hu, Lin1" -Date: Thu, 7 Nov 2024 10:13:15 +0800 -Subject: [PATCH 15/21] i386: Modify regexp of pr117304-1.c - -Since the test doesn't care if the hint is correct, -modify the regexp of the hint part to avoid future -changes to the hint that would cause the test to fail. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr117304-1.c: Modify regexp. - -(cherry picked from commit 6a0e143a6449bcc250af13642263f671f756500b) ---- - gcc/testsuite/gcc.target/i386/pr117304-1.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/gcc/testsuite/gcc.target/i386/pr117304-1.c b/gcc/testsuite/gcc.target/i386/pr117304-1.c -index da26f4bd1b7..4f00ff7c92a 100644 ---- a/gcc/testsuite/gcc.target/i386/pr117304-1.c -+++ b/gcc/testsuite/gcc.target/i386/pr117304-1.c -@@ -20,9 +20,9 @@ volatile __v16su ui; - void - foo() - { -- hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_cvttpd2dq128_mask'?" } */ -- hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_cvttpd2udq128_mask'?" } */ -- ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_cvttps2dq128_mask'?" } */ -- ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_cvttps2udq128_mask'?" } */ -- __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_cvtudq2ps128_mask'?" } */ -+ hi ^= __builtin_ia32_cvttpd2dq512_mask(df, hi, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ -+ hui ^= __builtin_ia32_cvttpd2udq512_mask(df, hui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttpd2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ -+ ui ^= __builtin_ia32_cvttps2dq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2dq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ -+ ui ^= __builtin_ia32_cvttps2udq512_mask(sf, ui, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvttps2udq512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ -+ __builtin_ia32_cvtudq2ps512_mask(ui, sf, 0, 4); /* { dg-error "implicit declaration of function '__builtin_ia32_cvtudq2ps512_mask'; did you mean '__builtin_ia32_\[^\n\r]*'?" } */ - } --- -2.31.1 - diff --git a/GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch b/GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch deleted file mode 100644 index 91a4f55721a4f46e84072ffe802d36891d786b03..0000000000000000000000000000000000000000 --- a/GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch +++ /dev/null @@ -1,30 +0,0 @@ -From 07372a132627aa03829bdddb99ab0c9f826e4646 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Mon, 11 Nov 2024 10:52:33 +0800 -Subject: [PATCH 16/21] i386: Add new model number for Arrow Lake - -gcc/ChangeLog: - - * common/config/i386/cpuinfo.h (get_intel_cpu): Add new model - number for Arrow Lake. - -(cherry picked from commit 4380d6f8acc878fbdeb6ce86f4be64d340bdfd4b) ---- - gcc/common/config/i386/cpuinfo.h | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h -index 56427474b7b..e2f1e2f5f46 100644 ---- a/gcc/common/config/i386/cpuinfo.h -+++ b/gcc/common/config/i386/cpuinfo.h -@@ -606,6 +606,7 @@ get_intel_cpu (struct __processor_model *cpu_model, - CHECK___builtin_cpu_is ("grandridge"); - cpu_model->__cpu_type = INTEL_GRANDRIDGE; - break; -+ case 0xb5: - case 0xc5: - /* Arrow Lake. */ - cpu = "arrowlake"; --- -2.31.1 - diff --git a/GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch b/GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch deleted file mode 100644 index 9d5cf6879a6f8dccef836078dec2d981055aae69..0000000000000000000000000000000000000000 --- a/GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch +++ /dev/null @@ -1,104 +0,0 @@ -From f729f8e4f7b4a84efb9560c1da582dd6b79cc1dc Mon Sep 17 00:00:00 2001 -From: "Hu, Lin1" -Date: Wed, 6 Nov 2024 15:42:13 +0800 -Subject: [PATCH 17/21] i386: Zero extend 32-bit address to 64-bit with option - -mx32 -maddress-mode=long. [PR 117418] - --maddress-mode=long let Pmode = DI_mode, so zero extend 32-bit address to -64-bit and uses a 64-bit register as a pointer for avoid raise an ICE. - -gcc/ChangeLog: - - PR target/117418 - * config/i386/i386-expand.cc (ix86_expand_builtin): Convert - pointer's mode according to Pmode. - -gcc/testsuite/ChangeLog: - - PR target/117418 - * gcc.target/i386/pr117418-1.c: New test. - -(cherry picked from commit 8b4bb54e6c45411845ec559c49f594a6239c3969) ---- - gcc/config/i386/i386-expand.cc | 12 +++++++++++ - gcc/testsuite/gcc.target/i386/pr117418-1.c | 24 ++++++++++++++++++++++ - 2 files changed, 36 insertions(+) - create mode 100644 gcc/testsuite/gcc.target/i386/pr117418-1.c - -diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc -index 0a24a46fbf8..52cf79e2a47 100644 ---- a/gcc/config/i386/i386-expand.cc -+++ b/gcc/config/i386/i386-expand.cc -@@ -13475,6 +13475,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - op1 = expand_normal (arg1); - op2 = expand_normal (arg2); - -+ if (GET_MODE (op1) != Pmode) -+ op1 = convert_to_mode (Pmode, op1, 1); -+ - if (!address_operand (op2, VOIDmode)) - { - op2 = convert_memory_address (Pmode, op2); -@@ -13510,6 +13513,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - emit_label (ok_label); - emit_insn (gen_rtx_SET (target, pat)); - -+ if (GET_MODE (op0) != Pmode) -+ op0 = convert_to_mode (Pmode, op0, 1); -+ - for (i = 0; i < 8; i++) - { - op = gen_rtx_MEM (V2DImode, -@@ -13534,6 +13540,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - -+ if (GET_MODE (op2) != Pmode) -+ op2 = convert_to_mode (Pmode, op2, 1); -+ - op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); - emit_move_insn (op, op1); - -@@ -13571,6 +13580,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - if (!REG_P (op0)) - op0 = copy_to_mode_reg (SImode, op0); - -+ if (GET_MODE (op3) != Pmode) -+ op3 = convert_to_mode (Pmode, op3, 1); -+ - /* Force to use xmm0, xmm1 for keylow, keyhi*/ - op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0)); - emit_move_insn (op, op1); -diff --git a/gcc/testsuite/gcc.target/i386/pr117418-1.c b/gcc/testsuite/gcc.target/i386/pr117418-1.c -new file mode 100644 -index 00000000000..4839b139b79 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr117418-1.c -@@ -0,0 +1,24 @@ -+/* PR target/117418 */ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-options "-maddress-mode=long -mwidekl -mx32" } */ -+/* { dg-require-effective-target maybe_x32 } */ -+/* { dg-final { scan-assembler-times "aesdec128kl" 1 } } */ -+/* { dg-final { scan-assembler-times "aesdec256kl" 1 } } */ -+/* { dg-final { scan-assembler-times "aesenc128kl" 1 } } */ -+/* { dg-final { scan-assembler-times "aesenc256kl" 1 } } */ -+/* { dg-final { scan-assembler-times "encodekey128" 1 } } */ -+/* { dg-final { scan-assembler-times "encodekey256" 1 } } */ -+ -+typedef __attribute__((__vector_size__(16))) long long V; -+V a; -+ -+void -+foo() -+{ -+ __builtin_ia32_aesdec128kl_u8 (&a, a, &a); -+ __builtin_ia32_aesdec256kl_u8 (&a, a, &a); -+ __builtin_ia32_aesenc128kl_u8 (&a, a, &a); -+ __builtin_ia32_aesenc256kl_u8 (&a, a, &a); -+ __builtin_ia32_encodekey128_u32 (0, a, &a); -+ __builtin_ia32_encodekey256_u32 (0, a, a, &a); -+} --- -2.31.1 - diff --git a/GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch b/GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch deleted file mode 100644 index 62ae5909b648e28f398b60f0b0b6249720d8894c..0000000000000000000000000000000000000000 --- a/GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch +++ /dev/null @@ -1,37 +0,0 @@ -From 2f29473fe18bbcd6e8ebf036336def1f76ea45bb Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Thu, 21 Nov 2024 23:57:38 -0800 -Subject: [PATCH 18/21] Fix uninitialized operands[2] in vec_unpacks_hi_v4sf. - -It could cause weired spill in RA when register pressure is high. - -gcc/ChangeLog: - - PR target/117562 - * config/i386/sse.md (vec_unpacks_hi_v4sf): Initialize - operands[2] with CONST0_RTX. - -(cherry picked from commit 4a63cc6de77481878ec31e1e6ac30e22c50b063a) ---- - gcc/config/i386/sse.md | 5 ++++- - 1 file changed, 4 insertions(+), 1 deletion(-) - -diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md -index 771c5cd01d2..4376f848d74 100644 ---- a/gcc/config/i386/sse.md -+++ b/gcc/config/i386/sse.md -@@ -9628,7 +9628,10 @@ - (match_dup 2) - (parallel [(const_int 0) (const_int 1)]))))] - "TARGET_SSE2" -- "operands[2] = gen_reg_rtx (V4SFmode);") -+{ -+ operands[2] = gen_reg_rtx (V4SFmode); -+ emit_move_insn (operands[2], CONST0_RTX (V4SFmode)); -+}) - - (define_expand "vec_unpacks_hi_v8sf" - [(set (match_dup 2) --- -2.31.1 - diff --git a/GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch b/GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch deleted file mode 100644 index f66d2b78bb0e275aa3408a6603234ae9160eddc4..0000000000000000000000000000000000000000 --- a/GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 7ccf847529c8d9822e44876d3c5d167f9b055cae Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Mon, 10 Feb 2025 14:00:57 +0800 -Subject: [PATCH 19/21] i386: Fix AVX512BW intrin header with __OPTIMIZE__ [PR - 118813] - -When moving intrins around for AVX10 implementation in GCC 14, -the intrin _kshiftli_mask32 and _kshiftri_mask32 are wrongly -wrapped by "#if __OPTIMIZE__" instead of "#ifdef __OPTIMIZE__", -leading to the intrin file not `-Wsystem-headers -Wundef` clean -since r14-4490. - -gcc/ChangeLog: - - PR target/118813 - * config/i386/avx512bwintrin.h: Fix wrong __OPTIMIZE__ - wrap. - -(cherry picked from commit cec0326137ef91e2910a9c70eb9743f032e87137) ---- - gcc/config/i386/avx512bwintrin.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gcc/config/i386/avx512bwintrin.h b/gcc/config/i386/avx512bwintrin.h -index 8991c9c1e57..d07f73dde2f 100644 ---- a/gcc/config/i386/avx512bwintrin.h -+++ b/gcc/config/i386/avx512bwintrin.h -@@ -199,7 +199,7 @@ _kunpackw_mask32 (__mmask16 __A, __mmask16 __B) - (__mmask32) __B); - } - --#if __OPTIMIZE__ -+#ifdef __OPTIMIZE__ - extern __inline __mmask32 - __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) - _kshiftli_mask32 (__mmask32 __A, unsigned int __B) --- -2.31.1 - diff --git a/GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch b/GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch deleted file mode 100644 index b8cefbe76bbe7fb7fd05ea97a39b97e98156af1f..0000000000000000000000000000000000000000 --- a/GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch +++ /dev/null @@ -1,41 +0,0 @@ -From f29e5cb8c3386994a4d3b73908deeabab1da316f Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Mon, 10 Feb 2025 16:53:27 +0800 -Subject: [PATCH 20/21] i386: Do not check vector size conflict when AVX512 is - not explicitly set [PR 118815] - -When AVX512 is not explicitly set, we should not take EVEX512 bit into -consideration when checking vector size. It will solve the intrin header -file reporting warnings when compiling with -Wsystem-headers. - -However, there is side effect on the usage for '-march=xxx -mavx10.1-256', -where xxx is with AVX512. It will not report warning on vector size for now. -Since it is a rare usage, we will take it. - -gcc/ChangeLog: - - PR target/118815 - * config/i386/i386-options.cc (ix86_option_override_internal): - Do not check vector size conflict when AVX512 is not explicitly - set. - -(cherry picked from commit 31cbac836bb4f4c2172a91ee6164d8fdd32a8cb8) ---- - gcc/config/i386/i386-options.cc | 1 + - 1 file changed, 1 insertion(+) - -diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc -index f6c450cc871..a6eba1ca2b8 100644 ---- a/gcc/config/i386/i386-options.cc -+++ b/gcc/config/i386/i386-options.cc -@@ -2725,6 +2725,7 @@ ix86_option_override_internal (bool main_args_p, - "using 512 as max vector size"); - } - else if (TARGET_AVX512F_P (opts->x_ix86_isa_flags) -+ && (opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX512F) - && !(OPTION_MASK_ISA2_EVEX512 - & opts->x_ix86_isa_flags2_explicit)) - warning (0, "Vector size conflicts between AVX10.1 and AVX512, using " --- -2.31.1 - diff --git a/GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch b/GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch deleted file mode 100644 index 689104a455ead84572313ad6525a8ebc5415c45b..0000000000000000000000000000000000000000 --- a/GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch +++ /dev/null @@ -1,402 +0,0 @@ -From 5525dca8e858d3fd06c13b7839f6f9ff8b02c057 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Tue, 11 Feb 2025 11:29:34 +0800 -Subject: [PATCH 21/21] i386: Deprecate -m[no-]avx10.1 and make - -mno-avx10.1-512 to disable the whole AVX10.1 - -Based on the feedback we got, we would like to re-alias avx10.x to 512 -bit in the future. This leaves the current avx10.1 alias to 256 bit -inconsistent. Since it has been there for GCC 14.1 and GCC 14.2, -we decide to deprecate avx10.1 alias. The current proposal is not -adding it back in the future, but it might change if necessary. - -For -mno- options, it is confusing what it is disabling when it comes -to avx10. Since there is barely usage enabling AVX10 with 512 bit -then disabling it, we will only provide -mno-avx10.x options in the -future, disabling the whole AVX10.x. If someone really wants to disable -512 bit after enabling it, -mavx10.x-512 -mno-avx10.x -mavx10.x-256 is -the only way to do that since we also do not want to break the usual -expression on -m- options enabling everything mentioned. - -However, for avx10.1, since we deprecated avx10.1, there is no reason -we should have -mno-avx10.1. Thus, we need to keep -mno-avx10.1-[256,512]. -To avoid confusion, we will make -mno-avx10.1-512 to disable the -whole AVX10.1 set to match the future -mno-avx10.x. - -gcc/ChangeLog: - - * common/config/i386/i386-common.cc - (OPTION_MASK_ISA2_AVX2_UNSET): Change AVX10.1 unset macro. - (OPTION_MASK_ISA2_AVX10_1_256_UNSET): Removed. - (OPTION_MASK_ISA2_AVX10_1_512_UNSET): Removed. - (OPTION_MASK_ISA2_AVX10_1_UNSET): New. - (ix86_handle_option): Adjust AVX10.1 unset macro. - * common/config/i386/i386-isas.h: Remove avx10.1. - * config/i386/i386-options.cc - (ix86_valid_target_attribute_inner_p): Ditto. - (ix86_option_override_internal): Adjust warning message. - * config/i386/i386.opt: Remove mavx10.1. - * config/i386/i386.opt.urls: Regenerated. - * doc/extend.texi: Remove avx10.1 and adjust doc. - * doc/sourcebuild.texi: Ditto. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/avx10_1-1.c: Change to avx10.1-256. - * gcc.target/i386/avx10_1-13.c: Ditto. - * gcc.target/i386/avx10_1-14.c: Ditto. - * gcc.target/i386/avx10_1-21.c: Ditto. - * gcc.target/i386/avx10_1-22.c: Ditto. - * gcc.target/i386/avx10_1-23.c: Ditto. - * gcc.target/i386/avx10_1-24.c: Ditto. - * gcc.target/i386/avx10_1-3.c: Ditto. - * gcc.target/i386/avx10_1-5.c: Ditto. - * gcc.target/i386/avx10_1-6.c: Ditto. - * gcc.target/i386/avx10_1-8.c: Ditto. - * gcc.target/i386/avx10_1-12.c: Adjust warning message. - * gcc.target/i386/avx10_1-19.c: Ditto. - * gcc.target/i386/avx10_1-17.c: Adjust to no-avx10.1-512. - -(cherry picked from commit de562367d344758ea9264992e884f031d4435688) ---- - gcc/common/config/i386/i386-common.cc | 15 +++++++-------- - gcc/common/config/i386/i386-isas.h | 1 - - gcc/config/i386/i386-options.cc | 3 +-- - gcc/config/i386/i386.opt | 5 ----- - gcc/config/i386/i386.opt.urls | 3 --- - gcc/doc/extend.texi | 11 ++++------- - gcc/doc/sourcebuild.texi | 5 +---- - gcc/testsuite/gcc.target/i386/avx10_1-1.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-12.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-13.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-14.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-17.c | 4 ++-- - gcc/testsuite/gcc.target/i386/avx10_1-19.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-21.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-22.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-23.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-24.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-3.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-5.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-6.c | 2 +- - gcc/testsuite/gcc.target/i386/avx10_1-8.c | 2 +- - 21 files changed, 28 insertions(+), 45 deletions(-) - -diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc -index d578918dfb7..bb03ef1e292 100644 ---- a/gcc/common/config/i386/i386-common.cc -+++ b/gcc/common/config/i386/i386-common.cc -@@ -239,7 +239,7 @@ along with GCC; see the file COPYING3. If not see - (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \ - | OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVXNECONVERT_UNSET \ - | OPTION_MASK_ISA2_AVXVNNIINT16_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET \ -- | OPTION_MASK_ISA2_AVX10_1_256_UNSET) -+ | OPTION_MASK_ISA2_AVX10_1_UNSET) - #define OPTION_MASK_ISA_AVX512F_UNSET \ - (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ - | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ -@@ -319,9 +319,8 @@ along with GCC; see the file COPYING3. If not see - #define OPTION_MASK_ISA2_APX_F_UNSET OPTION_MASK_ISA2_APX_F - #define OPTION_MASK_ISA2_EVEX512_UNSET OPTION_MASK_ISA2_EVEX512 - #define OPTION_MASK_ISA2_USER_MSR_UNSET OPTION_MASK_ISA2_USER_MSR --#define OPTION_MASK_ISA2_AVX10_1_256_UNSET \ -- (OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1_512_UNSET) --#define OPTION_MASK_ISA2_AVX10_1_512_UNSET OPTION_MASK_ISA2_AVX10_1_512 -+#define OPTION_MASK_ISA2_AVX10_1_UNSET \ -+ (OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1_512) - - /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same - as -mno-sse4.1. */ -@@ -1419,8 +1418,8 @@ ix86_handle_option (struct gcc_options *opts, - } - else - { -- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_256_UNSET; -- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_256_UNSET; -+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET; -+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET; - opts->x_ix86_no_avx10_1_explicit = 1; - } - return true; -@@ -1435,8 +1434,8 @@ ix86_handle_option (struct gcc_options *opts, - } - else - { -- opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_512_UNSET; -- opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_512_UNSET; -+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX10_1_UNSET; -+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX10_1_UNSET; - opts->x_ix86_no_avx10_1_explicit = 1; - } - return true; -diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h -index 9c2179a3dd8..017c795e211 100644 ---- a/gcc/common/config/i386/i386-isas.h -+++ b/gcc/common/config/i386/i386-isas.h -@@ -193,7 +193,6 @@ ISA_NAMES_TABLE_START - ISA_NAMES_TABLE_ENTRY("sm4", FEATURE_SM4, P_NONE, "-msm4") - ISA_NAMES_TABLE_ENTRY("apxf", FEATURE_APX_F, P_NONE, "-mapxf") - ISA_NAMES_TABLE_ENTRY("usermsr", FEATURE_USER_MSR, P_NONE, "-musermsr") -- ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1_256, P_NONE, "-mavx10.1") - ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1_256, P_AVX10_1_256, "-mavx10.1-256") - ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_AVX10_1_512, "-mavx10.1-512") - ISA_NAMES_TABLE_END -diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc -index a6eba1ca2b8..11c6ddf0f44 100644 ---- a/gcc/config/i386/i386-options.cc -+++ b/gcc/config/i386/i386-options.cc -@@ -1135,7 +1135,6 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], - IX86_ATTR_ISA ("apxf", OPT_mapxf), - IX86_ATTR_ISA ("evex512", OPT_mevex512), - IX86_ATTR_ISA ("usermsr", OPT_musermsr), -- IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1_256), - IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), - IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512), - -@@ -2746,7 +2745,7 @@ ix86_option_override_internal (bool main_args_p, - && ((OPTION_MASK_ISA2_AVX10_1_256 | OPTION_MASK_ISA2_AVX10_1_512) - & opts->x_ix86_isa_flags2_explicit)) - { -- warning (0, "%<-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512%> " -+ warning (0, "%<-mno-avx10.1-256, -mno-avx10.1-512%> " - "cannot disable AVX512 instructions when " - "%<-mavx512XXX%>"); - /* Reset those unset AVX512 flags set by AVX10 options when AVX10 is -diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt -index d5f793a9e8b..f99c4e3ae5d 100644 ---- a/gcc/config/i386/i386.opt -+++ b/gcc/config/i386/i386.opt -@@ -1380,8 +1380,3 @@ mavx10.1-512 - Target Mask(ISA2_AVX10_1_512) Var(ix86_isa_flags2) Save - Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, - and AVX10.1-512 built-in functions and code generation. -- --mavx10.1 --Target Alias(mavx10.1-256) --Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, --and AVX10.1 built-in functions and code generation. -diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls -index 81c5bb9a927..3ed76635002 100644 ---- a/gcc/config/i386/i386.opt.urls -+++ b/gcc/config/i386/i386.opt.urls -@@ -615,6 +615,3 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256) - mavx10.1-512 - UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512) - --mavx10.1 --UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1) -- -diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi -index e290265d68d..a3272bcce30 100644 ---- a/gcc/doc/extend.texi -+++ b/gcc/doc/extend.texi -@@ -7383,20 +7383,17 @@ Enable/disable the generation of the USER_MSR instructions. - Enable/disable the generation of the APX features, including - EGPR, PUSH2POP2, NDD and PPX. - --@cindex @code{target("avx10.1")} function attribute, x86 --@item avx10.1 --@itemx no-avx10.1 --Enable/disable the generation of the AVX10.1 instructions. -- - @cindex @code{target("avx10.1-256")} function attribute, x86 - @item avx10.1-256 - @itemx no-avx10.1-256 --Enable/disable the generation of the AVX10.1 instructions. -+Enable the generation of the AVX10.1 instructions with 256 bit support. -+Disable the generation of the AVX10.1 instructions. - - @cindex @code{target("avx10.1-512")} function attribute, x86 - @item avx10.1-512 - @itemx no-avx10.1-512 --Enable/disable the generation of the AVX10.1 512 bit instructions. -+Enable the generation of the AVX10.1 instructions with 512 bit support. -+Disable the generation of the AVX10.1 instructions. - - @cindex @code{target("cld")} function attribute, x86 - @item cld -diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi -index 8e4e59ac44c..5b026cfe073 100644 ---- a/gcc/doc/sourcebuild.texi -+++ b/gcc/doc/sourcebuild.texi -@@ -2543,11 +2543,8 @@ Target supports compiling @code{avx} instructions. - @item avx_runtime - Target supports the execution of @code{avx} instructions. - --@item avx10.1 --Target supports the execution of @code{avx10.1} instructions. -- - @item avx10.1-256 --Target supports the execution of @code{avx10.1} instructions. -+Target supports the execution of @code{avx10.1-256} instructions. - - @item avx10.1-512 - Target supports the execution of @code{avx10.1-512} instructions. -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-1.c b/gcc/testsuite/gcc.target/i386/avx10_1-1.c -index cfd9662bb13..33ce99ed60a 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-1.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-1.c -@@ -1,5 +1,5 @@ - /* { dg-do compile { target { ! ia32 } } } */ --/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ -+/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ - - #include - -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-12.c b/gcc/testsuite/gcc.target/i386/avx10_1-12.c -index 61f0e4db61b..ae1c77bbcbd 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-12.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-12.c -@@ -1,6 +1,6 @@ - /* { dg-do compile } */ - /* { dg-options "-march=x86-64 -mno-avx10.1-512 -mavx512f" } */ --/* { dg-warning "'-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" "" { target *-*-* } 0 } */ -+/* { dg-warning "'-mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" "" { target *-*-* } 0 } */ - /* { dg-final { scan-assembler "%zmm" } } */ - - #include "avx10_1-2.c" -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-13.c b/gcc/testsuite/gcc.target/i386/avx10_1-13.c -index 8a111190025..e94ac8e1862 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-13.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-13.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=x86-64 -mavx10.1" } */ -+/* { dg-options "-march=x86-64 -mavx10.1-256" } */ - /* { dg-final { scan-assembler "%zmm" } } */ - - typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-14.c b/gcc/testsuite/gcc.target/i386/avx10_1-14.c -index 03222a7a031..76573e644fe 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-14.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-14.c -@@ -4,7 +4,7 @@ - - typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); - --__attribute__ ((target ("avx10.1"))) __m512d -+__attribute__ ((target ("avx10.1-256"))) __m512d - foo () - { /* { dg-warning "Vector size conflicts between AVX10.1 and AVX512, using 512 as max vector size" } */ - __m512d a, b; -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-17.c b/gcc/testsuite/gcc.target/i386/avx10_1-17.c -index a19230f597b..09f125215dc 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-17.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-17.c -@@ -4,9 +4,9 @@ - - typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); - --__attribute__ ((target ("no-avx10.1"))) __m512d -+__attribute__ ((target ("no-avx10.1-512"))) __m512d - foo () --{ /* { dg-warning "'-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ -+{ /* { dg-warning "'-mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ - __m512d a, b; - a = a + b; - return a; -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-19.c b/gcc/testsuite/gcc.target/i386/avx10_1-19.c -index 7aacc15aad9..7445ecfa548 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-19.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-19.c -@@ -6,7 +6,7 @@ typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); - - __attribute__ ((target ("avx512f"))) __m512d - foo () --{ /* { dg-warning "'-mno-avx10.1, -mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ -+{ /* { dg-warning "'-mno-avx10.1-256, -mno-avx10.1-512' cannot disable AVX512 instructions when '-mavx512XXX'" } */ - __m512d a, b; - a = a + b; - return a; -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-21.c b/gcc/testsuite/gcc.target/i386/avx10_1-21.c -index 27a7265df61..0a1fcc9c0f8 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-21.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-21.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=x86-64 -mavx10.1 -mevex512 -Wno-psabi" } */ -+/* { dg-options "-march=x86-64 -mavx10.1-256 -mevex512 -Wno-psabi" } */ - /* { dg-warning "Using '-mevex512' without any AVX512 features enabled together with AVX10.1 only will not enable any AVX512 or AVX10.1-512 features, using 256 as max vector size" "" { target *-*-* } 0 } */ - /* { dg-final { scan-assembler-not "%zmm" } } */ - -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-22.c b/gcc/testsuite/gcc.target/i386/avx10_1-22.c -index 796262283d6..cb649dc5538 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-22.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-22.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-march=x86-64 -mavx10.1 -Wno-psabi" } */ -+/* { dg-options "-march=x86-64 -mavx10.1-256 -Wno-psabi" } */ - /* { dg-final { scan-assembler-not "%zmm" } } */ - - typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-23.c b/gcc/testsuite/gcc.target/i386/avx10_1-23.c -index 6e8d64d0f34..f31c63650ab 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-23.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-23.c -@@ -4,7 +4,7 @@ - - typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); - --__attribute__ ((target ("avx10.1"))) __m512d -+__attribute__ ((target ("avx10.1-256"))) __m512d - foo () - { /* { dg-warning "Using '-mevex512' without any AVX512 features enabled together with AVX10.1 only will not enable any AVX512 or AVX10.1-512 features, using 256 as max vector size" "" { target *-*-* } 0 } */ - __m512d a, b; -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-24.c b/gcc/testsuite/gcc.target/i386/avx10_1-24.c -index 2e93f041760..1bba0fb4b66 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-24.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-24.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ -+/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ - /* { dg-final { scan-assembler-not "%zmm" } } */ - - typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-3.c b/gcc/testsuite/gcc.target/i386/avx10_1-3.c -index 3be988a1a62..a176f2749ce 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-3.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-3.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ -+/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ - - #include - -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-5.c b/gcc/testsuite/gcc.target/i386/avx10_1-5.c -index 20b78ea9510..3079cf14ef0 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-5.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-5.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O0 -march=x86-64 -mavx10.1 -Wno-psabi" } */ -+/* { dg-options "-O0 -march=x86-64 -mavx10.1-256 -Wno-psabi" } */ - /* { dg-final { scan-assembler-not ".%zmm" } } */ - - #include "avx10_1-2.c" -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-6.c b/gcc/testsuite/gcc.target/i386/avx10_1-6.c -index fbc92d5c4ca..60dbd05c4a7 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-6.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-6.c -@@ -1,5 +1,5 @@ - /* { dg-do compile } */ --/* { dg-options "-O2 -march=x86-64 -mavx10.1" } */ -+/* { dg-options "-O2 -march=x86-64 -mavx10.1-256" } */ - - #include - -diff --git a/gcc/testsuite/gcc.target/i386/avx10_1-8.c b/gcc/testsuite/gcc.target/i386/avx10_1-8.c -index 69b6c6a3e1a..ec930f72218 100644 ---- a/gcc/testsuite/gcc.target/i386/avx10_1-8.c -+++ b/gcc/testsuite/gcc.target/i386/avx10_1-8.c -@@ -1,4 +1,4 @@ - /* { dg-do compile { target { ! ia32 } } } */ --/* { dg-options "-march=x86-64 -mavx10.1 -mavx512f -mno-evex512" } */ -+/* { dg-options "-march=x86-64 -mavx10.1-256 -mavx512f -mno-evex512" } */ - - #include "avx10_1-1.c" --- -2.31.1 - diff --git a/GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch b/GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch deleted file mode 100644 index 0d5f6c47c8e55137ad1e14a58e4d649ddbf87a0b..0000000000000000000000000000000000000000 --- a/GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch +++ /dev/null @@ -1,444 +0,0 @@ -From 4e7735a8d87559bbddfe3a985786996e22241f8d Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Mon, 12 Aug 2024 14:35:31 +0800 -Subject: [PATCH] Move ix86_align_loops into a separate pass and insert the - pass after pass_endbr_and_patchable_area. - -gcc/ChangeLog: - - PR target/116174 - * config/i386/i386.cc (ix86_align_loops): Move this to .. - * config/i386/i386-features.cc (ix86_align_loops): .. here. - (class pass_align_tight_loops): New class. - (make_pass_align_tight_loops): New function. - * config/i386/i386-passes.def: Insert pass_align_tight_loops - after pass_insert_endbr_and_patchable_area. - * config/i386/i386-protos.h (make_pass_align_tight_loops): New - declare. - -gcc/testsuite/ChangeLog: - - * gcc.target/i386/pr116174.c: New test. - -(cherry picked from commit c3c83d22d212a35cb1bfb8727477819463f0dcd8) ---- - gcc/config/i386/i386-features.cc | 191 +++++++++++++++++++++++ - gcc/config/i386/i386-passes.def | 3 + - gcc/config/i386/i386-protos.h | 1 + - gcc/config/i386/i386.cc | 146 ----------------- - gcc/testsuite/gcc.target/i386/pr116174.c | 12 ++ - 5 files changed, 207 insertions(+), 146 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr116174.c - -diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc -index e3e004d5526..7de19d42363 100644 ---- a/gcc/config/i386/i386-features.cc -+++ b/gcc/config/i386/i386-features.cc -@@ -3253,6 +3253,197 @@ make_pass_remove_partial_avx_dependency (gcc::context *ctxt) - return new pass_remove_partial_avx_dependency (ctxt); - } - -+/* When a hot loop can be fit into one cacheline, -+ force align the loop without considering the max skip. */ -+static void -+ix86_align_loops () -+{ -+ basic_block bb; -+ -+ /* Don't do this when we don't know cache line size. */ -+ if (ix86_cost->prefetch_block == 0) -+ return; -+ -+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS); -+ profile_count count_threshold = cfun->cfg->count_max / param_align_threshold; -+ FOR_EACH_BB_FN (bb, cfun) -+ { -+ rtx_insn *label = BB_HEAD (bb); -+ bool has_fallthru = 0; -+ edge e; -+ edge_iterator ei; -+ -+ if (!LABEL_P (label)) -+ continue; -+ -+ profile_count fallthru_count = profile_count::zero (); -+ profile_count branch_count = profile_count::zero (); -+ -+ FOR_EACH_EDGE (e, ei, bb->preds) -+ { -+ if (e->flags & EDGE_FALLTHRU) -+ has_fallthru = 1, fallthru_count += e->count (); -+ else -+ branch_count += e->count (); -+ } -+ -+ if (!fallthru_count.initialized_p () || !branch_count.initialized_p ()) -+ continue; -+ -+ if (bb->loop_father -+ && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun) -+ && (has_fallthru -+ ? (!(single_succ_p (bb) -+ && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) -+ && optimize_bb_for_speed_p (bb) -+ && branch_count + fallthru_count > count_threshold -+ && (branch_count > fallthru_count * param_align_loop_iterations)) -+ /* In case there'no fallthru for the loop. -+ Nops inserted won't be executed. */ -+ : (branch_count > count_threshold -+ || (bb->count > bb->prev_bb->count * 10 -+ && (bb->prev_bb->count -+ <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2))))) -+ { -+ rtx_insn* insn, *end_insn; -+ HOST_WIDE_INT size = 0; -+ bool padding_p = true; -+ basic_block tbb = bb; -+ unsigned cond_branch_num = 0; -+ bool detect_tight_loop_p = false; -+ -+ for (unsigned int i = 0; i != bb->loop_father->num_nodes; -+ i++, tbb = tbb->next_bb) -+ { -+ /* Only handle continuous cfg layout. */ -+ if (bb->loop_father != tbb->loop_father) -+ { -+ padding_p = false; -+ break; -+ } -+ -+ FOR_BB_INSNS (tbb, insn) -+ { -+ if (!NONDEBUG_INSN_P (insn)) -+ continue; -+ size += ix86_min_insn_size (insn); -+ -+ /* We don't know size of inline asm. -+ Don't align loop for call. */ -+ if (asm_noperands (PATTERN (insn)) >= 0 -+ || CALL_P (insn)) -+ { -+ size = -1; -+ break; -+ } -+ } -+ -+ if (size == -1 || size > ix86_cost->prefetch_block) -+ { -+ padding_p = false; -+ break; -+ } -+ -+ FOR_EACH_EDGE (e, ei, tbb->succs) -+ { -+ /* It could be part of the loop. */ -+ if (e->dest == bb) -+ { -+ detect_tight_loop_p = true; -+ break; -+ } -+ } -+ -+ if (detect_tight_loop_p) -+ break; -+ -+ end_insn = BB_END (tbb); -+ if (JUMP_P (end_insn)) -+ { -+ /* For decoded icache: -+ 1. Up to two branches are allowed per Way. -+ 2. A non-conditional branch is the last micro-op in a Way. -+ */ -+ if (onlyjump_p (end_insn) -+ && (any_uncondjump_p (end_insn) -+ || single_succ_p (tbb))) -+ { -+ padding_p = false; -+ break; -+ } -+ else if (++cond_branch_num >= 2) -+ { -+ padding_p = false; -+ break; -+ } -+ } -+ -+ } -+ -+ if (padding_p && detect_tight_loop_p) -+ { -+ emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)), -+ GEN_INT (0)), label); -+ /* End of function. */ -+ if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) -+ break; -+ /* Skip bb which already fits into one cacheline. */ -+ bb = tbb; -+ } -+ } -+ } -+ -+ loop_optimizer_finalize (); -+ free_dominance_info (CDI_DOMINATORS); -+} -+ -+namespace { -+ -+const pass_data pass_data_align_tight_loops = -+{ -+ RTL_PASS, /* type */ -+ "align_tight_loops", /* name */ -+ OPTGROUP_NONE, /* optinfo_flags */ -+ TV_MACH_DEP, /* tv_id */ -+ 0, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ 0, /* todo_flags_finish */ -+}; -+ -+class pass_align_tight_loops : public rtl_opt_pass -+{ -+public: -+ pass_align_tight_loops (gcc::context *ctxt) -+ : rtl_opt_pass (pass_data_align_tight_loops, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ bool gate (function *) final override -+ { -+ return optimize && optimize_function_for_speed_p (cfun); -+ } -+ -+ unsigned int execute (function *) final override -+ { -+ timevar_push (TV_MACH_DEP); -+#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN -+ ix86_align_loops (); -+#endif -+ timevar_pop (TV_MACH_DEP); -+ return 0; -+ } -+}; // class pass_align_tight_loops -+ -+} // anon namespace -+ -+rtl_opt_pass * -+make_pass_align_tight_loops (gcc::context *ctxt) -+{ -+ return new pass_align_tight_loops (ctxt); -+} -+ - /* This compares the priority of target features in function DECL1 - and DECL2. It returns positive value if DECL1 is higher priority, - negative value if DECL2 is higher priority and 0 if they are the -diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def -index 7d96766f7b9..e500f15c997 100644 ---- a/gcc/config/i386/i386-passes.def -+++ b/gcc/config/i386/i386-passes.def -@@ -31,5 +31,8 @@ along with GCC; see the file COPYING3. If not see - INSERT_PASS_BEFORE (pass_cse2, 1, pass_stv, true /* timode_p */); - - INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_endbr_and_patchable_area); -+ /* pass_align_tight_loops must be after pass_insert_endbr_and_patchable_area. -+ PR116174. */ -+ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); - - INSERT_PASS_AFTER (pass_combine, 1, pass_remove_partial_avx_dependency); -diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h -index 46214a63974..36c7b1aed42 100644 ---- a/gcc/config/i386/i386-protos.h -+++ b/gcc/config/i386/i386-protos.h -@@ -419,6 +419,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area - (gcc::context *); - extern rtl_opt_pass *make_pass_remove_partial_avx_dependency - (gcc::context *); -+extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); - - extern bool ix86_has_no_direct_extern_access; - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 6f89891d3cb..288c69467d6 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -23444,150 +23444,6 @@ ix86_split_stlf_stall_load () - } - } - --/* When a hot loop can be fit into one cacheline, -- force align the loop without considering the max skip. */ --static void --ix86_align_loops () --{ -- basic_block bb; -- -- /* Don't do this when we don't know cache line size. */ -- if (ix86_cost->prefetch_block == 0) -- return; -- -- loop_optimizer_init (AVOID_CFG_MODIFICATIONS); -- profile_count count_threshold = cfun->cfg->count_max / param_align_threshold; -- FOR_EACH_BB_FN (bb, cfun) -- { -- rtx_insn *label = BB_HEAD (bb); -- bool has_fallthru = 0; -- edge e; -- edge_iterator ei; -- -- if (!LABEL_P (label)) -- continue; -- -- profile_count fallthru_count = profile_count::zero (); -- profile_count branch_count = profile_count::zero (); -- -- FOR_EACH_EDGE (e, ei, bb->preds) -- { -- if (e->flags & EDGE_FALLTHRU) -- has_fallthru = 1, fallthru_count += e->count (); -- else -- branch_count += e->count (); -- } -- -- if (!fallthru_count.initialized_p () || !branch_count.initialized_p ()) -- continue; -- -- if (bb->loop_father -- && bb->loop_father->latch != EXIT_BLOCK_PTR_FOR_FN (cfun) -- && (has_fallthru -- ? (!(single_succ_p (bb) -- && single_succ (bb) == EXIT_BLOCK_PTR_FOR_FN (cfun)) -- && optimize_bb_for_speed_p (bb) -- && branch_count + fallthru_count > count_threshold -- && (branch_count > fallthru_count * param_align_loop_iterations)) -- /* In case there'no fallthru for the loop. -- Nops inserted won't be executed. */ -- : (branch_count > count_threshold -- || (bb->count > bb->prev_bb->count * 10 -- && (bb->prev_bb->count -- <= ENTRY_BLOCK_PTR_FOR_FN (cfun)->count / 2))))) -- { -- rtx_insn* insn, *end_insn; -- HOST_WIDE_INT size = 0; -- bool padding_p = true; -- basic_block tbb = bb; -- unsigned cond_branch_num = 0; -- bool detect_tight_loop_p = false; -- -- for (unsigned int i = 0; i != bb->loop_father->num_nodes; -- i++, tbb = tbb->next_bb) -- { -- /* Only handle continuous cfg layout. */ -- if (bb->loop_father != tbb->loop_father) -- { -- padding_p = false; -- break; -- } -- -- FOR_BB_INSNS (tbb, insn) -- { -- if (!NONDEBUG_INSN_P (insn)) -- continue; -- size += ix86_min_insn_size (insn); -- -- /* We don't know size of inline asm. -- Don't align loop for call. */ -- if (asm_noperands (PATTERN (insn)) >= 0 -- || CALL_P (insn)) -- { -- size = -1; -- break; -- } -- } -- -- if (size == -1 || size > ix86_cost->prefetch_block) -- { -- padding_p = false; -- break; -- } -- -- FOR_EACH_EDGE (e, ei, tbb->succs) -- { -- /* It could be part of the loop. */ -- if (e->dest == bb) -- { -- detect_tight_loop_p = true; -- break; -- } -- } -- -- if (detect_tight_loop_p) -- break; -- -- end_insn = BB_END (tbb); -- if (JUMP_P (end_insn)) -- { -- /* For decoded icache: -- 1. Up to two branches are allowed per Way. -- 2. A non-conditional branch is the last micro-op in a Way. -- */ -- if (onlyjump_p (end_insn) -- && (any_uncondjump_p (end_insn) -- || single_succ_p (tbb))) -- { -- padding_p = false; -- break; -- } -- else if (++cond_branch_num >= 2) -- { -- padding_p = false; -- break; -- } -- } -- -- } -- -- if (padding_p && detect_tight_loop_p) -- { -- emit_insn_before (gen_max_skip_align (GEN_INT (ceil_log2 (size)), -- GEN_INT (0)), label); -- /* End of function. */ -- if (!tbb || tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) -- break; -- /* Skip bb which already fits into one cacheline. */ -- bb = tbb; -- } -- } -- } -- -- loop_optimizer_finalize (); -- free_dominance_info (CDI_DOMINATORS); --} -- - /* Implement machine specific optimizations. We implement padding of returns - for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ - static void -@@ -23611,8 +23467,6 @@ ix86_reorg (void) - #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN - if (TARGET_FOUR_JUMP_LIMIT) - ix86_avoid_jump_mispredicts (); -- -- ix86_align_loops (); - #endif - } - } -diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c b/gcc/testsuite/gcc.target/i386/pr116174.c -new file mode 100644 -index 00000000000..8877d0b51af ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr116174.c -@@ -0,0 +1,12 @@ -+/* { dg-do compile { target *-*-linux* } } */ -+/* { dg-options "-O2 -fcf-protection=branch" } */ -+ -+char * -+foo (char *dest, const char *src) -+{ -+ while ((*dest++ = *src++) != '\0') -+ /* nothing */; -+ return --dest; -+} -+ -+/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */ --- -2.31.1 - diff --git a/GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch b/GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch deleted file mode 100644 index 36d17a6d397f68ab2c1a3b495ffe5d2a2ca16deb..0000000000000000000000000000000000000000 --- a/GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch +++ /dev/null @@ -1,119 +0,0 @@ -From 3f3f546bf830d019224aaf6cd349a1b9b738de1a Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Fri, 6 Sep 2024 05:24:07 -0700 -Subject: [PATCH] x86-64: Don't use temp for argument in a TImode register - -Don't use temp for a PARALLEL BLKmode argument of an EXPR_LIST expression -in a TImode register. Otherwise, the TImode variable will be put in -the GPR save area which guarantees only 8-byte alignment. - -gcc/ - - PR target/116621 - * config/i386/i386.cc (ix86_gimplify_va_arg): Don't use temp for - a PARALLEL BLKmode container of an EXPR_LIST expression in a - TImode register. - -gcc/testsuite/ - - PR target/116621 - * gcc.target/i386/pr116621.c: New test. - -Signed-off-by: H.J. Lu -(cherry picked from commit fa7bbb065c63aa802e0bbb04d605407dad58cf94) ---- - gcc/config/i386/i386.cc | 22 ++++++++++-- - gcc/testsuite/gcc.target/i386/pr116621.c | 43 ++++++++++++++++++++++++ - 2 files changed, 63 insertions(+), 2 deletions(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr116621.c - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index feefbe322de..8f1c1f9ccd0 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -4893,13 +4893,31 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, - - examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); - -- need_temp = (!REG_P (container) -+ bool container_in_reg = false; -+ if (REG_P (container)) -+ container_in_reg = true; -+ else if (GET_CODE (container) == PARALLEL -+ && GET_MODE (container) == BLKmode -+ && XVECLEN (container, 0) == 1) -+ { -+ /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST -+ expression in a TImode register. In this case, temp isn't -+ needed. Otherwise, the TImode variable will be put in the -+ GPR save area which guarantees only 8-byte alignment. */ -+ rtx x = XVECEXP (container, 0, 0); -+ if (GET_CODE (x) == EXPR_LIST -+ && REG_P (XEXP (x, 0)) -+ && XEXP (x, 1) == const0_rtx) -+ container_in_reg = true; -+ } -+ -+ need_temp = (!container_in_reg - && ((needed_intregs && TYPE_ALIGN (type) > 64) - || TYPE_ALIGN (type) > 128)); - - /* In case we are passing structure, verify that it is consecutive block - on the register save area. If not we need to do moves. */ -- if (!need_temp && !REG_P (container)) -+ if (!need_temp && !container_in_reg) - { - /* Verify that all registers are strictly consecutive */ - if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) -diff --git a/gcc/testsuite/gcc.target/i386/pr116621.c b/gcc/testsuite/gcc.target/i386/pr116621.c -new file mode 100644 -index 00000000000..704266458a8 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr116621.c -@@ -0,0 +1,43 @@ -+/* { dg-do run } */ -+/* { dg-options "-O2" } */ -+ -+#include -+#include -+ -+union S8302 -+{ -+ union -+ { -+ double b; -+ int c; -+ } a; -+ long double d; -+ unsigned short int f[5]; -+}; -+ -+union S8302 s8302; -+extern void check8302va (int i, ...); -+ -+int -+main (void) -+{ -+ memset (&s8302, '\0', sizeof (s8302)); -+ s8302.a.b = -221438.250000; -+ check8302va (1, s8302); -+ return 0; -+} -+ -+__attribute__((noinline, noclone)) -+void -+check8302va (int z, ...) -+{ -+ union S8302 arg, *p; -+ va_list ap; -+ -+ __builtin_va_start (ap, z); -+ p = &s8302; -+ arg = __builtin_va_arg (ap, union S8302); -+ if (p->a.b != arg.a.b) -+ __builtin_abort (); -+ __builtin_va_end (ap); -+} --- -2.31.1 - diff --git a/GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch b/GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch deleted file mode 100644 index a11b84a358b1662af3b8f826e981d5c7e8ca85b9..0000000000000000000000000000000000000000 --- a/GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 25cb153f93bb9ff3543ba8e31bbe7be4f6168aa4 Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Wed, 25 Sep 2024 16:39:04 +0800 -Subject: [PATCH] x86: Don't use address override with segment regsiter - -Address override only applies to the (reg32) part in the thread address -fs:(reg32). Don't rewrite thread address like - -(set (reg:CCZ 17 flags) - (compare:CCZ (reg:SI 98 [ __gmpfr_emax.0_1 ]) - (mem/c:SI (plus:SI (plus:SI (unspec:SI [ - (const_int 0 [0]) - ] UNSPEC_TP) - (reg:SI 107)) - (const:SI (unspec:SI [ - (symbol_ref:SI ("previous_emax") [flags 0x1a] ) - ] UNSPEC_DTPOFF))) [1 previous_emax+0 S4 A32]))) - -if address override is used to avoid the invalid memory operand like - - cmpl %fs:previous_emax@dtpoff(%eax), %r12d - -gcc/ - - PR target/116839 - * config/i386/i386.cc (ix86_rewrite_tls_address_1): Make it - static. Return if TLS address is thread register plus an integer - register. - -gcc/testsuite/ - - PR target/116839 - * gcc.target/i386/pr116839.c: New file. - -Signed-off-by: H.J. Lu -(cherry picked from commit c79cc30862d7255ca15884aa956d1ccfa279d86a) ---- - gcc/config/i386/i386.cc | 9 ++++- - gcc/testsuite/gcc.target/i386/pr116839.c | 48 ++++++++++++++++++++++++ - 2 files changed, 56 insertions(+), 1 deletion(-) - create mode 100644 gcc/testsuite/gcc.target/i386/pr116839.c - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 8f1c1f9ccd0..93d05a301c9 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -12458,7 +12458,7 @@ ix86_tls_address_pattern_p (rtx op) - } - - /* Rewrite *LOC so that it refers to a default TLS address space. */ --void -+static void - ix86_rewrite_tls_address_1 (rtx *loc) - { - subrtx_ptr_iterator::array_type array; -@@ -12480,6 +12480,13 @@ ix86_rewrite_tls_address_1 (rtx *loc) - if (GET_CODE (u) == UNSPEC - && XINT (u, 1) == UNSPEC_TP) - { -+ /* NB: Since address override only applies to the -+ (reg32) part in fs:(reg32), return if address -+ override is used. */ -+ if (Pmode != word_mode -+ && REG_P (XEXP (*x, 1 - i))) -+ return; -+ - addr_space_t as = DEFAULT_TLS_SEG_REG; - - *x = XEXP (*x, 1 - i); -diff --git a/gcc/testsuite/gcc.target/i386/pr116839.c b/gcc/testsuite/gcc.target/i386/pr116839.c -new file mode 100644 -index 00000000000..e5df8256251 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr116839.c -@@ -0,0 +1,48 @@ -+/* { dg-do compile { target { ! ia32 } } } */ -+/* { dg-require-effective-target maybe_x32 } */ -+/* { dg-options "-mx32 -O2 -fPIC -mtls-dialect=gnu2" } */ -+/* { dg-final { scan-assembler-not "cmpl\[ \t\]+%fs:previous_emax@dtpoff\\(%eax\\)" } } */ -+ -+typedef long mpfr_prec_t; -+typedef long mpfr_exp_t; -+typedef struct { -+ mpfr_prec_t _mpfr_prec; -+} __mpfr_struct; -+typedef __mpfr_struct mpfr_t[1]; -+extern _Thread_local mpfr_exp_t __gmpfr_emax; -+static _Thread_local mpfr_exp_t previous_emax; -+static _Thread_local mpfr_t bound_emax; -+extern const mpfr_t __gmpfr_const_log2_RNDD; -+extern const mpfr_t __gmpfr_const_log2_RNDU; -+ -+typedef enum { -+ MPFR_RNDN=0, -+ MPFR_RNDZ, -+ MPFR_RNDU, -+ MPFR_RNDD, -+ MPFR_RNDA, -+ MPFR_RNDF, -+ MPFR_RNDNA=-1 -+} mpfr_rnd_t; -+typedef __mpfr_struct *mpfr_ptr; -+typedef const __mpfr_struct *mpfr_srcptr; -+void mpfr_mul (mpfr_ptr, mpfr_srcptr, mpfr_rnd_t); -+ -+void -+foo (void) -+{ -+ mpfr_exp_t saved_emax; -+ -+ if (__gmpfr_emax != previous_emax) -+ { -+ saved_emax = __gmpfr_emax; -+ -+ bound_emax->_mpfr_prec = 32; -+ -+ mpfr_mul (bound_emax, saved_emax < 0 ? -+ __gmpfr_const_log2_RNDD : __gmpfr_const_log2_RNDU, -+ MPFR_RNDU); -+ previous_emax = saved_emax; -+ __gmpfr_emax = saved_emax; -+ } -+} --- -2.31.1 - diff --git a/GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch b/GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch deleted file mode 100644 index 969c80c8e1aa9eb60bb4a4c3f79cd785dddc41ad..0000000000000000000000000000000000000000 --- a/GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch +++ /dev/null @@ -1,77 +0,0 @@ -From 1032b72548c47a199e0407c69d6740d6c3341f43 Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Fri, 4 Oct 2024 16:21:15 +0800 -Subject: [PATCH] x86: Disable stack protector for naked functions - -Since naked functions should not enable stack protector, define -TARGET_STACK_PROTECT_RUNTIME_ENABLED_P to disable stack protector -for naked functions. - -gcc/ - - PR target/116962 - * config/i386/i386.cc (ix86_stack_protect_runtime_enabled_p): New - function. - (TARGET_STACK_PROTECT_RUNTIME_ENABLED_P): New. - -gcc/testsuite/ - - PR target/116962 - * gcc.target/i386/pr116962.c: New file. - -Signed-off-by: H.J. Lu -(cherry picked from commit 7d2845da112214f064e7b24531cc67e256b5177e) ---- - gcc/config/i386/i386.cc | 11 +++++++++++ - gcc/testsuite/gcc.target/i386/pr116962.c | 10 ++++++++++ - 2 files changed, 21 insertions(+) - create mode 100644 gcc/testsuite/gcc.target/i386/pr116962.c - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index 2a0a79888be..f8ab1893985 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -24265,6 +24265,13 @@ ix86_stack_protect_guard (void) - return default_stack_protect_guard (); - } - -+static bool -+ix86_stack_protect_runtime_enabled_p (void) -+{ -+ /* Naked functions should not enable stack protector. */ -+ return !ix86_function_naked (current_function_decl); -+} -+ - /* For 32-bit code we can save PIC register setup by using - __stack_chk_fail_local hidden function instead of calling - __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC -@@ -26582,6 +26589,10 @@ ix86_libgcc_floating_mode_supported_p - #undef TARGET_STACK_PROTECT_GUARD - #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard - -+#undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P -+#define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \ -+ ix86_stack_protect_runtime_enabled_p -+ - #if !TARGET_MACHO - #undef TARGET_STACK_PROTECT_FAIL - #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail -diff --git a/gcc/testsuite/gcc.target/i386/pr116962.c b/gcc/testsuite/gcc.target/i386/pr116962.c -new file mode 100644 -index 00000000000..ced16eee746 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr116962.c -@@ -0,0 +1,10 @@ -+/* { dg-do compile { target fstack_protector } } */ -+/* { dg-options "-O2 -fstack-protector-all" } */ -+/* { dg-final { scan-assembler-not "__stack_chk_fail" } } */ -+ -+__attribute__ ((naked)) -+void -+foo (void) -+{ -+ asm ("ret"); -+} --- -2.31.1 - diff --git a/GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch b/GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch deleted file mode 100644 index 23fed87d5122c2831b8d94498c010975f80f2295..0000000000000000000000000000000000000000 --- a/GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 5f47dc6e9aa82e1c00ed030cb9469cd84df8691d Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Tue, 11 Feb 2025 13:47:54 +0800 -Subject: [PATCH] x86: Correct ASM_OUTPUT_SYMBOL_REF - -x is not a macro argument. It just happens to work as final.cc passes -x for 2nd argument: - -final.cc: ASM_OUTPUT_SYMBOL_REF (file, x); - - PR target/118825 - * config/i386/i386.h (ASM_OUTPUT_SYMBOL_REF): Replace x with - SYM. - -Signed-off-by: H.J. Lu -(cherry picked from commit 7317fc0b03380a83ad03a5fc4fabef5f38c44c9d) ---- - gcc/config/i386/i386.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h -index 1c456c3422f..2fc82b175e6 100644 ---- a/gcc/config/i386/i386.h -+++ b/gcc/config/i386/i386.h -@@ -2229,7 +2229,7 @@ extern int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER]; - #define ASM_OUTPUT_SYMBOL_REF(FILE, SYM) \ - do { \ - const char *name \ -- = assemble_name_resolve (XSTR (x, 0)); \ -+ = assemble_name_resolve (XSTR (SYM, 0)); \ - /* In -masm=att wrap identifiers that start with $ \ - into parens. */ \ - if (ASSEMBLER_DIALECT == ASM_ATT \ --- -2.31.1 - diff --git a/GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch b/GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch deleted file mode 100644 index fb6767759511d4933550d95664b742edebc833f6..0000000000000000000000000000000000000000 --- a/GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch +++ /dev/null @@ -1,65 +0,0 @@ -From 0e4986a933e0f69b0d34cfefde117b510e4b09e7 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Wed, 26 Feb 2025 11:28:45 +0800 -Subject: [PATCH] i386: Treat Granite Rapids/Granite Rapids-D similar as - Sapphire Rapids in x86-tune.def - -Since GNR, GNR-D are both P-core based, we should treat them -just like SPR for now. - -gcc/ChangeLog: - - * config/i386/x86-tune.def - (X86_TUNE_DEST_FALSE_DEP_FOR_GLC): Add GNR, GNR-D. - (X86_TUNE_AVOID_256FMA_CHAINS): Ditto. - (X86_TUNE_AVX512_MOVE_BY_PIECES): Ditto. - (X86_TUNE_AVX512_STORE_BY_PIECES): Ditto. ---- - gcc/config/i386/x86-tune.def | 13 ++++++++----- - 1 file changed, 8 insertions(+), 5 deletions(-) - -diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def -index 46e847589..0523a75a2 100644 ---- a/gcc/config/i386/x86-tune.def -+++ b/gcc/config/i386/x86-tune.def -@@ -87,8 +87,8 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, - several insns to break false dependency on the dest register for GLC - micro-architecture. */ - DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, -- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_CORE_HYBRID -- | m_CORE_ATOM) -+ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS -+ | m_GRANITERAPIDS_D | m_CORE_HYBRID | m_CORE_ATOM) - - /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies - are resolved on SSE register parts instead of whole registers, so we may -@@ -521,7 +521,8 @@ DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 - /* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or - smaller FMA chain. */ - DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 | m_ZNVER4 -- | m_CORE_HYBRID | m_SAPPHIRERAPIDS | m_CORE_ATOM | m_GENERIC) -+ | m_CORE_HYBRID | m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D -+ | m_CORE_ATOM | m_GENERIC) - - /* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or - smaller FMA chain. */ -@@ -583,12 +584,14 @@ DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces", - /* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit - AVX instructions. */ - DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces", -- m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) -+ m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D -+ | m_ZNVER4 | m_ZNVER5) - - /* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit - AVX instructions. */ - DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces", -- m_SAPPHIRERAPIDS | m_ZNVER4 | m_ZNVER5) -+ m_SAPPHIRERAPIDS | m_GRANITERAPIDS | m_GRANITERAPIDS_D -+ | m_ZNVER4 | m_ZNVER5) - - /*****************************************************************************/ - /*****************************************************************************/ --- -2.31.1 - diff --git a/GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch b/GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch deleted file mode 100644 index 444a6db885976247eddbbce2463cd496c6f6e1ce..0000000000000000000000000000000000000000 --- a/GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch +++ /dev/null @@ -1,138 +0,0 @@ -From 5ba6fdc5476d33c57f4751cae93054fdbc7211c0 Mon Sep 17 00:00:00 2001 -From: Haochen Jiang -Date: Mon, 24 Mar 2025 15:51:16 +0800 -Subject: [PATCH] i386: Add -mavx10.1 back with 512 bit alias - -When AVX10.1 options are added into GCC 14, E-core is supposed to -support up to 256 bit vector width, while P-core up to 512 bit vector -width. Therefore, we added avx10.1-256 and avx10.1-512 options into -compiler and alias avx10.1 to 256 bit for compatibility since there -will be real platforms with 256 bit only support. - -However, all the future platforms will now support 512 bit vector width, -including P-core and E-core. Therefore, we could alias avx10.1 directly -to 512 bit. However, avx10.1 alias to 256 bit has been there in GCC 14.1 -and GCC 14.2, so we have to raise a warning since GCC 14.3 for this -behavior change. - -While backporting the patch from GCC 15, we choose to only warn when -users use -mavx10.1 option in order not to interrupt the usage of other -options since -mavx10.1-256/512 and -mevex512 will be dropped in GCC 16. -There is no need to warn them this early in GCC 14 to overwhelm users. - -gcc/ChangeLog: - - * common/config/i386/i386-isas.h: Add avx10.1. - * config/i386/i386-c.cc (ix86_target_macros_internal): Ditto. - * config/i386/i386-options.cc - (ix86_valid_target_attribute_inner_p): Ditto. - * config/i386/i386.opt: Ditto. - * config/i386/i386.opt.urls: Ditto. - * doc/extend.texi: Ditto. - * doc/sourcebuild.texi: Ditto. ---- - gcc/common/config/i386/i386-isas.h | 1 + - gcc/config/i386/i386-c.cc | 5 +---- - gcc/config/i386/i386-options.cc | 1 + - gcc/config/i386/i386.opt | 5 +++++ - gcc/config/i386/i386.opt.urls | 3 +++ - gcc/doc/extend.texi | 6 ++++++ - gcc/doc/sourcebuild.texi | 3 +++ - 7 files changed, 20 insertions(+), 4 deletions(-) - -diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h -index 017c795e211..cdbc188620a 100644 ---- a/gcc/common/config/i386/i386-isas.h -+++ b/gcc/common/config/i386/i386-isas.h -@@ -195,4 +195,5 @@ ISA_NAMES_TABLE_START - ISA_NAMES_TABLE_ENTRY("usermsr", FEATURE_USER_MSR, P_NONE, "-musermsr") - ISA_NAMES_TABLE_ENTRY("avx10.1-256", FEATURE_AVX10_1_256, P_AVX10_1_256, "-mavx10.1-256") - ISA_NAMES_TABLE_ENTRY("avx10.1-512", FEATURE_AVX10_1_512, P_AVX10_1_512, "-mavx10.1-512") -+ ISA_NAMES_TABLE_ENTRY("avx10.1", FEATURE_AVX10_1_512, P_AVX10_1_512, "-mavx10.1") - ISA_NAMES_TABLE_END -diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc -index 07f4936ba91..0103b8543a1 100644 ---- a/gcc/config/i386/i386-c.cc -+++ b/gcc/config/i386/i386-c.cc -@@ -743,10 +743,7 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, - if (isa_flag2 & OPTION_MASK_ISA2_USER_MSR) - def_or_undef (parse_in, "__USER_MSR__"); - if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_256) -- { -- def_or_undef (parse_in, "__AVX10_1_256__"); -- def_or_undef (parse_in, "__AVX10_1__"); -- } -+ def_or_undef (parse_in, "__AVX10_1_256__"); - if (isa_flag2 & OPTION_MASK_ISA2_AVX10_1_512) - def_or_undef (parse_in, "__AVX10_1_512__"); - if (isa_flag2 & OPTION_MASK_ISA2_APX_F) -diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc -index 11c6ddf0f44..57c83a9aa1c 100644 ---- a/gcc/config/i386/i386-options.cc -+++ b/gcc/config/i386/i386-options.cc -@@ -1137,6 +1137,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], - IX86_ATTR_ISA ("usermsr", OPT_musermsr), - IX86_ATTR_ISA ("avx10.1-256", OPT_mavx10_1_256), - IX86_ATTR_ISA ("avx10.1-512", OPT_mavx10_1_512), -+ IX86_ATTR_ISA ("avx10.1", OPT_mavx10_1_512), - - /* enum options */ - IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), -diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt -index f99c4e3ae5d..df95963dd9f 100644 ---- a/gcc/config/i386/i386.opt -+++ b/gcc/config/i386/i386.opt -@@ -1380,3 +1380,8 @@ mavx10.1-512 - Target Mask(ISA2_AVX10_1_512) Var(ix86_isa_flags2) Save - Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, - and AVX10.1-512 built-in functions and code generation. -+ -+mavx10.1 -+Target Alias(mavx10.1-512) Warn(%<-mavx10.1%> is aliased to 512 bit since GCC14.3) -+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, -+and AVX10.1-512 built-in functions and code generation. -diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls -index 3ed76635002..81c5bb9a927 100644 ---- a/gcc/config/i386/i386.opt.urls -+++ b/gcc/config/i386/i386.opt.urls -@@ -615,3 +615,6 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-256) - mavx10.1-512 - UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1-512) - -+mavx10.1 -+UrlSuffix(gcc/x86-Options.html#index-mavx10_002e1) -+ -diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi -index 8bd30bb2a46..27fefb30041 100644 ---- a/gcc/doc/extend.texi -+++ b/gcc/doc/extend.texi -@@ -7395,6 +7395,12 @@ Disable the generation of the AVX10.1 instructions. - Enable the generation of the AVX10.1 instructions with 512 bit support. - Disable the generation of the AVX10.1 instructions. - -+@cindex @code{target("avx10.1")} function attribute, x86 -+@item avx10.1 -+@itemx no-avx10.1 -+Enable the generation of the AVX10.1 instructions with 512 bit support. -+Disable the generation of the AVX10.1 instructions. -+ - @cindex @code{target("cld")} function attribute, x86 - @item cld - @itemx no-cld -diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi -index 5b026cfe073..23dedef4161 100644 ---- a/gcc/doc/sourcebuild.texi -+++ b/gcc/doc/sourcebuild.texi -@@ -2549,6 +2549,9 @@ Target supports the execution of @code{avx10.1-256} instructions. - @item avx10.1-512 - Target supports the execution of @code{avx10.1-512} instructions. - -+@item avx10.1 -+Target supports the execution of @code{avx10.1} instructions. -+ - @item avx2 - Target supports compiling @code{avx2} instructions. - --- -2.31.1 - diff --git a/GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch b/GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch deleted file mode 100644 index 1aceb888f4f212fcbb9f78ac4526563b026e98f3..0000000000000000000000000000000000000000 --- a/GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch +++ /dev/null @@ -1,154 +0,0 @@ -From d275b3748a23aa4b6b821ae3bdf1751010923773 Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Tue, 27 Aug 2024 07:03:22 -0700 -Subject: [PATCH] Extend check-function-bodies to allow label and directives - -As PR target/116174 shown, we may need to verify labels and the directive -order. Extend check-function-bodies to support matched output lines to -allow label and directives. - -gcc/ - - * doc/sourcebuild.texi (check-function-bodies): Add an optional - argument for matched output lines. - -gcc/testsuite/ - - * gcc.target/i386/pr116174.c: Use check-function-bodies. - * lib/scanasm.exp (parse_function_bodies): Append the line if - $up_config(matched) matches the line. - (check-function-bodies): Add an argument for matched. Set - up_config(matched) to $matched. Append the expected line without - $config(line_prefix) to function_regexp if it starts with ".L". - -Signed-off-by: H.J. Lu -(cherry picked from commit d6bb1e257fc414d21bc31faa7ddecbc93a197e3c) ---- - gcc/doc/sourcebuild.texi | 9 ++++++--- - gcc/testsuite/gcc.target/i386/pr116174.c | 18 +++++++++++++++--- - gcc/testsuite/lib/scanasm.exp | 15 +++++++++++++-- - 3 files changed, 34 insertions(+), 8 deletions(-) - -diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi -index 23dedef4161..c8130dc1ba9 100644 ---- a/gcc/doc/sourcebuild.texi -+++ b/gcc/doc/sourcebuild.texi -@@ -3440,7 +3440,7 @@ assembly output. - Passes if @var{symbol} is not defined as a hidden symbol in the test's - assembly output. - --@item check-function-bodies @var{prefix} @var{terminator} [@var{options} [@{ target/xfail @var{selector} @}]] -+@item check-function-bodies @var{prefix} @var{terminator} [@var{options} [@{ target/xfail @var{selector} @} [@var{matched}]]] - Looks through the source file for comments that give the expected assembly - output for selected functions. Each line of expected output starts with the - prefix string @var{prefix} and the expected output for a function as a whole -@@ -3467,8 +3467,11 @@ Depending on the configuration (see - @code{configure_check-function-bodies} in - @file{gcc/testsuite/lib/scanasm.exp}), the test may discard from the - compiler's assembly output directives such as @code{.cfi_startproc}, --local label definitions such as @code{.LFB0}, and more. --It then matches the result against the expected -+local label definitions such as @code{.LFB0}, and more. This behavior -+can be overridden using the optional @var{matched} argument, which -+specifies a regexp for lines that should not be discarded in this way. -+ -+The test then matches the result against the expected - output for a function as a single regular expression. This means that - later lines can use backslashes to refer back to @samp{(@dots{})} - captures on earlier lines. For example: -diff --git a/gcc/testsuite/gcc.target/i386/pr116174.c b/gcc/testsuite/gcc.target/i386/pr116174.c -index 8877d0b51af..686aeb9ff31 100644 ---- a/gcc/testsuite/gcc.target/i386/pr116174.c -+++ b/gcc/testsuite/gcc.target/i386/pr116174.c -@@ -1,6 +1,20 @@ - /* { dg-do compile { target *-*-linux* } } */ --/* { dg-options "-O2 -fcf-protection=branch" } */ -+/* { dg-options "-O2 -g0 -fcf-protection=branch" } */ -+/* Keep labels and directives ('.p2align', '.cfi_startproc'). -+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ - -+/* -+**foo: -+**.LFB0: -+** .cfi_startproc -+** ( -+** endbr64 -+** .p2align 5 -+** | -+** endbr32 -+** ) -+**... -+*/ - char * - foo (char *dest, const char *src) - { -@@ -8,5 +22,3 @@ foo (char *dest, const char *src) - /* nothing */; - return --dest; - } -- --/* { dg-final { scan-assembler "\t\.cfi_startproc\n\tendbr(32|64)\n" } } */ -diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp -index 6cf9997240d..d1c8e3b5079 100644 ---- a/gcc/testsuite/lib/scanasm.exp -+++ b/gcc/testsuite/lib/scanasm.exp -@@ -952,6 +952,9 @@ proc parse_function_bodies { config filename result } { - verbose "parse_function_bodies: $function_name:\n$function_body" - set up_result($function_name) $function_body - set in_function 0 -+ } elseif { $up_config(matched) ne "" \ -+ && [regexp $up_config(matched) $line] } { -+ append function_body $line "\n" - } elseif { [regexp $up_config(fluff) $line] } { - verbose "parse_function_bodies: $function_name: ignoring fluff line: $line" - } else { -@@ -982,7 +985,7 @@ proc check_function_body { functions name body_regexp } { - - # Check the implementations of functions against expected output. Used as: - # --# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR]] } } -+# { dg-do { check-function-bodies PREFIX TERMINATOR[ OPTION[ SELECTOR [MATCHED]]] } } - # - # See sourcebuild.texi for details. - -@@ -990,7 +993,7 @@ proc check-function-bodies { args } { - if { [llength $args] < 2 } { - error "too few arguments to check-function-bodies" - } -- if { [llength $args] > 4 } { -+ if { [llength $args] > 5 } { - error "too many arguments to check-function-bodies" - } - -@@ -1029,6 +1032,11 @@ proc check-function-bodies { args } { - } - } - -+ set matched "" -+ if { [llength $args] >= 5 } { -+ set matched [lindex $args 4] -+ } -+ - set testcase [testname-for-summary] - # The name might include a list of options; extract the file name. - set filename [lindex $testcase 0] -@@ -1048,6 +1056,7 @@ proc check-function-bodies { args } { - # (name in \1). This may be different from '$config(start)'. - set start_expected {^(\S+):$} - -+ set config(matched) $matched - configure_check-function-bodies config - set have_bodies 0 - if { [is_remote host] } { -@@ -1090,6 +1099,8 @@ proc check-function-bodies { args } { - append function_regexp ")" - } elseif { [string equal $line "..."] } { - append function_regexp ".*" -+ } elseif { [regexp {^\.L} $line] } { -+ append function_regexp $line "\n" - } else { - append function_regexp $config(line_prefix) $line "\n" - } --- -2.31.1 - diff --git a/GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch b/GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch deleted file mode 100644 index 121bae33fda57254a7893f455373cdc3bd1c4619..0000000000000000000000000000000000000000 --- a/GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch +++ /dev/null @@ -1,253 +0,0 @@ -From a33e2808c8987dcd422c5156c47bcf672ddc7f9f Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Sun, 13 Apr 2025 12:20:42 -0700 -Subject: [PATCH] APX: Don't use red-zone with 32 GPRs and no caller-saved - registers - -Don't use red-zone when there are no caller-saved registers with 32 GPRs -since 128-byte red-zone is too small for 31 GPRs. - -gcc/ - - PR target/119784 - * config/i386/i386.cc (ix86_using_red_zone): Don't use red-zone - with 32 GPRs and no caller-saved registers. - -gcc/testsuite/ - - PR target/119784 - * gcc.target/i386/pr119784a.c: New test. - * gcc.target/i386/pr119784b.c: Likewise. - -Signed-off-by: H.J. Lu -(cherry picked from commit 0a074b8c7e79f9d9359d044f1499b0a9ce9d2801) ---- - gcc/config/i386/i386.cc | 6 ++ - gcc/testsuite/gcc.target/i386/pr119784a.c | 96 +++++++++++++++++++++++ - gcc/testsuite/gcc.target/i386/pr119784b.c | 87 ++++++++++++++++++++ - 3 files changed, 189 insertions(+) - create mode 100644 gcc/testsuite/gcc.target/i386/pr119784a.c - create mode 100644 gcc/testsuite/gcc.target/i386/pr119784b.c - -diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc -index dde4ba5ca19..ce726f40f09 100644 ---- a/gcc/config/i386/i386.cc -+++ b/gcc/config/i386/i386.cc -@@ -444,6 +444,9 @@ int ix86_arch_specified; - indirect thunk pushes the return address onto stack, destroying - red-zone. - -+ NB: Don't use red-zone for functions with no_caller_saved_registers -+ and 32 GPRs since 128-byte red-zone is too small for 31 GPRs. -+ - TODO: If we can reserve the first 2 WORDs, for PUSH and, another - for CALL, in red-zone, we can allow local indirect jumps with - indirect thunk. */ -@@ -453,6 +456,9 @@ ix86_using_red_zone (void) - { - return (TARGET_RED_ZONE - && !TARGET_64BIT_MS_ABI -+ && (!TARGET_APX_EGPR -+ || (cfun->machine->call_saved_registers -+ != TYPE_NO_CALLER_SAVED_REGISTERS)) - && (!cfun->machine->has_local_indirect_jump - || cfun->machine->indirect_branch_type == indirect_branch_keep)); - } -diff --git a/gcc/testsuite/gcc.target/i386/pr119784a.c b/gcc/testsuite/gcc.target/i386/pr119784a.c -new file mode 100644 -index 00000000000..8a119d4cc1f ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr119784a.c -@@ -0,0 +1,96 @@ -+/* { dg-do compile { target { *-*-linux* && lp64 } } } */ -+/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ -+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ -+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ -+ -+/* start must save and restore all caller saved registers. */ -+ -+/* -+**start: -+**.LFB[0-9]+: -+** .cfi_startproc -+** subq \$248, %rsp -+**... -+** movq %rax, \(%rsp\) -+** movq %rdx, 8\(%rsp\) -+** movq %rcx, 16\(%rsp\) -+** movq %rbx, 24\(%rsp\) -+** movq %rsi, 32\(%rsp\) -+** movq %rdi, 40\(%rsp\) -+**... -+** movq %rbp, 48\(%rsp\) -+** movq %r8, 56\(%rsp\) -+** movq %r9, 64\(%rsp\) -+** movq %r10, 72\(%rsp\) -+** movq %r11, 80\(%rsp\) -+** movq %r12, 88\(%rsp\) -+** movq %r13, 96\(%rsp\) -+** movq %r14, 104\(%rsp\) -+** movq %r15, 112\(%rsp\) -+** movq %r16, 120\(%rsp\) -+** movq %r17, 128\(%rsp\) -+** movq %r18, 136\(%rsp\) -+** movq %r19, 144\(%rsp\) -+** movq %r20, 152\(%rsp\) -+** movq %r21, 160\(%rsp\) -+** movq %r22, 168\(%rsp\) -+** movq %r23, 176\(%rsp\) -+** movq %r24, 184\(%rsp\) -+** movq %r25, 192\(%rsp\) -+** movq %r26, 200\(%rsp\) -+** movq %r27, 208\(%rsp\) -+** movq %r28, 216\(%rsp\) -+** movq %r29, 224\(%rsp\) -+** movq %r30, 232\(%rsp\) -+** movq %r31, 240\(%rsp\) -+**... -+** call \*code\(%rip\) -+** movq \(%rsp\), %rax -+** movq 8\(%rsp\), %rdx -+** movq 16\(%rsp\), %rcx -+** movq 24\(%rsp\), %rbx -+** movq 32\(%rsp\), %rsi -+** movq 40\(%rsp\), %rdi -+** movq 48\(%rsp\), %rbp -+** movq 56\(%rsp\), %r8 -+** movq 64\(%rsp\), %r9 -+** movq 72\(%rsp\), %r10 -+** movq 80\(%rsp\), %r11 -+** movq 88\(%rsp\), %r12 -+** movq 96\(%rsp\), %r13 -+** movq 104\(%rsp\), %r14 -+** movq 112\(%rsp\), %r15 -+** movq 120\(%rsp\), %r16 -+** movq 128\(%rsp\), %r17 -+** movq 136\(%rsp\), %r18 -+** movq 144\(%rsp\), %r19 -+** movq 152\(%rsp\), %r20 -+** movq 160\(%rsp\), %r21 -+** movq 168\(%rsp\), %r22 -+** movq 176\(%rsp\), %r23 -+** movq 184\(%rsp\), %r24 -+** movq 192\(%rsp\), %r25 -+** movq 200\(%rsp\), %r26 -+** movq 208\(%rsp\), %r27 -+** movq 216\(%rsp\), %r28 -+** movq 224\(%rsp\), %r29 -+** movq 232\(%rsp\), %r30 -+** movq 240\(%rsp\), %r31 -+** addq \$248, %rsp -+**... -+** ret -+** .cfi_endproc -+**... -+*/ -+ -+#define DONT_SAVE_REGS __attribute__((no_callee_saved_registers)) -+#define SAVE_REGS __attribute__((no_caller_saved_registers)) -+ -+typedef DONT_SAVE_REGS void (*op_t)(void); -+ -+extern op_t code[]; -+ -+SAVE_REGS void start() -+{ -+ code[0](); -+} -diff --git a/gcc/testsuite/gcc.target/i386/pr119784b.c b/gcc/testsuite/gcc.target/i386/pr119784b.c -new file mode 100644 -index 00000000000..c6761976ed6 ---- /dev/null -+++ b/gcc/testsuite/gcc.target/i386/pr119784b.c -@@ -0,0 +1,87 @@ -+/* { dg-do compile { target { *-*-linux* && x32 } } } */ -+/* { dg-options "-O2 -fno-pic -mtune=generic -mgeneral-regs-only -mapxf -mtune-ctrl=prologue_using_move,epilogue_using_move" } */ -+/* Keep labels and directives ('.cfi_startproc', '.cfi_endproc'). */ -+/* { dg-final { check-function-bodies "**" "" "" { target "*-*-*" } {^\t?\.} } } */ -+ -+/* start must save and restore all caller saved registers. */ -+ -+/* -+**start: -+**.LFB[0-9]+: -+** .cfi_startproc -+** subl \$248, %esp -+**... -+** movq %rax, \(%rsp\) -+** movq %rdx, 8\(%rsp\) -+** movq %rcx, 16\(%rsp\) -+** movq %rbx, 24\(%rsp\) -+** movq %rsi, 32\(%rsp\) -+** movq %rdi, 40\(%rsp\) -+**... -+** movq %rbp, 48\(%rsp\) -+** movq %r8, 56\(%rsp\) -+** movq %r9, 64\(%rsp\) -+** movq %r10, 72\(%rsp\) -+** movq %r11, 80\(%rsp\) -+** movq %r12, 88\(%rsp\) -+** movq %r13, 96\(%rsp\) -+** movq %r14, 104\(%rsp\) -+** movq %r15, 112\(%rsp\) -+** movq %r16, 120\(%rsp\) -+** movq %r17, 128\(%rsp\) -+** movq %r18, 136\(%rsp\) -+** movq %r19, 144\(%rsp\) -+** movq %r20, 152\(%rsp\) -+** movq %r21, 160\(%rsp\) -+** movq %r22, 168\(%rsp\) -+** movq %r23, 176\(%rsp\) -+** movq %r24, 184\(%rsp\) -+** movq %r25, 192\(%rsp\) -+** movq %r26, 200\(%rsp\) -+** movq %r27, 208\(%rsp\) -+** movq %r28, 216\(%rsp\) -+** movq %r29, 224\(%rsp\) -+** movq %r30, 232\(%rsp\) -+** movq %r31, 240\(%rsp\) -+**... -+** movl code\(%rip\), %ebp -+** call \*%rbp -+** movq \(%rsp\), %rax -+** movq 8\(%rsp\), %rdx -+** movq 16\(%rsp\), %rcx -+** movq 24\(%rsp\), %rbx -+** movq 32\(%rsp\), %rsi -+** movq 40\(%rsp\), %rdi -+** movq 48\(%rsp\), %rbp -+** movq 56\(%rsp\), %r8 -+** movq 64\(%rsp\), %r9 -+** movq 72\(%rsp\), %r10 -+** movq 80\(%rsp\), %r11 -+** movq 88\(%rsp\), %r12 -+** movq 96\(%rsp\), %r13 -+** movq 104\(%rsp\), %r14 -+** movq 112\(%rsp\), %r15 -+** movq 120\(%rsp\), %r16 -+** movq 128\(%rsp\), %r17 -+** movq 136\(%rsp\), %r18 -+** movq 144\(%rsp\), %r19 -+** movq 152\(%rsp\), %r20 -+** movq 160\(%rsp\), %r21 -+** movq 168\(%rsp\), %r22 -+** movq 176\(%rsp\), %r23 -+** movq 184\(%rsp\), %r24 -+** movq 192\(%rsp\), %r25 -+** movq 200\(%rsp\), %r26 -+** movq 208\(%rsp\), %r27 -+** movq 216\(%rsp\), %r28 -+** movq 224\(%rsp\), %r29 -+** movq 232\(%rsp\), %r30 -+** movq 240\(%rsp\), %r31 -+** addl \$248, %esp -+**... -+** ret -+** .cfi_endproc -+**... -+*/ -+ -+#include "pr119784a.c" --- -2.31.1 - diff --git a/GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch b/GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch deleted file mode 100644 index db8c931eb92be49afbbd3f9ce95d745dc5a38b78..0000000000000000000000000000000000000000 --- a/GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch +++ /dev/null @@ -1,43 +0,0 @@ -From 8cc672d3d3a2f090d840fb2a8c344cf927715d6c Mon Sep 17 00:00:00 2001 -From: "H.J. Lu" -Date: Mon, 14 Apr 2025 15:49:26 -0700 -Subject: [PATCH] x86: Update gcc.target/i386/apx-interrupt-1.c - -ix86_add_cfa_restore_note omits the REG_CFA_RESTORE REG note for registers -pushed in red-zone. Since - -commit 0a074b8c7e79f9d9359d044f1499b0a9ce9d2801 -Author: H.J. Lu -Date: Sun Apr 13 12:20:42 2025 -0700 - - APX: Don't use red-zone with 32 GPRs and no caller-saved registers - -disabled red-zone, update gcc.target/i386/apx-interrupt-1.c to expect -31 .cfi_restore directives. - - PR target/119784 - * gcc.target/i386/apx-interrupt-1.c: Expect 31 .cfi_restore - directives. - -Signed-off-by: H.J. Lu -(cherry picked from commit 5ed2fa4768f3d318b8ace5bd4a095596e06fad7b) ---- - gcc/testsuite/gcc.target/i386/apx-interrupt-1.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c -index fefe2e6d6fc..fa1acc7a142 100644 ---- a/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c -+++ b/gcc/testsuite/gcc.target/i386/apx-interrupt-1.c -@@ -66,7 +66,7 @@ void foo (void *frame) - /* { dg-final { scan-assembler-times {\t\.cfi_offset 132, -120} 1 } } */ - /* { dg-final { scan-assembler-times {\t\.cfi_offset 131, -128} 1 } } */ - /* { dg-final { scan-assembler-times {\t\.cfi_offset 130, -136} 1 } } */ --/* { dg-final { scan-assembler-times ".cfi_restore" 15} } */ -+/* { dg-final { scan-assembler-times ".cfi_restore" 31 } } */ - /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)ax" 1 } } */ - /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)bx" 1 } } */ - /* { dg-final { scan-assembler-times "pop(?:l|q)\[\\t \]*%(?:e|r)cx" 1 } } */ --- -2.31.1 - diff --git a/GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch b/GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch deleted file mode 100644 index d73a4a0d7dc7f65593065c1e47bc90fec6fb3f5e..0000000000000000000000000000000000000000 --- a/GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 058f489e8223db7b2b5ebf386580e3f407638382 Mon Sep 17 00:00:00 2001 -From: liuhongt -Date: Mon, 28 Apr 2025 07:45:50 -0700 -Subject: [PATCH] Remove other processors from X86_TUNE_DEST_FALSE_DEP_FOR_GLC - except GLC - -Since the tune if only for GLC(sapphirerapids and alderlake-P). - -gcc/ChangeLog: - - * config/i386/x86-tune.def (X86_TUNE_DEST_FALSE_DEP_FOR_GLC): - Remove other processor except for GLC since this one is only - for GLC. - -(cherry picked from commit 1ad6e171b126a82f38b1e8cbfd207f1d91c58a59) ---- - gcc/config/i386/x86-tune.def | 3 +-- - 1 file changed, 1 insertion(+), 2 deletions(-) - -diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def -index 0523a75a2..f90d270fd 100644 ---- a/gcc/config/i386/x86-tune.def -+++ b/gcc/config/i386/x86-tune.def -@@ -87,8 +87,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY, - several insns to break false dependency on the dest register for GLC - micro-architecture. */ - DEF_TUNE (X86_TUNE_DEST_FALSE_DEP_FOR_GLC, -- "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_GRANITERAPIDS -- | m_GRANITERAPIDS_D | m_CORE_HYBRID | m_CORE_ATOM) -+ "dest_false_dep_for_glc", m_SAPPHIRERAPIDS | m_ALDERLAKE) - - /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies - are resolved on SSE register parts instead of whole registers, so we may --- -2.31.1 - diff --git a/gcc-14.2.0.tar.xz b/gcc-14.3.0.tar.xz similarity index 83% rename from gcc-14.2.0.tar.xz rename to gcc-14.3.0.tar.xz index 89e6b270fa1d51677b20483e4ff93e6b8e7d0f5d..ce96da0cdccf60678c2922b56e28b1f1567e05c8 100644 Binary files a/gcc-14.2.0.tar.xz and b/gcc-14.3.0.tar.xz differ diff --git a/gcc-14.spec b/gcc-14.spec index 2e6f9ccc360d069362ba4cda082a62fefb63cc74..bb3e99ec708ed67d392196436d353dd08b972ce6 100644 --- a/gcc-14.spec +++ b/gcc-14.spec @@ -89,13 +89,13 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: %{?_scl_prefix}gcc%{gcc_ver} -Version: 14.2.1 -Release: 8 +Version: 14.3.1 +Release: 1 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have # GCC Runtime Exception. License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD ExcludeArch: loongarch64 -Source0: https://ftp.gnu.org/gnu/gcc/gcc-14.2.0/gcc-14.2.0.tar.xz +Source0: https://ftp.gnu.org/gnu/gcc/gcc-14.3.0/gcc-14.3.0.tar.xz URL: http://gcc.gnu.org @@ -171,41 +171,8 @@ Provides: %{?_scl_prefix}gcc%{gcc_ver}(major) = %{gcc_major} Patch1001: GCC14-1001-libstdc++-compat.patch Patch1002: GCC14-1002-change-gcc-version.patch -Patch1003: GCC14-1003-i386-Add-non-optimize-prefetchi-intrins.patch Patch1004: GCC14-1004-riscv-lib64.patch Patch1005: GCC14-1005-libstdc-compat-Update-symbol-list-for-RISC-V-64.patch -Patch1006: GCC14-1006-Refine-constraint-Bk-to-define_special_memory_constr.patch -Patch1007: GCC14-1007-i386-Fix-some-vex-insns-that-prohibit-egpr.patch -Patch1008: GCC14-1008-Align-ix86_-move_max-store_max-with-vectorizer.patch -Patch1009: GCC14-1009-Check-avx-upper-register-for-parallel.patch -Patch1010: GCC14-1010-i386-Fix-vfpclassph-non-optimizied-intrin.patch -Patch1011: GCC14-1011-doc-Enhance-Intel-CPU-documentation.patch -Patch1012: GCC14-1012-doc-Add-more-alias-option-and-reorder-Intel-CPU-marc.patch -Patch1013: GCC14-1013-Add-new-microarchitecture-tune-for-SRF-GRR-CWF.patch -Patch1014: GCC14-1014-Refine-splitters-related-to-combine-vpcmpuw-zero_ext.patch -Patch1015: GCC14-1015-Fix-ICE-due-to-isa-mismatch-for-the-builtins.patch -Patch1016: GCC14-1016-Fix-ICE-due-to-subreg-us_truncate.patch -Patch1017: GCC14-1017-APX-PPX-Avoid-generating-unmatched-pushp-popp-in-pro.patch -Patch1018: GCC14-1018-i386-Do-not-allow-pointer-conversion-for-CMPccXADD-i.patch -Patch1019: GCC14-1019-i386-Add-OPTION_MASK_ISA2_EVEX512-for-some-AVX512-in.patch -Patch1020: GCC14-1020-i386-Modify-regexp-of-pr117304-1.c.patch -Patch1021: GCC14-1021-i386-Add-new-model-number-for-Arrow-Lake.patch -Patch1022: GCC14-1022-i386-Zero-extend-32-bit-address-to-64-bit-with-optio.patch -Patch1023: GCC14-1023-Fix-uninitialized-operands-2-in-vec_unpacks_hi_v4sf.patch -Patch1024: GCC14-1024-i386-Fix-AVX512BW-intrin-header-with-__OPTIMIZE__-PR.patch -Patch1025: GCC14-1025-i386-Do-not-check-vector-size-conflict-when-AVX512-i.patch -Patch1026: GCC14-1026-i386-Deprecate-m-no-avx10.1-and-make-mno-avx10.1-512.patch -Patch1027: GCC14-1027-Move-ix86_align_loops-into-a-separate-pass-and-inser.patch -Patch1028: GCC14-1028-x86-64-Don-t-use-temp-for-argument-in-a-TImode-regis.patch -Patch1029: GCC14-1029-x86-Don-t-use-address-override-with-segment-regsiter.patch -Patch1030: GCC14-1030-x86-Disable-stack-protector-for-naked-functions.patch -Patch1031: GCC14-1031-x86-Correct-ASM_OUTPUT_SYMBOL_REF.patch -Patch1032: GCC14-1032-i386-Treat-Granite-Rapids-Granite-Rapids-D-similar-a.patch -Patch1033: GCC14-1033-i386-Add-mavx10.1-back-with-512-bit-alias.patch -Patch1034: GCC14-1034-Extend-check-function-bodies-to-allow-label-and-dire.patch -Patch1035: GCC14-1035-APX-Don-t-use-red-zone-with-32-GPRs-and-no-caller-sa.patch -Patch1036: GCC14-1036-x86-Update-gcc.target-i386-apx-interrupt-1.c.patch -Patch1037: GCC14-1037-Remove-other-processors-from-X86_TUNE_DEST_FALSE_DEP.patch # On ARM EABI systems, we do want -gnueabi to be part of the # target triple. @@ -499,7 +466,7 @@ Requires: %{?_scl_prefix}liblsan%{gcc_ver} = %{version}-%{release} This package contains Leak Sanitizer static runtime library. %prep -%autosetup -p1 -n gcc-14.2.0 +%autosetup -p1 -n gcc-14.3.0 echo '%{vendor} %{version}-%{release}' > gcc/DEV-PHASE @@ -2236,6 +2203,9 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Jun 17 2025 jchzhou - 14.3.1-1 +- Update to 14.3.1 + * Fri May 30 2025 Hu, Lin - 14.2.1-8 - [Sync] Sync patches from gcc.gnu.org's releases/gcc-14