From 5335700c03c24b9be05496b986de032f28ad75a7 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Wed, 28 Jul 2021 11:42:49 +0800 Subject: [PATCH] [Sync] Sync patch from openeuler/gcc Sync patch from openeuler/gcc - 20210728 --- ...admath-Enable-libquadmath-on-kunpeng.patch | 473 +++++++++++++++++ ...Extend-to-check-non-trapping-for-mor.patch | 318 +++++++++++ 0003-version-Set-version-to-10.3.1.patch | 31 ++ ...timization-Avoid-issueing-loads-in-S.patch | 138 +++++ ...-optimization-Fix-load-eliding-in-SM.patch | 66 +++ ...-simdmath-Enable-simdmath-on-kunpeng.patch | 289 ++++++++++ ...ping-vectorization-on-reduction-chai.patch | 68 +++ ...timization-Add-checks-to-avoid-spoil.patch | 97 ++++ ...Simplify-removing-subregs-when-expan.patch | 141 +++++ ...timization-94963-avoid-bogus-uninit-.patch | 98 ++++ ...64-bits-simd-when-test-simd_pcs_attr.patch | 23 + ...-fp-model-Enable-fp-model-on-kunpeng.patch | 397 ++++++++++++++ ...undant-loop-elimination-optimization.patch | 499 ++++++++++++++++++ gcc.spec | 44 +- 14 files changed, 2677 insertions(+), 5 deletions(-) create mode 100644 0001-libquadmath-Enable-libquadmath-on-kunpeng.patch create mode 100644 0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch create mode 100644 0003-version-Set-version-to-10.3.1.patch create mode 100644 0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch create mode 100644 0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch create mode 100644 0006-simdmath-Enable-simdmath-on-kunpeng.patch create mode 100644 0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch create mode 100644 0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch create mode 100644 0009-Backport-expand-Simplify-removing-subregs-when-expan.patch create mode 100644 0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch create mode 100644 0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch create mode 100644 0012-fp-model-Enable-fp-model-on-kunpeng.patch create 
mode 100644 0013-LoopElim-Redundant-loop-elimination-optimization.patch diff --git a/0001-libquadmath-Enable-libquadmath-on-kunpeng.patch b/0001-libquadmath-Enable-libquadmath-on-kunpeng.patch new file mode 100644 index 0000000..33dd94e --- /dev/null +++ b/0001-libquadmath-Enable-libquadmath-on-kunpeng.patch @@ -0,0 +1,473 @@ +From 85740d3cc56fda699beae689b5d73233d16097af Mon Sep 17 00:00:00 2001 +From: bule +Date: Thu, 8 Jul 2021 11:52:47 +0800 +Subject: [PATCH 01/13] [libquadmath] Enable libquadmath on kunpeng + +This enable libquadmath on kunpeng platform to convenient +users that migrating from x86 platform. libquadmath uses "__float128" +as quad precision floating point type and with math functions with "q" +suffix like "cosq". For those who do not need to adapt to x86 platform, +you can use "long double" as quad precision floating point type and math +functions with "l" suffix like "cosl" in libm for quad precision math. + +diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in +index 8c011212258..66df9c922f8 100644 +--- a/libquadmath/Makefile.in ++++ b/libquadmath/Makefile.in +@@ -90,7 +90,7 @@ POST_UNINSTALL = : + build_triplet = @build@ + host_triplet = @host@ + target_triplet = @target@ +-@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES = ++#libquadmath_la_DEPENDENCIES = + subdir = . 
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -147,68 +147,68 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \ + "$(DESTDIR)$(libsubincludedir)" + LTLIBRARIES = $(toolexeclib_LTLIBRARIES) + am__dirstamp = $(am__leading_dot)dirstamp +-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \ 
+-@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \ +-@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \ +-@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo 
++am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ ++ math/acoshq.lo math/fmodq.lo \ ++ math/acosq.lo math/frexpq.lo \ ++ math/rem_pio2q.lo math/asinhq.lo \ ++ math/hypotq.lo math/remainderq.lo \ ++ math/asinq.lo math/rintq.lo \ ++ math/atan2q.lo math/isinfq.lo \ ++ math/roundq.lo math/atanhq.lo \ ++ math/isnanq.lo math/scalblnq.lo \ ++ math/atanq.lo math/j0q.lo \ ++ math/scalbnq.lo math/cbrtq.lo \ ++ math/j1q.lo math/signbitq.lo \ ++ math/ceilq.lo math/jnq.lo \ ++ math/sincos_table.lo math/complex.lo \ ++ math/ldexpq.lo math/sincosq.lo \ ++ math/copysignq.lo math/lgammaq.lo \ ++ math/sincosq_kernel.lo math/coshq.lo \ ++ math/llroundq.lo math/sinhq.lo \ ++ math/cosq.lo math/log10q.lo \ ++ math/sinq.lo math/cosq_kernel.lo \ ++ math/log1pq.lo math/sinq_kernel.lo \ ++ math/erfq.lo math/logq.lo \ ++ math/sqrtq.lo math/expm1q.lo \ ++ math/lroundq.lo math/tanhq.lo \ ++ math/expq.lo math/modfq.lo \ ++ math/tanq.lo math/fabsq.lo \ ++ math/nanq.lo math/tgammaq.lo \ ++ math/finiteq.lo math/nextafterq.lo \ ++ math/truncq.lo math/floorq.lo \ ++ math/powq.lo math/fmaq.lo \ ++ math/logbq.lo math/exp2q.lo \ ++ math/issignalingq.lo \ ++ math/lgammaq_neg.lo \ ++ math/lgammaq_product.lo \ ++ math/tanq_kernel.lo \ ++ math/tgammaq_product.lo \ ++ math/casinhq_kernel.lo math/cacoshq.lo \ ++ math/cacosq.lo math/casinhq.lo \ ++ math/casinq.lo math/catanhq.lo \ ++ math/catanq.lo math/cimagq.lo \ ++ math/conjq.lo math/cprojq.lo \ ++ math/crealq.lo math/fdimq.lo \ ++ math/fmaxq.lo math/fminq.lo \ ++ math/ilogbq.lo math/llrintq.lo \ ++ math/log2q.lo math/lrintq.lo \ ++ math/nearbyintq.lo math/remquoq.lo \ ++ math/ccoshq.lo math/cexpq.lo \ ++ math/clog10q.lo math/clogq.lo \ ++ math/csinq.lo math/csinhq.lo \ ++ math/csqrtq.lo math/ctanq.lo \ ++ math/ctanhq.lo printf/addmul_1.lo \ ++ printf/add_n.lo printf/cmp.lo \ ++ printf/divrem.lo printf/flt1282mpn.lo \ ++ printf/fpioconst.lo printf/lshift.lo \ ++ printf/mul_1.lo printf/mul_n.lo \ ++ printf/mul.lo printf/printf_fphex.lo \ ++ 
printf/printf_fp.lo \ ++ printf/quadmath-printf.lo \ ++ printf/rshift.lo printf/submul_1.lo \ ++ printf/sub_n.lo strtod/strtoflt128.lo \ ++ strtod/mpn2flt128.lo \ ++ strtod/tens_in_limb.lo + libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS) + AM_V_lt = $(am__v_lt_@AM_V@) + am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +@@ -218,8 +218,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \ + $@ +-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \ +-@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir) ++am_libquadmath_la_rpath = -rpath \ ++ $(toolexeclibdir) + AM_V_P = $(am__v_P_@AM_V@) + am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) + am__v_P_0 = false +@@ -337,7 +337,7 @@ CFLAGS = @CFLAGS@ + CPP = @CPP@ + CPPFLAGS = @CPPFLAGS@ + CYGPATH_W = @CYGPATH_W@ +-DEFS = @DEFS@ ++DEFS = @DEFS@ -D__float128="long double" + DEPDIR = @DEPDIR@ + DSYMUTIL = @DSYMUTIL@ + DUMPBIN = @DUMPBIN@ +@@ -409,7 +409,7 @@ datadir = @datadir@ + datarootdir = @datarootdir@ + docdir = @docdir@ + dvidir = @dvidir@ +-enable_shared = @enable_shared@ ++enable_shared = yes + enable_static = @enable_static@ + exec_prefix = @exec_prefix@ + get_gcc_base_ver = @get_gcc_base_ver@ +@@ -451,109 +451,109 @@ top_build_prefix = @top_build_prefix@ + top_builddir = @top_builddir@ + top_srcdir = @top_srcdir@ + AUTOMAKE_OPTIONS = foreign info-in-builddir +-@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. 
-I ../config +-@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include +-@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS) +-@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg = +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep = +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun +-@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD = +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ +-@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm +- +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) +-@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h +-@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \ +-@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ +-@BUILD_LIBQUADMATH_TRUE@ 
math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ +-@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c ++ACLOCAL_AMFLAGS = 
-I .. -I ../config ++AM_CPPFLAGS = -I $(top_srcdir)/../include ++AM_CFLAGS = $(XCFLAGS) ++gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) ++@LIBQUAD_USE_SYMVER_FALSE@version_arg = ++@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun ++@LIBQUAD_USE_SYMVER_FALSE@version_dep = ++@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun ++toolexeclib_LTLIBRARIES = libquadmath.la ++libquadmath_la_LIBADD = ++libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ ++ $(version_arg) $(lt_host_flags) -lm ++ ++libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) ++nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h ++libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include ++libquadmath_la_SOURCES = \ ++ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ ++ math/acosq.c math/frexpq.c \ ++ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ ++ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ ++ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ ++ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ ++ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ ++ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ ++ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ ++ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ ++ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ ++ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ ++ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ ++ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ ++ 
math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ ++ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ ++ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ ++ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ ++ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ ++ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ ++ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ ++ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ ++ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ ++ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ ++ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ ++ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ ++ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c + + + # Work around what appears to be a GNU make bug handling MAKEFLAGS + # values defined in terms of make variables, as is the case for CC and + # friends when we are called from the top level Makefile. 
+-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \ +-@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ +-@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ +-@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \ +-@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ +-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ +-@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \ +-@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \ +-@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \ +-@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \ +-@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \ +-@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \ +-@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \ +-@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \ +-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ +-@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \ +-@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)" ++AM_MAKEFLAGS = \ ++ "AR_FLAGS=$(AR_FLAGS)" \ ++ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ ++ 
"CFLAGS=$(CFLAGS)" \ ++ "CXXFLAGS=$(CXXFLAGS)" \ ++ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ ++ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ ++ "INSTALL=$(INSTALL)" \ ++ "INSTALL_DATA=$(INSTALL_DATA)" \ ++ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ ++ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ ++ "JC1FLAGS=$(JC1FLAGS)" \ ++ "LDFLAGS=$(LDFLAGS)" \ ++ "LIBCFLAGS=$(LIBCFLAGS)" \ ++ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ ++ "MAKE=$(MAKE)" \ ++ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ ++ "PICFLAG=$(PICFLAG)" \ ++ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ ++ "SHELL=$(SHELL)" \ ++ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ ++ "exec_prefix=$(exec_prefix)" \ ++ "infodir=$(infodir)" \ ++ "libdir=$(libdir)" \ ++ "prefix=$(prefix)" \ ++ "includedir=$(includedir)" \ ++ "AR=$(AR)" \ ++ "AS=$(AS)" \ ++ "CC=$(CC)" \ ++ "CXX=$(CXX)" \ ++ "LD=$(LD)" \ ++ "LIBCFLAGS=$(LIBCFLAGS)" \ ++ "NM=$(NM)" \ ++ "PICFLAG=$(PICFLAG)" \ ++ "RANLIB=$(RANLIB)" \ ++ "DESTDIR=$(DESTDIR)" + + + # Subdir rules rely on $(FLAGS_TO_PASS) +-@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS) +-@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES = +-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC = ++FLAGS_TO_PASS = $(AM_MAKEFLAGS) ++MAKEOVERRIDES = ++@GENINSRC_FALSE@STAMP_GENINSRC = + + # AM_CONDITIONAL on configure option --generated-files-in-srcdir +-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc +-@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC) +-@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = ++@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc ++ALL_LOCAL_DEPS = $(STAMP_GENINSRC) ++@BUILD_INFO_FALSE@STAMP_BUILD_INFO = + + # AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO]) +-@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info +-@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) +-@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info ++@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info 
++CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) ++MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info + + # Automake Documentation: + # If your package has Texinfo files in many directories, you can use the +@@ -564,8 +564,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo.tex + + # Defines info, dvi, pdf and html targets + MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include +-@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS = +-@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi ++info_TEXINFOS = ++info_TEXINFOS = libquadmath.texi + libquadmath_TEXINFOS = libquadmath-vers.texi + MULTISRCTOP = + MULTIBUILDTOP = +@@ -1187,6 +1187,7 @@ distclean-tags: + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + check-am: all-am + check: check-am ++#all-local + all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \ + all-local + installdirs: +@@ -1425,22 +1426,22 @@ uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ + + .PRECIOUS: Makefile + +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) +- 
+-@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info +-@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info +-@BUILD_LIBQUADMATH_TRUE@ @touch $@ +- +-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) +-@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi +-@BUILD_LIBQUADMATH_TRUE@ @touch $@ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) ++ ++stamp-geninsrc: libquadmath.info ++ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info ++ @touch $@ ++ ++stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) ++ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi ++ @touch $@ + + all-local: $(ALL_LOCAL_DEPS) + +diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h +index 81eb957d2fa..faa5977cbc9 100644 +--- a/libquadmath/quadmath.h ++++ b/libquadmath/quadmath.h +@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. 
*/ + extern "C" { + #endif + ++#ifdef AARCH64_QUADMATH ++typedef long double __float128; ++#endif + /* Define the complex type corresponding to __float128 + ("_Complex __float128" is not allowed) */ + #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) +@@ -160,10 +163,9 @@ extern int quadmath_snprintf (char *str, size_t size, + #define FLT128_MAX_10_EXP 4932 + + +-#define HUGE_VALQ __builtin_huge_valq() + /* The following alternative is valid, but brings the warning: + (floating constant exceeds range of ‘__float128’) */ +-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */ ++ #define HUGE_VALQ (__extension__ 0x1.0p32767Q) + + #define M_Eq 2.718281828459045235360287471352662498Q /* e */ + #define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */ +-- +2.21.0.windows.1 + diff --git a/0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch b/0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch new file mode 100644 index 0000000..c5df4d9 --- /dev/null +++ b/0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch @@ -0,0 +1,318 @@ +From d1e1ec0cd539f96be5a86b369b8c20b36ce9567f Mon Sep 17 00:00:00 2001 +From: yangyang +Date: Thu, 8 Jul 2021 14:38:39 +0800 +Subject: [PATCH 02/13] [Backport] cselim: Extend to check non-trapping for + more references + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=54ecfb182bc32140722022c1d9818dee4bdc0e45 + +If there is a dominating store, a store to the same reference can not be +trapped. But previously, it only supports such check on MEM_REFs. +So this patch extends it to support ARRAY_REFs and COMPONENT_REFs. + +This patch also supports a special case: if there is a dominating load of +local variable without address escape, a store is not trapped, as local +stack is always writable. Other loads are ignored for simplicity, as they +don't help to check if a store can be trapped (the memory may be read-only). 
+ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +index ce242ba569b..8ee1850ac63 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +index 90ae36bfce2..9b96875ac7a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +index c633cbe947d..b2d04119381 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +@@ -13,4 +13,4 @@ int test(int b, int k) { + return a.data[0] + a.data[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +index 7cad563128d..8d3c4f7cc6a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +@@ -16,4 +16,4 @@ int test(int b, int k) { + return a.data[0].x + a.data[1].x; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ 
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c +new file mode 100644 +index 00000000000..c35a2afc70b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-cselim-details" } */ ++ ++typedef union { ++ int i; ++ float f; ++} U; ++ ++int foo(U *u, int b, int i) ++{ ++ u->i = 0; ++ if (b) ++ u->i = i; ++ return u->i; ++} ++ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c +new file mode 100644 +index 00000000000..f9e66aefb13 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-cselim-details" } */ ++ ++int *t; ++ ++int f1 (int tt) ++{ ++ int *t1 = t; ++ *t1 = -5; ++ if (*t1 < tt) ++ *((unsigned *) t1) = 5; ++ return *t1; ++} ++ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +index 09313716598..a06f339f0bb 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-pre-stats" } */ ++/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */ + + typedef union { + int i; +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index b1e0dce93d8..3b5b6907679 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -1986,26 +1986,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + + ??? We currently are very conservative and assume that a load might + trap even if a store doesn't (write-only memory). This probably is +- overly conservative. 
*/ ++ overly conservative. + +-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF +- through it was seen, which would constitute a no-trap region for +- same accesses. */ +-struct name_to_bb ++ We currently support a special case that for !TREE_ADDRESSABLE automatic ++ variables, it could ignore whether something is a load or store because the ++ local stack should be always writable. */ ++ ++/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which ++ basic block an *_REF through it was seen, which would constitute a ++ no-trap region for same accesses. ++ ++ Size is needed to support 2 MEM_REFs of different types, like ++ MEM(s_1) and MEM(s_1), which would compare equal with ++ OEP_ADDRESS_OF. */ ++struct ref_to_bb + { +- unsigned int ssa_name_ver; ++ tree exp; ++ HOST_WIDE_INT size; + unsigned int phase; +- bool store; +- HOST_WIDE_INT offset, size; + basic_block bb; + }; + + /* Hashtable helpers. */ + +-struct ssa_names_hasher : free_ptr_hash ++struct refs_hasher : free_ptr_hash + { +- static inline hashval_t hash (const name_to_bb *); +- static inline bool equal (const name_to_bb *, const name_to_bb *); ++ static inline hashval_t hash (const ref_to_bb *); ++ static inline bool equal (const ref_to_bb *, const ref_to_bb *); + }; + + /* Used for quick clearing of the hash-table when we see calls. +@@ -2015,28 +2022,29 @@ static unsigned int nt_call_phase; + /* The hash function. */ + + inline hashval_t +-ssa_names_hasher::hash (const name_to_bb *n) ++refs_hasher::hash (const ref_to_bb *n) + { +- return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) +- ^ (n->offset << 6) ^ (n->size << 3); ++ inchash::hash hstate; ++ inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF); ++ hstate.add_hwi (n->size); ++ return hstate.end (); + } + + /* The equality function of *P1 and *P2. 
*/ + + inline bool +-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) ++refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) + { +- return n1->ssa_name_ver == n2->ssa_name_ver +- && n1->store == n2->store +- && n1->offset == n2->offset +- && n1->size == n2->size; ++ return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF) ++ && n1->size == n2->size; + } + + class nontrapping_dom_walker : public dom_walker + { + public: + nontrapping_dom_walker (cdi_direction direction, hash_set *ps) +- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} ++ : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128) ++ {} + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); +@@ -2053,7 +2061,7 @@ private: + hash_set *m_nontrapping; + + /* The hash table for remembering what we've seen. */ +- hash_table m_seen_ssa_names; ++ hash_table m_seen_refs; + }; + + /* Called by walk_dominator_tree, when entering the block BB. */ +@@ -2102,65 +2110,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb) + } + + /* We see the expression EXP in basic block BB. If it's an interesting +- expression (an MEM_REF through an SSA_NAME) possibly insert the +- expression into the set NONTRAP or the hash table of seen expressions. +- STORE is true if this expression is on the LHS, otherwise it's on +- the RHS. */ ++ expression of: ++ 1) MEM_REF ++ 2) ARRAY_REF ++ 3) COMPONENT_REF ++ possibly insert the expression into the set NONTRAP or the hash table ++ of seen expressions. STORE is true if this expression is on the LHS, ++ otherwise it's on the RHS. 
*/ + void + nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) + { + HOST_WIDE_INT size; + +- if (TREE_CODE (exp) == MEM_REF +- && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME +- && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) ++ if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF ++ || TREE_CODE (exp) == COMPONENT_REF) + && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) + { +- tree name = TREE_OPERAND (exp, 0); +- struct name_to_bb map; +- name_to_bb **slot; +- struct name_to_bb *n2bb; ++ struct ref_to_bb map; ++ ref_to_bb **slot; ++ struct ref_to_bb *r2bb; + basic_block found_bb = 0; + +- /* Try to find the last seen MEM_REF through the same +- SSA_NAME, which can trap. */ +- map.ssa_name_ver = SSA_NAME_VERSION (name); +- map.phase = 0; +- map.bb = 0; +- map.store = store; +- map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); +- map.size = size; ++ if (!store) ++ { ++ tree base = get_base_address (exp); ++ /* Only record a LOAD of a local variable without address-taken, as ++ the local stack is always writable. This allows cselim on a STORE ++ with a dominating LOAD. */ ++ if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) ++ return; ++ } + +- slot = m_seen_ssa_names.find_slot (&map, INSERT); +- n2bb = *slot; +- if (n2bb && n2bb->phase >= nt_call_phase) +- found_bb = n2bb->bb; ++ /* Try to find the last seen *_REF, which can trap. */ ++ map.exp = exp; ++ map.size = size; ++ slot = m_seen_refs.find_slot (&map, INSERT); ++ r2bb = *slot; ++ if (r2bb && r2bb->phase >= nt_call_phase) ++ found_bb = r2bb->bb; + +- /* If we've found a trapping MEM_REF, _and_ it dominates EXP +- (it's in a basic block on the path from us to the dominator root) ++ /* If we've found a trapping *_REF, _and_ it dominates EXP ++ (it's in a basic block on the path from us to the dominator root) + then we can't trap. 
*/ + if (found_bb && (((size_t)found_bb->aux) & 1) == 1) + { + m_nontrapping->add (exp); + } + else +- { ++ { + /* EXP might trap, so insert it into the hash table. */ +- if (n2bb) ++ if (r2bb) + { +- n2bb->phase = nt_call_phase; +- n2bb->bb = bb; ++ r2bb->phase = nt_call_phase; ++ r2bb->bb = bb; + } + else + { +- n2bb = XNEW (struct name_to_bb); +- n2bb->ssa_name_ver = SSA_NAME_VERSION (name); +- n2bb->phase = nt_call_phase; +- n2bb->bb = bb; +- n2bb->store = store; +- n2bb->offset = map.offset; +- n2bb->size = size; +- *slot = n2bb; ++ r2bb = XNEW (struct ref_to_bb); ++ r2bb->phase = nt_call_phase; ++ r2bb->bb = bb; ++ r2bb->exp = exp; ++ r2bb->size = size; ++ *slot = r2bb; + } + } + } +-- +2.21.0.windows.1 + diff --git a/0003-version-Set-version-to-10.3.1.patch b/0003-version-Set-version-to-10.3.1.patch new file mode 100644 index 0000000..d069bdd --- /dev/null +++ b/0003-version-Set-version-to-10.3.1.patch @@ -0,0 +1,31 @@ +From 309f459021a3681d728e5cf644a288ecf2b95175 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Mon, 12 Jul 2021 09:42:11 +0800 +Subject: [PATCH 03/13] [version] Set version to 10.3.1 + +Set version to 10.3.1 and clear DATESTAMP_s. + +diff --git a/gcc/BASE-VER b/gcc/BASE-VER +index 0719d810258..a9368325816 100644 +--- a/gcc/BASE-VER ++++ b/gcc/BASE-VER +@@ -1 +1 @@ +-10.3.0 ++10.3.1 +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 646db219460..fdc2857d44a 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -885,8 +885,7 @@ PATCHLEVEL_c := \ + # significant - do not remove it. 
+ BASEVER_s := "\"$(BASEVER_c)\"" + DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" +-DATESTAMP_s := \ +- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" ++DATESTAMP_s := "\"\"" + PKGVERSION_s:= "\"@PKGVERSION@\"" + BUGURL_s := "\"@REPORT_BUGS_TO@\"" + +-- +2.21.0.windows.1 + diff --git a/0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch b/0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch new file mode 100644 index 0000000..54b4116 --- /dev/null +++ b/0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch @@ -0,0 +1,138 @@ +From bdb0f40cea4aa1a92ead381b645363ae0571c065 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Mon, 12 Jul 2021 10:36:15 +0800 +Subject: [PATCH 04/13] [Backport]tree-optimization: Avoid issueing loads in SM + when possible + +Reference:https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9e1ea10e657af9fb02fafecf1a600740fd34409 + +Currently store-motion emits a load of the value in the loop +preheader even when the original loop does not contain any read +of the reference. This avoids doing this. In the conditional +store-motion case we need to mark the sunk stores with no-warning +since the control dependence is too tricky to figure out for +the uninit warning. 
+ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c +new file mode 100755 +index 00000000000..884f905148f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */ ++ ++void foo(int *); ++void f2(int dst[3], int R) ++{ ++ int i, inter[2]; ++ ++ for (i = 1; i < R; i++) { ++ if (i & 8) ++ { ++ inter[0] = 1; ++ inter[1] = 1; ++ } ++ } ++ ++ foo(inter); ++} ++ ++/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */ ++/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */ +diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +index abd5f702b91..b3fd1647fbd 100644 +--- a/gcc/tree-ssa-loop-im.c ++++ b/gcc/tree-ssa-loop-im.c +@@ -127,6 +127,8 @@ public: + + bitmap stored; /* The set of loops in that this memory location + is stored to. */ ++ bitmap loaded; /* The set of loops in that this memory location ++ is loaded from. */ + vec accesses_in_loop; + /* The locations of the accesses. Vector + indexed by the loop number. */ +@@ -1395,6 +1397,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id) + ref->ref_decomposed = false; + ref->hash = hash; + ref->stored = NULL; ++ ref->loaded = NULL; + bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack); + bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack); + ref->accesses_in_loop.create (1); +@@ -1435,6 +1438,27 @@ mark_ref_stored (im_mem_ref *ref, class loop *loop) + loop = loop_outer (loop); + } + ++/* Set the LOOP bit in REF loaded bitmap and allocate that if ++ necessary. Return whether a bit was changed. */ ++ ++static bool ++set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) ++{ ++ if (!ref->loaded) ++ ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack); ++ return bitmap_set_bit (ref->loaded, loop->num); ++} ++ ++/* Marks reference REF as loaded in LOOP. 
*/ ++ ++static void ++mark_ref_loaded (im_mem_ref *ref, class loop *loop) ++{ ++ while (loop != current_loops->tree_root ++ && set_ref_loaded_in_loop (ref, loop)) ++ loop = loop_outer (loop); ++} ++ + /* Gathers memory references in statement STMT in LOOP, storing the + information about them in the memory_accesses structure. Marks + the vops accessed through unrecognized statements there as +@@ -1571,6 +1595,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt) + bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); + mark_ref_stored (ref, loop); + } ++ else ++ mark_ref_loaded (ref, loop); + init_lim_data (stmt)->ref = ref->id; + return; + } +@@ -1968,6 +1994,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); ++ /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ ++ gimple_set_no_warning (stmt, true); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + edge e1 = single_succ_edge (new_bb); +@@ -2115,14 +2143,17 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + by move_computations after all dependencies. */ + gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); + +- /* FIXME/TODO: For the multi-threaded variant, we could avoid this +- load altogether, since the store is predicated by a flag. We +- could, do the load only if it was originally in the loop. */ +- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); +- lim_data = init_lim_data (load); +- lim_data->max_loop = loop; +- lim_data->tgt_loop = loop; +- gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ /* Avoid doing a load if there was no load of the ref in the loop. ++ Esp. when the ref is not always stored we cannot optimize it ++ away later. 
*/ ++ if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) ++ { ++ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); ++ lim_data = init_lim_data (load); ++ lim_data->max_loop = loop; ++ lim_data->tgt_loop = loop; ++ gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ } + + if (multi_threaded_model_p) + { +-- +2.21.0.windows.1 + diff --git a/0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch b/0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch new file mode 100644 index 0000000..0ab01ea --- /dev/null +++ b/0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch @@ -0,0 +1,66 @@ +From dc238e97a75835231939e77e8568ccd9bc5187d5 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Mon, 12 Jul 2021 10:46:16 +0800 +Subject: [PATCH 05/13] [Backport]tree-optimization: Fix load eliding in SM + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0424a5ece5307cc22bbc0fe97edf4707d7a798ed + +This fixes the case of not using the multithreaded model when +only conditionally storing to the destination. We cannot elide +the load in this case. 
+ +diff --git a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c +new file mode 100755 +index 00000000000..6182d77b3cd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr94949.c +@@ -0,0 +1,17 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fallow-store-data-races" } */ ++ ++static int x = 1; ++static volatile int y = -1; ++int ++main() ++{ ++ for (int i = 0; i < 128; ++i) ++ { ++ if (i == y) ++ x = i; ++ } ++ if (x != 1) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +index b3fd1647fbd..8c33735b1fa 100644 +--- a/gcc/tree-ssa-loop-im.c ++++ b/gcc/tree-ssa-loop-im.c +@@ -2128,9 +2128,9 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + fmt_data.orig_loop = loop; + for_each_index (&ref->mem.ref, force_move_till, &fmt_data); + ++ bool always_stored = ref_always_accessed_p (loop, ref, true); + if (bb_in_transaction (loop_preheader_edge (loop)->src) +- || (! flag_store_data_races +- && ! ref_always_accessed_p (loop, ref, true))) ++ || (! flag_store_data_races && ! always_stored)) + multi_threaded_model_p = true; + + if (multi_threaded_model_p) +@@ -2145,8 +2145,10 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + + /* Avoid doing a load if there was no load of the ref in the loop. + Esp. when the ref is not always stored we cannot optimize it +- away later. */ +- if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) ++ away later. But when it is not always stored we must use a conditional ++ store then. 
*/ ++ if ((!always_stored && !multi_threaded_model_p) ++ || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) + { + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); + lim_data = init_lim_data (load); +-- +2.21.0.windows.1 + diff --git a/0006-simdmath-Enable-simdmath-on-kunpeng.patch b/0006-simdmath-Enable-simdmath-on-kunpeng.patch new file mode 100644 index 0000000..9d7bb57 --- /dev/null +++ b/0006-simdmath-Enable-simdmath-on-kunpeng.patch @@ -0,0 +1,289 @@ +From cfd6920125f7968f0c1f5cb225f9fbd5bc8988b9 Mon Sep 17 00:00:00 2001 +From: bule +Date: Tue, 13 Jul 2021 15:26:54 +0800 +Subject: [PATCH 06/13] [simdmath] Enable simdmath on kunpeng + +This enable simd math function supported by libmathlib on fortran/c/c++. +Use -fsimdmath to turn on the generation of simdmath function. The +supported functions can be found in simdmath.h. Add more simd declaration +if you need more kinds of math functions. -msimdmath-64 is used to turn +on 64-bit simd math functions which is not supported by libmathlib. +Therefore, this option is default to off. + +diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c +index c51d6d34726..dc1a8984871 100644 +--- a/gcc/c-family/c-opts.c ++++ b/gcc/c-family/c-opts.c +@@ -780,6 +780,10 @@ c_common_post_options (const char **pfilename) + if (cpp_opts->deps.style == DEPS_NONE) + check_deps_environment_vars (); + ++ if (flag_simdmath) ++ { ++ defer_opt (OPT_include, "simdmath.h"); ++ } + handle_deferred_opts (); + + sanitize_cpp_opts (); +diff --git a/gcc/common.opt b/gcc/common.opt +index ec5235c3a41..8eb05570418 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1977,6 +1977,10 @@ fmath-errno + Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined + Set errno after built-in math functions. + ++fsimdmath ++Common Report Var(flag_simdmath) Init(0) Optimization ++Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. 
++ + fmax-errors= + Common Joined RejectNegative UInteger Var(flag_max_errors) + -fmax-errors= Maximum number of errors to report. +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 9b400c49ac6..79dc8f186f4 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -23077,8 +23077,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + if (clonei->simdlen == 0) + { +- count = 2; +- vec_bits = (num == 0 ? 64 : 128); ++ /* Currently mathlib or sleef hasn't provide function for V2SF mode ++ simdclone of single precision functions. (e.g._ZCVnN2v_expf) ++ Therefore this mode is disabled by default to avoid link error. ++ Use -msimdmath-64 option to enable this mode. */ ++ count = flag_simdmath_64 ? 2 : 1; ++ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); + clonei->simdlen = vec_bits / elt_bits; + } + else +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 1b3d942e0f5..4539156d6f4 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for + single precision and to 32 bits for double precision. + If enabled, it implies -mlow-precision-recip-sqrt. + ++msimdmath-64 ++Target Var(flag_simdmath_64) Optimization ++Allow compiler to generate V2SF 64 bits simdclone of math functions, ++which is not currently supported in mathlib or sleef. ++Therefore this option is disabled by default. ++ + mlow-precision-div + Target Var(flag_mlow_precision_div) Optimization + Enable the division approximation. 
Enabling this reduces +diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c +index 6f93508f934..42fd5a8be1e 100644 +--- a/gcc/fortran/scanner.c ++++ b/gcc/fortran/scanner.c +@@ -2737,6 +2737,10 @@ gfc_new_file (void) + && !load_file (flag_pre_include, NULL, false)) + exit (FATAL_EXIT_CODE); + ++ if (flag_simdmath ++ && !load_file ("simdmath_f.h", NULL, false)) ++ exit (FATAL_EXIT_CODE); ++ + if (gfc_cpp_enabled ()) + { + result = gfc_cpp_preprocess (gfc_source_file); +diff --git a/gcc/opts.c b/gcc/opts.c +index 73162528938..e31aa560564 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -189,6 +189,7 @@ static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.") + + typedef char *char_p; /* For DEF_VEC_P. */ + ++static void set_simdmath_flags (struct gcc_options *opts, int set); + static void set_debug_level (enum debug_info_type type, int extended, + const char *arg, struct gcc_options *opts, + struct gcc_options *opts_set, +@@ -2469,6 +2470,10 @@ common_handle_option (struct gcc_options *opts, + dc->min_margin_width = value; + break; + ++ case OPT_fsimdmath: ++ set_simdmath_flags (opts, value); ++ break; ++ + case OPT_fdump_: + /* Deferred. */ + break; +@@ -2847,6 +2852,18 @@ common_handle_option (struct gcc_options *opts, + return true; + } + ++/* The following routines are used to set -fno-math-errno and -fopenmp-simd ++ to enable vector mathlib. */ ++static void ++set_simdmath_flags (struct gcc_options *opts, int set) ++{ ++ if (set) ++ { ++ opts->x_flag_errno_math = 0; ++ opts->x_flag_openmp_simd = 1; ++ } ++} ++ + /* Used to set the level of strict aliasing warnings in OPTS, + when no level is specified (i.e., when -Wstrict-aliasing, and not + -Wstrict-aliasing=level was given). 
+diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am +index 669b9e4defd..0d9cc96481c 100644 +--- a/libgomp/Makefile.am ++++ b/libgomp/Makefile.am +@@ -74,10 +74,10 @@ libgomp_la_SOURCES += openacc.f90 + endif + + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + if USE_FORTRAN + nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h + endif + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) +diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in +index ae5d9d54705..dd4b334895e 100644 +--- a/libgomp/Makefile.in ++++ b/libgomp/Makefile.in +@@ -148,7 +148,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno + mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs + CONFIG_HEADER = config.h +-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \ ++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \ + libgomp.spec + CONFIG_CLEAN_VPATH_FILES = + am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +@@ -609,9 +609,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ + @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS) + @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod 
openacc_kinds.mod simdmath_f.h + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) + LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ +@@ -702,6 +702,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in +diff --git a/libgomp/configure b/libgomp/configure +index 5240f7e9d39..b03036c2738 100644 +--- a/libgomp/configure ++++ b/libgomp/configure +@@ -17050,7 +17050,7 @@ fi + + + +-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" ++ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h" + + ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" + +@@ -18205,6 +18205,8 @@ do + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;; + "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;; ++ "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;; ++ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;; + "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;; + "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; +diff --git a/libgomp/configure.ac b/libgomp/configure.ac +index ef5d293c31e..569c2065a66 100644 +--- a/libgomp/configure.ac ++++ b/libgomp/configure.ac +@@ -433,7 +433,7 @@ CFLAGS="$save_CFLAGS" + 
# Determine what GCC version number to use in filesystem paths. + GCC_BASE_VER + +-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) ++AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h) + AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) + AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) + AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp]) +diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in +new file mode 100644 +index 00000000000..ab91a4ec317 +--- /dev/null ++++ b/libgomp/simdmath.h.in +@@ -0,0 +1,40 @@ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double cos (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float cosf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double sin (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float sinf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double exp (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float expf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double log (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float logf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double pow (double x, double y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float powf (float x, float y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float exp2f (float x); ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif +diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in +new file mode 100644 +index 00000000000..550595015db +--- /dev/null ++++ b/libgomp/simdmath_f.h.in +@@ -0,0 +1,11 @@ ++!GCC$ builtin (cos) attributes simd (notinbranch) ++!GCC$ builtin (cosf) attributes simd (notinbranch) ++!GCC$ builtin (sin) attributes simd (notinbranch) ++!GCC$ builtin (sinf) attributes simd (notinbranch) ++!GCC$ builtin (exp) 
attributes simd (notinbranch) ++!GCC$ builtin (expf) attributes simd (notinbranch) ++!GCC$ builtin (exp2f) attributes simd (notinbranch) ++!GCC$ builtin (log) attributes simd (notinbranch) ++!GCC$ builtin (logf) attributes simd (notinbranch) ++!GCC$ builtin (pow) attributes simd (notinbranch) ++!GCC$ builtin (powf) attributes simd (notinbranch) +-- +2.21.0.windows.1 + diff --git a/0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch b/0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch new file mode 100644 index 0000000..ffe5327 --- /dev/null +++ b/0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch @@ -0,0 +1,68 @@ +From 07033bcc5b9e4c03846cd84b4587cd493fcf7d53 Mon Sep 17 00:00:00 2001 +From: zhoukaipeng +Date: Wed, 14 Jul 2021 11:24:06 +0800 +Subject: [PATCH 07/13] [Vect] Enable skipping vectorization on reduction + chains + +Sometimes either vectorization on reduction chains or reductions is +possible. But the latter is better. The option "-ftree-vect-analyze +-slp-group" skips the former. + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8eb05570418..55d4eb5a351 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2968,6 +2968,10 @@ ftree-slp-vectorize + Common Report Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. + ++ftree-vect-analyze-slp-group ++Common Report Var(flag_tree_slp_group) Init(0) ++Disable SLP vectorization for reduction chain on tree. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization. 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c +new file mode 100644 +index 00000000000..913f1ef28df +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */ ++void f(double *a, double *res, double m) { ++ double res1, res0; ++ res1 = 0; ++ res0 = 0; ++ for (int i = 0; i < 1000; i+=8) { ++ res0 += a[i] * m; ++ res1 += a[i+1] * m; ++ res0 += a[i+2] * m; ++ res1 += a[i+3] * m; ++ res0 += a[i+4] * m; ++ res1 += a[i+5] * m; ++ res0 += a[i+6] * m; ++ res1 += a[i+7] * m; ++ } ++ res[0] += res0; ++ res[1] += res1; ++} ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ +diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +index adc579ff544..476b3237054 100644 +--- a/gcc/tree-vect-slp.c ++++ b/gcc/tree-vect-slp.c +@@ -2480,7 +2480,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) + { + /* Find SLP sequences starting from reduction chains. */ + FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) +- if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, ++ if (flag_tree_slp_group ++ || ! vect_analyze_slp_instance (vinfo, bst_map, first_element, + max_tree_size)) + { + /* Dissolve reduction chain group. 
*/ +-- +2.21.0.windows.1 + diff --git a/0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch b/0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch new file mode 100644 index 0000000..9b8c4f8 --- /dev/null +++ b/0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch @@ -0,0 +1,97 @@ +From 79d1ed2d7f166a498662f6111a4defc55f0061c7 Mon Sep 17 00:00:00 2001 +From: yangyang +Date: Thu, 15 Jul 2021 09:27:27 +0800 +Subject: [PATCH 08/13] [Backport]tree-optimization: Add checks to avoid + spoiling if-conversion + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=33d114f570b4a3583421c700396fd5945acebc28 + +Add some checks in pass_splits_paths, so that pass_split_paths can recognize +the missed if-conversion opportunity and do not duplicate the corresponding +block. + +diff --git a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c +index b3efd43c7ef..9c32da76369 100644 +--- a/gcc/gimple-ssa-split-paths.c ++++ b/gcc/gimple-ssa-split-paths.c +@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-ssa.h" + #include "tree-phinodes.h" + #include "ssa-iterators.h" ++#include "fold-const.h" + + /* Given LATCH, the latch block in a loop, see if the shape of the + path reaching LATCH is suitable for being split by duplication. +@@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb) + } + } + ++ /* Canonicalize the form. */ ++ if (single_pred_p (pred1) && single_pred (pred1) == pred2 ++ && num_stmts_in_pred1 == 0) ++ std::swap (pred1, pred2); ++ ++ /* This is meant to catch another kind of cases that are likely opportunities ++ for if-conversion. After canonicalizing, PRED2 must be an empty block and ++ PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed ++ to end with a cond_stmt which has the same args with the PHI in BB. 
*/ ++ if (single_pred_p (pred2) && single_pred (pred2) == pred1 ++ && num_stmts_in_pred2 == 0) ++ { ++ gimple *cond_stmt = last_stmt (pred1); ++ if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND) ++ { ++ tree lhs = gimple_cond_lhs (cond_stmt); ++ tree rhs = gimple_cond_rhs (cond_stmt); ++ ++ gimple_stmt_iterator gsi; ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *phi = gsi_stmt (gsi); ++ if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs) ++ && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs)) ++ || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs) ++ && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs)))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "Block %d appears to be optimized to a join " ++ "point for if-convertable half-diamond.\n", ++ bb->index); ++ return false; ++ } ++ } ++ } ++ } ++ + /* If the joiner has no PHIs with useful uses there is zero chance + of CSE/DCE/jump-threading possibilities exposed by duplicating it. */ + bool found_useful_phi = false; +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c +new file mode 100644 +index 00000000000..19a130d9bf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */ ++ ++double ++foo(double *d1, double *d2, double *d3, int num, double *ip) ++{ ++ double dmax[3]; ++ ++ for (int i = 0; i < num; i++) { ++ dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i]; ++ dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i]; ++ dmax[2] = d3[i] < dmax[2] ? 
dmax[2] : d3[i]; ++ ip[i] = dmax[2]; ++ } ++ ++ return dmax[0] + dmax[1] + dmax[2]; ++} ++ ++/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */ +-- +2.21.0.windows.1 + diff --git a/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch new file mode 100644 index 0000000..434c669 --- /dev/null +++ b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch @@ -0,0 +1,141 @@ +From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Thu, 15 Jul 2021 09:04:55 +0800 +Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when + expanding a copy [PR95254] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8 + +In rtl expand, if we have a copy that matches one of the following patterns: + (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) + (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (constant C)) +where mode M1 is equal in size to M2, try to detect whether the mode change +involves an implicit round trip through memory. If so, see if we can avoid +that by removing the subregs and doing the move in mode M2 instead. 
+ +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f3341..d66fdd4e93d 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y) + gcc_assert (mode != BLKmode + && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + ++ /* If we have a copy that looks like one of the following patterns: ++ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) ++ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (constant C)) ++ where mode M1 is equal in size to M2, try to detect whether the ++ mode change involves an implicit round trip through memory. ++ If so, see if we can avoid that by removing the subregs and ++ doing the move in mode M2 instead. */ ++ ++ rtx x_inner = NULL_RTX; ++ rtx y_inner = NULL_RTX; ++ ++#define CANDIDATE_SUBREG_P(subreg) \ ++ (REG_P (SUBREG_REG (subreg)) \ ++ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \ ++ GET_MODE_SIZE (GET_MODE (subreg))) \ ++ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \ ++ != CODE_FOR_nothing) ++ ++#define CANDIDATE_MEM_P(innermode, mem) \ ++ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \ ++ && !push_operand ((mem), GET_MODE (mem)) \ ++ /* Not a candiate if innermode requires too much alignment. 
*/ \ ++ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \ ++ || targetm.slow_unaligned_access (GET_MODE (mem), \ ++ MEM_ALIGN (mem)) \ ++ || !targetm.slow_unaligned_access ((innermode), \ ++ MEM_ALIGN (mem)))) ++ ++ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x)) ++ x_inner = SUBREG_REG (x); ++ ++ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y)) ++ y_inner = SUBREG_REG (y); ++ ++ if (x_inner != NULL_RTX ++ && y_inner != NULL_RTX ++ && GET_MODE (x_inner) == GET_MODE (y_inner) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS)) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && MEM_P (y) ++ && CANDIDATE_MEM_P (GET_MODE (x_inner), y)) ++ { ++ x = x_inner; ++ y = adjust_address (y, GET_MODE (x_inner), 0); ++ mode = GET_MODE (x_inner); ++ } ++ else if (y_inner != NULL_RTX ++ && MEM_P (x) ++ && CANDIDATE_MEM_P (GET_MODE (y_inner), x)) ++ { ++ x = adjust_address (x, GET_MODE (y_inner), 0); ++ y = y_inner; ++ mode = GET_MODE (y_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && CONSTANT_P (y) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), ++ mode, ALL_REGS) ++ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0))) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ + if (CONSTANT_P (y)) + { + if (optimize +diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c +new file mode 100644 +index 00000000000..10bfc868197 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */ ++ ++typedef short __attribute__((vector_size (8))) v4hi; ++ ++typedef union U4HI { v4hi v; short a[4]; } u4hi; ++ ++short b[4]; ++ ++void pass_v4hi (v4hi v) ++{ ++ int i; ++ u4hi u; ++ u.v = v; ++ for (i = 0; i < 4; i++) ++ b[i] = u.a[i]; ++}; ++ ++/* { dg-final { scan-assembler-not "ptrue" } } */ 
+diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c +index 518071bdd86..398cdba5d5f 100644 +--- a/gcc/testsuite/gcc.target/i386/pr67609.c ++++ b/gcc/testsuite/gcc.target/i386/pr67609.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -msse2" } */ + /* { dg-require-effective-target lp64 } */ +-/* { dg-final { scan-assembler "movdqa" } } */ ++/* { dg-final { scan-assembler "movq\t%xmm0" } } */ + + #include + __m128d reg; +-- +2.21.0.windows.1 + diff --git a/0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch b/0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch new file mode 100644 index 0000000..e5dbbf2 --- /dev/null +++ b/0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch @@ -0,0 +1,98 @@ +From b8b3e29e4cceae2bab6e0774b1af994dbe713d97 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Thu, 15 Jul 2021 09:13:11 +0800 +Subject: [PATCH 10/13] [Backport]tree-optimization/94963 - avoid bogus uninit + warning with store-motion + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=371905d12259c180efb9b1f1b5716e969feb60f9 + +Eliding the load for store-motion causes an uninitialized variable +flowing into the loop, conditionally initialized and used. The +uninit warning cannot relate the flag used to guard the initialization +and use with the actual initialization so the following robustifies +the previous approach of marking the conditional store as not to +be warned on by instead initializing the variable on loop entry +from an uninitialized variable we mark as not to be warned for. 
+ +diff --git a/gcc/testsuite/gcc.dg/pr94963.c b/gcc/testsuite/gcc.dg/pr94963.c +new file mode 100644 +index 00000000000..09c0524fb3a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94963.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wall" } */ ++ ++typedef struct ++{ ++ int p1; ++ int p2; ++ int p3; ++} P; ++struct S ++{ ++ int field; ++}; ++extern int v2; ++extern void foo (struct S *map); ++static struct S var; ++const P *pv; ++int ps; ++void ++f (void) ++{ ++ if (pv != 0) ++ for (const P *ph = pv; ph < &pv[ps]; ++ph) ++ switch (ph->p1) ++ { ++ case 1: ++ v2 = ph->p2; ++ break; ++ case 2: ++ var.field = ph->p3; ++ break; ++ } ++ if (var.field != 0) /* { dg-bogus "uninitialized" } */ ++ foo (&var); ++} +diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +index 8c33735b1fa..d74a46ef352 100644 +--- a/gcc/tree-ssa-loop-im.c ++++ b/gcc/tree-ssa-loop-im.c +@@ -1994,8 +1994,6 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); +- /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ +- gimple_set_no_warning (stmt, true); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + edge e1 = single_succ_edge (new_bb); +@@ -2149,13 +2147,19 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + store then. */ + if ((!always_stored && !multi_threaded_model_p) + || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) ++ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); ++ else + { +- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); +- lim_data = init_lim_data (load); +- lim_data->max_loop = loop; +- lim_data->tgt_loop = loop; +- gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ /* If not emitting a load mark the uninitialized state on the ++ loop entry as not to be warned for. 
*/ ++ tree uninit = create_tmp_reg (TREE_TYPE (tmp_var)); ++ TREE_NO_WARNING (uninit) = 1; ++ load = gimple_build_assign (tmp_var, uninit); + } ++ lim_data = init_lim_data (load); ++ lim_data->max_loop = loop; ++ lim_data->tgt_loop = loop; ++ gsi_insert_before (&gsi, load, GSI_SAME_STMT); + + if (multi_threaded_model_p) + { +-- +2.21.0.windows.1 + diff --git a/0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch b/0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch new file mode 100644 index 0000000..b9f642c --- /dev/null +++ b/0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch @@ -0,0 +1,23 @@ +From 78cf3b95d7b895cfe8d6f1c2a48ebc08a662eef0 Mon Sep 17 00:00:00 2001 +From: bule +Date: Sat, 17 Jul 2021 16:38:10 +0800 +Subject: [PATCH 11/13] [simdmath] Enable 64-bits simd when test + simd_pcs_attribute-3 + +Enable 64-bits simd when test simd_pcs_attribute-3. The 64-bits simd +is default to off without specify the -msimdmath-64. + +diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +index 95f6a6803e8..e0e0efa9d7e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c ++++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-Ofast" } */ ++/* { dg-options "-Ofast -msimdmath-64" } */ + + __attribute__ ((__simd__)) + __attribute__ ((__nothrow__ , __leaf__ , __const__)) +-- +2.21.0.windows.1 + diff --git a/0012-fp-model-Enable-fp-model-on-kunpeng.patch b/0012-fp-model-Enable-fp-model-on-kunpeng.patch new file mode 100644 index 0000000..3e99f88 --- /dev/null +++ b/0012-fp-model-Enable-fp-model-on-kunpeng.patch @@ -0,0 +1,397 @@ +From 26ea42402eede6a441c9d74ec6b6086e5bf0bf79 Mon Sep 17 00:00:00 2001 +From: bule +Date: Mon, 19 Jul 2021 12:04:08 +0800 +Subject: [PATCH 12/13] [fp-model] Enable fp-model on kunpeng + +Enable fp-model options on kunpeng for precision control. 
+ +diff --git a/gcc/common.opt b/gcc/common.opt +index 55d4eb5a351..79c9ef6615b 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1545,6 +1545,32 @@ ffp-int-builtin-inexact + Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization + Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. + ++fftz ++Common Report Var(flag_ftz) Optimization ++Control fpcr register for flush to zero. ++ ++fp-model= ++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization ++-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. ++ ++Enum ++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) ++ ++EnumValue ++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) ++ ++EnumValue ++Enum(fp_model) String(fast) Value(FP_MODEL_FAST) ++ ++EnumValue ++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) ++ ++EnumValue ++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) ++ ++EnumValue ++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) ++ + ; Nonzero means don't put addresses of constant functions in registers. + ; Used for compiling the Unix kernel, where strange substitutions are + ; done on the assembly output. 
+diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +index e587e2e9ad6..331b12c8702 100644 +--- a/gcc/config/aarch64/aarch64-linux.h ++++ b/gcc/config/aarch64/aarch64-linux.h +@@ -50,7 +50,8 @@ + #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" ++ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ ++ %{!fno-ftz:crtfastmath.o%s}}" + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +diff --git a/gcc/flag-types.h b/gcc/flag-types.h +index 852ea76eaa2..5832298251e 100644 +--- a/gcc/flag-types.h ++++ b/gcc/flag-types.h +@@ -223,6 +223,15 @@ enum fp_contract_mode { + FP_CONTRACT_FAST = 2 + }; + ++/* Floating-point precision mode. */ ++enum fp_model { ++ FP_MODEL_NORMAL = 0, ++ FP_MODEL_FAST = 1, ++ FP_MODEL_PRECISE = 2, ++ FP_MODEL_EXCEPT = 3, ++ FP_MODEL_STRICT = 4 ++}; ++ + /* Scalar storage order kind. */ + enum scalar_storage_order_kind { + SSO_NATIVE = 0, +diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c +index 4cc8a908417..c59dcf63781 100644 +--- a/gcc/fortran/options.c ++++ b/gcc/fortran/options.c +@@ -250,6 +250,7 @@ form_from_filename (const char *filename) + return f_form; + } + ++static void gfc_handle_fpe_option (const char *arg, bool trap); + + /* Finalize commandline options. */ + +@@ -277,6 +278,13 @@ gfc_post_options (const char **pfilename) + if (flag_protect_parens == -1) + flag_protect_parens = !optimize_fast; + ++ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */ ++ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) ++ { ++ gfc_handle_fpe_option ("all", false); ++ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); ++ } ++ + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for + stack arrays. 
*/ + if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) +diff --git a/gcc/opts-common.c b/gcc/opts-common.c +index de9510abd64..bf82b05c8a2 100644 +--- a/gcc/opts-common.c ++++ b/gcc/opts-common.c +@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. If not see + #include "diagnostic.h" + #include "spellcheck.h" + +-static void prune_options (struct cl_decoded_option **, unsigned int *); ++static void prune_options (struct cl_decoded_option **, unsigned int *, ++ unsigned int); + + /* An option that is undocumented, that takes a joined argument, and + that doesn't fit any of the classes of uses (language/common, +@@ -988,7 +989,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; +- prune_options (decoded_options, decoded_options_count); ++ prune_options (decoded_options, decoded_options_count, lang_mask); + } + + /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the +@@ -1009,11 +1010,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx) + return false; + } + ++/* Check whether opt_idx exists in decoded_options array bewteen index ++ start and end. If found, return its index in decoded_options, ++ else return end. */ ++static unsigned int ++find_opt_idx (const struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int start, unsigned int end, unsigned int opt_idx) ++{ ++ gcc_assert (end <= decoded_options_count); ++ gcc_assert (opt_idx < cl_options_count); ++ unsigned int k; ++ for (k = start; k < end; k++) ++ { ++ if (decoded_options[k].opt_index == opt_idx) ++ { ++ return k; ++ } ++ } ++ return k; ++} ++ ++/* remove the opt_index element from decoded_options array. 
*/ ++static unsigned int ++remove_option (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int opt_index) ++{ ++ gcc_assert (opt_index < decoded_options_count); ++ unsigned int i; ++ for (i = opt_index; i < decoded_options_count - 1; i++) ++ { ++ decoded_options[i] = decoded_options[i + 1]; ++ } ++ return decoded_options_count - 1; ++} ++ ++/* Handle the priority between fp-model, Ofast, and ++ ffast-math. */ ++static unsigned int ++handle_fp_model_driver (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int fp_model_index, ++ unsigned int lang_mask) ++{ ++ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; ++ enum fp_model model = (enum fp_model) fp_model_opt.value; ++ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) ++ { ++ /* If found Ofast, override Ofast with O3. */ ++ unsigned int Ofast_index; ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ while (Ofast_index != decoded_options_count) ++ { ++ const char *tmp_argv = "-O3"; ++ decode_cmdline_option (&tmp_argv, lang_mask, ++ &decoded_options[Ofast_index]); ++ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs", ++ fp_model_opt.orig_option_with_args_text); ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ } ++ /* If found ffast-math before fp-model=precise/strict ++ it, cancel it. 
*/ ++ unsigned int ffast_math_index; ++ ffast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, 0, ++ fp_model_index, OPT_ffast_math); ++ if (ffast_math_index != fp_model_index) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ ffast_math_index); ++ warning (0, "%<-ffast-math%> before %qs is canceled", ++ fp_model_opt.orig_option_with_args_text); ++ } ++ } ++ if (model == FP_MODEL_FAST) ++ { ++ /* If found -fno-fast-math after fp-model=fast, cancel this one. */ ++ unsigned int fno_fast_math_index; ++ fno_fast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, ++ decoded_options_count, OPT_ffast_math); ++ if (fno_fast_math_index != decoded_options_count ++ && decoded_options[fno_fast_math_index].value == 0) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ fp_model_index); ++ warning (0, ++ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled"); ++ } ++ } ++ return decoded_options_count; ++} ++ + /* Filter out options canceled by the ones after them. */ + + static void + prune_options (struct cl_decoded_option **decoded_options, +- unsigned int *decoded_options_count) ++ unsigned int *decoded_options_count, ++ unsigned int lang_mask) + { + unsigned int old_decoded_options_count = *decoded_options_count; + struct cl_decoded_option *old_decoded_options = *decoded_options; +@@ -1024,7 +1123,12 @@ prune_options (struct cl_decoded_option **decoded_options, + const struct cl_option *option; + unsigned int fdiagnostics_color_idx = 0; + ++ if (!diagnostic_ready_p ()) ++ diagnostic_initialize (global_dc, 0); ++ + /* Remove arguments which are negated by others after them. 
*/ ++ ++ unsigned int fp_model_index = old_decoded_options_count; + new_decoded_options_count = 0; + for (i = 0; i < old_decoded_options_count; i++) + { +@@ -1048,6 +1152,34 @@ prune_options (struct cl_decoded_option **decoded_options, + fdiagnostics_color_idx = i; + continue; + ++ case OPT_fp_model_: ++ /* Only the last fp-model option will take effect. */ ++ unsigned int next_fp_model_idx; ++ next_fp_model_idx = find_opt_idx (old_decoded_options, ++ old_decoded_options_count, ++ i + 1, ++ old_decoded_options_count, ++ OPT_fp_model_); ++ if (next_fp_model_idx != old_decoded_options_count) ++ { ++ /* Found more than one fp-model, cancel this one. */ ++ if (old_decoded_options[i].value ++ != old_decoded_options[next_fp_model_idx].value) ++ { ++ warning (0, "%qs is overrided by %qs", ++ old_decoded_options[i]. ++ orig_option_with_args_text, ++ old_decoded_options[next_fp_model_idx]. ++ orig_option_with_args_text); ++ } ++ break; ++ } ++ else ++ { ++ /* Found the last fp-model option. */ ++ fp_model_index = new_decoded_options_count; ++ } ++ /* FALLTHRU. 
*/ + default: + gcc_assert (opt_idx < cl_options_count); + option = &cl_options[opt_idx]; +@@ -1087,6 +1219,14 @@ keep: + break; + } + } ++ if (fp_model_index < new_decoded_options_count) ++ { ++ new_decoded_options_count ++ = handle_fp_model_driver (new_decoded_options, ++ new_decoded_options_count, ++ fp_model_index, ++ lang_mask); ++ } + + if (fdiagnostics_color_idx >= 1) + { +diff --git a/gcc/opts.c b/gcc/opts.c +index e31aa560564..6924a973a5b 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -195,6 +195,7 @@ static void set_debug_level (enum debug_info_type type, int extended, + struct gcc_options *opts_set, + location_t loc); + static void set_fast_math_flags (struct gcc_options *opts, int set); ++static void set_fp_model_flags (struct gcc_options *opts, int set); + static void decode_d_option (const char *arg, struct gcc_options *opts, + location_t loc, diagnostic_context *dc); + static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, +@@ -2482,6 +2483,10 @@ common_handle_option (struct gcc_options *opts, + set_fast_math_flags (opts, value); + break; + ++ case OPT_fp_model_: ++ set_fp_model_flags (opts, value); ++ break; ++ + case OPT_funsafe_math_optimizations: + set_unsafe_math_optimizations_flags (opts, value); + break; +@@ -2908,6 +2913,69 @@ set_fast_math_flags (struct gcc_options *opts, int set) + } + } + ++/* Handle fp-model options. */ ++static void ++set_fp_model_flags (struct gcc_options *opts, int set) ++{ ++ enum fp_model model = (enum fp_model) set; ++ switch (model) ++ { ++ case FP_MODEL_FAST: ++ /* Equivalent to open ffast-math. */ ++ set_fast_math_flags (opts, 1); ++ break; ++ ++ case FP_MODEL_PRECISE: ++ /* Equivalent to close ffast-math. */ ++ set_fast_math_flags (opts, 0); ++ /* Turn on -frounding-math -fsignaling-nans. 
*/ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_EXCEPT: ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ /* Also turn on ffpe-trap in fortran. */ ++ break; ++ ++ case FP_MODEL_STRICT: ++ /* Turn on both precise and except. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_NORMAL: ++ /* Do nothing. */ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* When -funsafe-math-optimizations is set the following + flags are set as well. 
*/ + static void +-- +2.21.0.windows.1 + diff --git a/0013-LoopElim-Redundant-loop-elimination-optimization.patch b/0013-LoopElim-Redundant-loop-elimination-optimization.patch new file mode 100644 index 0000000..d50107e --- /dev/null +++ b/0013-LoopElim-Redundant-loop-elimination-optimization.patch @@ -0,0 +1,499 @@ +From 0d14a2b7a3defc82ed16c99a18c2bc2e6be9f5b1 Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Fri, 16 Jul 2021 23:21:38 -0400 +Subject: [PATCH 13/13] [LoopElim] Redundant loop elimination optimization + +Introduce redundant loop elimination optimization controlled +by -floop-elim. And it's often used with -ffinite-loops. + +diff --git a/gcc/common.opt b/gcc/common.opt +index 79c9ef6615b..b2b0aac7fdf 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1169,6 +1169,10 @@ fcompare-elim + Common Report Var(flag_compare_elim_after_reload) Optimization + Perform comparison elimination after register allocation has finished. + ++floop-elim ++Common Report Var(flag_loop_elim) Init(0) Optimization ++Perform redundant loop elimination. ++ + fconserve-stack + Common Var(flag_conserve_stack) Optimization + Do not perform optimizations increasing noticeably stack usage. 
+diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 3b5b6907679..591b6435f78 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -69,6 +69,7 @@ static hash_set * get_non_trapping (); + static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); ++static bool do_phiopt_pattern (basic_block, basic_block, basic_block); + static bool gate_hoist_loads (void); + + /* This pass tries to transform conditional stores into unconditional +@@ -257,6 +258,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + hoist_adjacent_loads (bb, bb1, bb2, bb3); + continue; + } ++ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) ++ { ++ continue; ++ } + else + continue; + +@@ -2819,6 +2824,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1, + } + } + ++static bool check_uses (tree, hash_set *); ++ ++/* Check SSA_NAME is used in ++ if (SSA_NAME == 0) ++ ... ++ or ++ if (SSA_NAME != 0) ++ ... 
++*/ ++static bool ++check_uses_cond (const_tree ssa_name, gimple *stmt, ++ hash_set *hset ATTRIBUTE_UNUSED) ++{ ++ tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR) ++ { ++ return false; ++ } ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if ((lhs == ssa_name && integer_zerop (rhs)) ++ || (rhs == ssa_name && integer_zerop (lhs))) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ _tmp = SSA_NAME == 0; ++ or ++ _tmp = SSA_NAME != 0; ++ or ++ _tmp = SSA_NAME | _tmp2; ++*/ ++static bool ++check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ tree_code code = gimple_assign_rhs_code (stmt); ++ tree lhs, rhs1, rhs2; ++ ++ switch (code) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ rhs1 = gimple_assign_rhs1 (stmt); ++ rhs2 = gimple_assign_rhs2 (stmt); ++ if ((rhs1 == ssa_name && integer_zerop (rhs2)) ++ || (rhs2 == ssa_name && integer_zerop (rhs1))) ++ { ++ return true; ++ } ++ break; ++ ++ case BIT_IOR_EXPR: ++ lhs = gimple_assign_lhs (stmt); ++ if (hset->contains (lhs)) ++ { ++ return false; ++ } ++ /* We should check the use of _tmp further. */ ++ return check_uses (lhs, hset); ++ ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ # result = PHI ++*/ ++static bool ++check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ { ++ tree arg = gimple_phi_arg_def (stmt, i); ++ if (!integer_zerop (arg) && arg != ssa_name) ++ { ++ return false; ++ } ++ } ++ ++ tree result = gimple_phi_result (stmt); ++ ++ /* It is used to avoid infinite recursion, ++ ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ ... 
++ */ ++ if (hset->contains (result)) ++ { ++ return false; ++ } ++ ++ return check_uses (result, hset); ++} ++ ++/* Check the use of SSA_NAME, it should only be used in comparison ++ operation and PHI node. HSET is used to record the ssa_names ++ that have been already checked. */ ++static bool ++check_uses (tree ssa_name, hash_set *hset) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ if (TREE_CODE (ssa_name) != SSA_NAME) ++ { ++ return false; ++ } ++ ++ if (SSA_NAME_VAR (ssa_name) ++ && is_global_var (SSA_NAME_VAR (ssa_name))) ++ { ++ return false; ++ } ++ ++ hset->add (ssa_name); ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ /* Ignore debug gimple statements. */ ++ if (is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_COND: ++ if (!check_uses_cond (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_ASSIGN: ++ if (!check_uses_assign (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_PHI: ++ if (!check_uses_phi (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool ++check_def_gimple (gimple *def1, gimple *def2, const_tree result) ++{ ++ /* def1 and def2 should be POINTER_PLUS_EXPR. */ ++ if (!is_gimple_assign (def1) || !is_gimple_assign (def2) ++ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) ++ { ++ return false; ++ } ++ ++ tree rhs12 = gimple_assign_rhs2 (def1); ++ ++ tree rhs21 = gimple_assign_rhs1 (def2); ++ tree rhs22 = gimple_assign_rhs2 (def2); ++ ++ if (rhs21 != result) ++ { ++ return false; ++ } ++ ++ /* We should have a positive pointer-plus constant to ensure ++ that the pointer value is continuously increasing. 
*/ ++ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST ++ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++check_loop_body (basic_block bb0, basic_block bb2, const_tree result) ++{ ++ gimple *g01 = first_stmt (bb0); ++ if (!g01 || !is_gimple_assign (g01) ++ || gimple_assign_rhs_code (g01) != MEM_REF ++ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) ++ { ++ return false; ++ } ++ ++ gimple *g02 = g01->next; ++ /* GIMPLE_COND would be the last gimple in a basic block, ++ and have no other side effects on RESULT. */ ++ if (!g02 || gimple_code (g02) != GIMPLE_COND) ++ { ++ return false; ++ } ++ ++ if (first_stmt (bb2) != last_stmt (bb2)) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Pattern is like ++
++   arg1 = base (rhs11) + cst (rhs12); [def1]
++   goto 
++
++   
++   arg2 = result (rhs21) + cst (rhs22); [def2]
++
++   
++   # result = PHI 
++   _v = *result;  [g01]
++   if (_v == 0)   [g02]
++     goto 
++   else
++     goto 
++
++   
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
++		gphi *phi_stmt, gimple *&output)
++{
++  /* Start check from PHI node in BB0.  */
++  if (gimple_phi_num_args (phi_stmt) != 2
++      || virtual_operand_p (gimple_phi_result (phi_stmt)))
++    {
++      return false;
++    }
++
++  tree result = gimple_phi_result (phi_stmt);
++  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
++  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
++
++  if (TREE_CODE (arg1) != SSA_NAME
++      || TREE_CODE (arg2) != SSA_NAME
++      || SSA_NAME_IS_DEFAULT_DEF (arg1)
++      || SSA_NAME_IS_DEFAULT_DEF (arg2))
++    {
++      return false;
++    }
++
++  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
++  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
++
++  /* Swap bb1 and bb2 if pattern is like
++     if (_v != 0)
++       goto 
++     else
++       goto 
++  */
++  if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
++    {
++      std::swap (bb1, bb2);
++    }
++
++  /* prebb[def1] --> bb0 <-- bb2[def2] */
++  if (!gimple_bb (def1)
++      || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
++      || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
++    {
++      return false;
++    }
++
++  /* Check whether define gimple meets the pattern requirements.  */
++  if (!check_def_gimple (def1, def2, result))
++    {
++      return false;
++    }
++
++  if (!check_loop_body (bb0, bb2, result))
++    {
++      return false;
++    }
++
++  output = def1;
++  return true;
++}
++
++/* Check pattern
++   
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
++		    const_tree result, gimple *&output)
++{
++  gimple *g1 = first_stmt (bb1);
++  if (!g1 || !is_gimple_assign (g1)
++      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
++      || gimple_assign_rhs1 (g1) != result
++      || gimple_assign_rhs2 (g1) != base)
++    {
++      return false;
++    }
++
++  gimple *g2 = g1->next;
++  if (!g2 || !is_gimple_assign (g2)
++      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
++      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
++      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
++    {
++      return false;
++    }
++
++  /* INTEGER_CST cst in gimple def1.  */
++  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
++  /* INTEGER_CST cst in gimple g2.  */
++  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
++  /* _2 must be at least a positive number.  */
++  if (num2 == 0 || num1 / num2 <= 0)
++    {
++      return false;
++    }
++
++  gimple *g3 = g2->next;
++  if (!g3 || !is_gimple_assign (g3)
++      || gimple_assign_rhs_code (g3) != NOP_EXPR
++      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
++      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
++    {
++      return false;
++    }
++
++  /* _3 should only be used in comparison operation or PHI node.  */
++  hash_set *hset = new hash_set;
++  if (!check_uses (gimple_assign_lhs (g3), hset))
++    {
++      delete hset;
++      return false;
++    }
++  delete hset;
++
++  output = g3;
++  return true;
++}
++
++static bool
++do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
++{
++  gphi_iterator gsi;
++
++  for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gphi *phi_stmt = gsi.phi ();
++      gimple *def1 = NULL;
++      tree base, cst, result;
++
++      if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
++	{
++	  continue;
++	}
++
++      base = gimple_assign_rhs1 (def1);
++      cst = gimple_assign_rhs2 (def1);
++      result = gimple_phi_result (phi_stmt);
++
++      gimple *stmt = NULL;
++      if (!check_gimple_order (bb1, base, cst, result, stmt))
++	{
++	  continue;
++	}
++
++      gcc_assert (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "to\n");
++	}
++
++      /* Rewrite statement
++	   _3 = (unsigned int) _2;
++	 to
++	   _3 = (unsigned int) 1;
++      */
++      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
++      gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
++      update_stmt (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Determine whether we should attempt to hoist adjacent loads out of
+    diamond patterns in pass_phiopt.  Always hoist loads if
+    -fhoist-adjacent-loads is specified and the target machine has
+-- 
+2.21.0.windows.1
+
diff --git a/gcc.spec b/gcc.spec
index 5818b77..e66145e 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -1,7 +1,7 @@
-%global DATE 20210727
+%global DATE 20210728
 
-%global gcc_version 10.3.0
-%global gcc_major 10.3.0
+%global gcc_version 10.3.1
+%global gcc_major 10.3.1
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
 
@@ -13,7 +13,7 @@
 %global build_go 0
 %global build_d 0
 %global build_check 0
-%ifarch %{ix86} x86_64 ia64 ppc64le
+%ifarch %{ix86} x86_64 ia64 ppc64le aarch64
 %global build_libquadmath 1
 %else
 %global build_libquadmath 0
@@ -59,7 +59,7 @@
 Summary: Various compilers (C, C++, Objective-C, ...)
 Name: gcc
 Version: %{gcc_version}
-Release: %{DATE}.1
+Release: %{DATE}.2
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
 URL: https://gcc.gnu.org
 
@@ -114,6 +114,19 @@ Provides: bundled(libiberty)
 Provides: gcc(major) = %{gcc_major}
 
 
+Patch1: 0001-libquadmath-Enable-libquadmath-on-kunpeng.patch
+Patch2: 0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch
+Patch3: 0003-version-Set-version-to-10.3.1.patch
+Patch4: 0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch
+Patch5: 0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch
+Patch6: 0006-simdmath-Enable-simdmath-on-kunpeng.patch
+Patch7: 0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch
+Patch8: 0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch
+Patch9: 0009-Backport-expand-Simplify-removing-subregs-when-expan.patch
+Patch10: 0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch
+Patch11: 0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch
+Patch12: 0012-fp-model-Enable-fp-model-on-kunpeng.patch
+Patch13: 0013-LoopElim-Redundant-loop-elimination-optimization.patch
 
 %global gcc_target_platform %{_arch}-linux-gnu
 
@@ -555,6 +568,19 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %setup -q -n gcc-10.3.0
 /bin/pwd
 
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
 
 
 %build
@@ -1602,6 +1628,7 @@ end
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdnoreturn.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdatomic.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/gcov.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/simdmath.h
 %ifarch %{ix86} x86_64
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h
@@ -2038,6 +2065,7 @@ end
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_arithmetic.mod
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_exceptions.mod
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_features.mod
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/simdmath_f.h
 %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/f951
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libgfortran.spec
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libcaf_single.a
@@ -2518,6 +2546,12 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Wed Jul 28 2021 eastb233  - 10.3.1-20210728.2
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Sync patch from openeuler/gcc
+
 * Tue Jul 27 2021 eastb233  - 10.3.0-20210727.1
 - Type:Init
 - ID:NA
-- 
Gitee