From 5335700c03c24b9be05496b986de032f28ad75a7 Mon Sep 17 00:00:00 2001 From: eastb233 Date: Wed, 28 Jul 2021 11:42:49 +0800 Subject: [PATCH] [Sync] Sync patch from openeuler/gcc Sync patch from openeuler/gcc - 20210728 --- ...admath-Enable-libquadmath-on-kunpeng.patch | 473 +++++++++++++++++ ...Extend-to-check-non-trapping-for-mor.patch | 318 +++++++++++ 0003-version-Set-version-to-10.3.1.patch | 31 ++ ...timization-Avoid-issueing-loads-in-S.patch | 138 +++++ ...-optimization-Fix-load-eliding-in-SM.patch | 66 +++ ...-simdmath-Enable-simdmath-on-kunpeng.patch | 289 ++++++++++ ...ping-vectorization-on-reduction-chai.patch | 68 +++ ...timization-Add-checks-to-avoid-spoil.patch | 97 ++++ ...Simplify-removing-subregs-when-expan.patch | 141 +++++ ...timization-94963-avoid-bogus-uninit-.patch | 98 ++++ ...64-bits-simd-when-test-simd_pcs_attr.patch | 23 + ...-fp-model-Enable-fp-model-on-kunpeng.patch | 397 ++++++++++++++ ...undant-loop-elimination-optimization.patch | 499 ++++++++++++++++++ gcc.spec | 44 +- 14 files changed, 2677 insertions(+), 5 deletions(-) create mode 100644 0001-libquadmath-Enable-libquadmath-on-kunpeng.patch create mode 100644 0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch create mode 100644 0003-version-Set-version-to-10.3.1.patch create mode 100644 0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch create mode 100644 0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch create mode 100644 0006-simdmath-Enable-simdmath-on-kunpeng.patch create mode 100644 0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch create mode 100644 0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch create mode 100644 0009-Backport-expand-Simplify-removing-subregs-when-expan.patch create mode 100644 0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch create mode 100644 0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch create mode 100644 0012-fp-model-Enable-fp-model-on-kunpeng.patch create 
mode 100644 0013-LoopElim-Redundant-loop-elimination-optimization.patch diff --git a/0001-libquadmath-Enable-libquadmath-on-kunpeng.patch b/0001-libquadmath-Enable-libquadmath-on-kunpeng.patch new file mode 100644 index 0000000..33dd94e --- /dev/null +++ b/0001-libquadmath-Enable-libquadmath-on-kunpeng.patch @@ -0,0 +1,473 @@ +From 85740d3cc56fda699beae689b5d73233d16097af Mon Sep 17 00:00:00 2001 +From: bule +Date: Thu, 8 Jul 2021 11:52:47 +0800 +Subject: [PATCH 01/13] [libquadmath] Enable libquadmath on kunpeng + +This enable libquadmath on kunpeng platform to convenient +users that migrating from x86 platform. libquadmath uses "__float128" +as quad precision floating point type and with math functions with "q" +suffix like "cosq". For those who do not need to adapt to x86 platform, +you can use "long double" as quad precision floating point type and math +functions with "l" suffix like "cosl" in libm for quad precision math. + +diff --git a/libquadmath/Makefile.in b/libquadmath/Makefile.in +index 8c011212258..66df9c922f8 100644 +--- a/libquadmath/Makefile.in ++++ b/libquadmath/Makefile.in +@@ -90,7 +90,7 @@ POST_UNINSTALL = : + build_triplet = @build@ + host_triplet = @host@ + target_triplet = @target@ +-@BUILD_LIBQUADMATH_FALSE@libquadmath_la_DEPENDENCIES = ++#libquadmath_la_DEPENDENCIES = + subdir = . 
+ ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 + am__aclocal_m4_deps = $(top_srcdir)/../config/acx.m4 \ +@@ -147,68 +147,68 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" "$(DESTDIR)$(infodir)" \ + "$(DESTDIR)$(libsubincludedir)" + LTLIBRARIES = $(toolexeclib_LTLIBRARIES) + am__dirstamp = $(am__leading_dot)dirstamp +-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/acoshq.lo math/fmodq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/acosq.lo math/frexpq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.lo math/asinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/hypotq.lo math/remainderq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/asinq.lo math/rintq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/atan2q.lo math/isinfq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/roundq.lo math/atanhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/isnanq.lo math/scalblnq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/atanq.lo math/j0q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/scalbnq.lo math/cbrtq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/j1q.lo math/signbitq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ceilq.lo math/jnq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sincos_table.lo math/complex.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ldexpq.lo math/sincosq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/copysignq.lo math/lgammaq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sincosq_kernel.lo math/coshq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/llroundq.lo math/sinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/cosq.lo math/log10q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sinq.lo math/cosq_kernel.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/log1pq.lo math/sinq_kernel.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/erfq.lo math/logq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/sqrtq.lo math/expm1q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/lroundq.lo math/tanhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/expq.lo math/modfq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanq.lo math/fabsq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/nanq.lo math/tgammaq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/finiteq.lo math/nextafterq.lo \ 
+-@BUILD_LIBQUADMATH_TRUE@ math/truncq.lo math/floorq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/powq.lo math/fmaq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/logbq.lo math/exp2q.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/issignalingq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_neg.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/lgammaq_product.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/tgammaq_product.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/casinhq_kernel.lo math/cacoshq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/cacosq.lo math/casinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/casinq.lo math/catanhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/catanq.lo math/cimagq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/conjq.lo math/cprojq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/crealq.lo math/fdimq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/fmaxq.lo math/fminq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ilogbq.lo math/llrintq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/log2q.lo math/lrintq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/nearbyintq.lo math/remquoq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.lo math/cexpq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/clog10q.lo math/clogq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/csinq.lo math/csinhq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/csqrtq.lo math/ctanq.lo \ +-@BUILD_LIBQUADMATH_TRUE@ math/ctanhq.lo printf/addmul_1.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/add_n.lo printf/cmp.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/divrem.lo printf/flt1282mpn.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/fpioconst.lo printf/lshift.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/mul_1.lo printf/mul_n.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/mul.lo printf/printf_fphex.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/printf_fp.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/rshift.lo printf/submul_1.lo \ +-@BUILD_LIBQUADMATH_TRUE@ printf/sub_n.lo strtod/strtoflt128.lo \ +-@BUILD_LIBQUADMATH_TRUE@ strtod/mpn2flt128.lo \ +-@BUILD_LIBQUADMATH_TRUE@ strtod/tens_in_limb.lo 
++am_libquadmath_la_OBJECTS = math/x2y2m1q.lo \ ++ math/acoshq.lo math/fmodq.lo \ ++ math/acosq.lo math/frexpq.lo \ ++ math/rem_pio2q.lo math/asinhq.lo \ ++ math/hypotq.lo math/remainderq.lo \ ++ math/asinq.lo math/rintq.lo \ ++ math/atan2q.lo math/isinfq.lo \ ++ math/roundq.lo math/atanhq.lo \ ++ math/isnanq.lo math/scalblnq.lo \ ++ math/atanq.lo math/j0q.lo \ ++ math/scalbnq.lo math/cbrtq.lo \ ++ math/j1q.lo math/signbitq.lo \ ++ math/ceilq.lo math/jnq.lo \ ++ math/sincos_table.lo math/complex.lo \ ++ math/ldexpq.lo math/sincosq.lo \ ++ math/copysignq.lo math/lgammaq.lo \ ++ math/sincosq_kernel.lo math/coshq.lo \ ++ math/llroundq.lo math/sinhq.lo \ ++ math/cosq.lo math/log10q.lo \ ++ math/sinq.lo math/cosq_kernel.lo \ ++ math/log1pq.lo math/sinq_kernel.lo \ ++ math/erfq.lo math/logq.lo \ ++ math/sqrtq.lo math/expm1q.lo \ ++ math/lroundq.lo math/tanhq.lo \ ++ math/expq.lo math/modfq.lo \ ++ math/tanq.lo math/fabsq.lo \ ++ math/nanq.lo math/tgammaq.lo \ ++ math/finiteq.lo math/nextafterq.lo \ ++ math/truncq.lo math/floorq.lo \ ++ math/powq.lo math/fmaq.lo \ ++ math/logbq.lo math/exp2q.lo \ ++ math/issignalingq.lo \ ++ math/lgammaq_neg.lo \ ++ math/lgammaq_product.lo \ ++ math/tanq_kernel.lo \ ++ math/tgammaq_product.lo \ ++ math/casinhq_kernel.lo math/cacoshq.lo \ ++ math/cacosq.lo math/casinhq.lo \ ++ math/casinq.lo math/catanhq.lo \ ++ math/catanq.lo math/cimagq.lo \ ++ math/conjq.lo math/cprojq.lo \ ++ math/crealq.lo math/fdimq.lo \ ++ math/fmaxq.lo math/fminq.lo \ ++ math/ilogbq.lo math/llrintq.lo \ ++ math/log2q.lo math/lrintq.lo \ ++ math/nearbyintq.lo math/remquoq.lo \ ++ math/ccoshq.lo math/cexpq.lo \ ++ math/clog10q.lo math/clogq.lo \ ++ math/csinq.lo math/csinhq.lo \ ++ math/csqrtq.lo math/ctanq.lo \ ++ math/ctanhq.lo printf/addmul_1.lo \ ++ printf/add_n.lo printf/cmp.lo \ ++ printf/divrem.lo printf/flt1282mpn.lo \ ++ printf/fpioconst.lo printf/lshift.lo \ ++ printf/mul_1.lo printf/mul_n.lo \ ++ printf/mul.lo printf/printf_fphex.lo \ ++ 
printf/printf_fp.lo \ ++ printf/quadmath-printf.lo \ ++ printf/rshift.lo printf/submul_1.lo \ ++ printf/sub_n.lo strtod/strtoflt128.lo \ ++ strtod/mpn2flt128.lo \ ++ strtod/tens_in_limb.lo + libquadmath_la_OBJECTS = $(am_libquadmath_la_OBJECTS) + AM_V_lt = $(am__v_lt_@AM_V@) + am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +@@ -218,8 +218,8 @@ libquadmath_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC \ + $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link $(CCLD) \ + $(AM_CFLAGS) $(CFLAGS) $(libquadmath_la_LDFLAGS) $(LDFLAGS) -o \ + $@ +-@BUILD_LIBQUADMATH_TRUE@am_libquadmath_la_rpath = -rpath \ +-@BUILD_LIBQUADMATH_TRUE@ $(toolexeclibdir) ++am_libquadmath_la_rpath = -rpath \ ++ $(toolexeclibdir) + AM_V_P = $(am__v_P_@AM_V@) + am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) + am__v_P_0 = false +@@ -337,7 +337,7 @@ CFLAGS = @CFLAGS@ + CPP = @CPP@ + CPPFLAGS = @CPPFLAGS@ + CYGPATH_W = @CYGPATH_W@ +-DEFS = @DEFS@ ++DEFS = @DEFS@ -D__float128="long double" + DEPDIR = @DEPDIR@ + DSYMUTIL = @DSYMUTIL@ + DUMPBIN = @DUMPBIN@ +@@ -409,7 +409,7 @@ datadir = @datadir@ + datarootdir = @datarootdir@ + docdir = @docdir@ + dvidir = @dvidir@ +-enable_shared = @enable_shared@ ++enable_shared = yes + enable_static = @enable_static@ + exec_prefix = @exec_prefix@ + get_gcc_base_ver = @get_gcc_base_ver@ +@@ -451,109 +451,109 @@ top_build_prefix = @top_build_prefix@ + top_builddir = @top_builddir@ + top_srcdir = @top_srcdir@ + AUTOMAKE_OPTIONS = foreign info-in-builddir +-@BUILD_LIBQUADMATH_TRUE@ACLOCAL_AMFLAGS = -I .. 
-I ../config +-@BUILD_LIBQUADMATH_TRUE@AM_CPPFLAGS = -I $(top_srcdir)/../include +-@BUILD_LIBQUADMATH_TRUE@AM_CFLAGS = $(XCFLAGS) +-@BUILD_LIBQUADMATH_TRUE@gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_arg = +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_FALSE@version_dep = +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun +-@BUILD_LIBQUADMATH_TRUE@toolexeclib_LTLIBRARIES = libquadmath.la +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LIBADD = +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ +-@BUILD_LIBQUADMATH_TRUE@ $(version_arg) $(lt_host_flags) -lm +- +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) +-@BUILD_LIBQUADMATH_TRUE@nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h +-@BUILD_LIBQUADMATH_TRUE@libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include +-@BUILD_LIBQUADMATH_TRUE@libquadmath_la_SOURCES = \ +-@BUILD_LIBQUADMATH_TRUE@ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/acosq.c math/frexpq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ +-@BUILD_LIBQUADMATH_TRUE@ 
math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ +-@BUILD_LIBQUADMATH_TRUE@ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ +-@BUILD_LIBQUADMATH_TRUE@ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ +-@BUILD_LIBQUADMATH_TRUE@ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c ++ACLOCAL_AMFLAGS = 
-I .. -I ../config ++AM_CPPFLAGS = -I $(top_srcdir)/../include ++AM_CFLAGS = $(XCFLAGS) ++gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER) ++@LIBQUAD_USE_SYMVER_FALSE@version_arg = ++@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,--version-script=$(srcdir)/quadmath.map ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_arg = -Wl,-M,quadmath.map-sun ++@LIBQUAD_USE_SYMVER_FALSE@version_dep = ++@LIBQUAD_USE_SYMVER_GNU_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = $(srcdir)/quadmath.map ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@version_dep = quadmath.map-sun ++toolexeclib_LTLIBRARIES = libquadmath.la ++libquadmath_la_LIBADD = ++libquadmath_la_LDFLAGS = -version-info `grep -v '^\#' $(srcdir)/libtool-version` \ ++ $(version_arg) $(lt_host_flags) -lm ++ ++libquadmath_la_DEPENDENCIES = $(version_dep) $(libquadmath_la_LIBADD) ++nodist_libsubinclude_HEADERS = quadmath.h quadmath_weak.h ++libsubincludedir = $(libdir)/gcc/$(target_alias)/$(gcc_version)/include ++libquadmath_la_SOURCES = \ ++ math/x2y2m1q.c math/acoshq.c math/fmodq.c \ ++ math/acosq.c math/frexpq.c \ ++ math/rem_pio2q.c math/asinhq.c math/hypotq.c math/remainderq.c \ ++ math/asinq.c math/rintq.c math/atan2q.c math/isinfq.c \ ++ math/roundq.c math/atanhq.c math/isnanq.c math/scalblnq.c math/atanq.c \ ++ math/j0q.c math/scalbnq.c math/cbrtq.c math/j1q.c math/signbitq.c \ ++ math/ceilq.c math/jnq.c math/sincos_table.c math/complex.c math/ldexpq.c \ ++ math/sincosq.c math/copysignq.c math/lgammaq.c math/sincosq_kernel.c \ ++ math/coshq.c math/llroundq.c math/sinhq.c math/cosq.c math/log10q.c \ ++ math/sinq.c math/cosq_kernel.c math/log1pq.c math/sinq_kernel.c \ ++ math/erfq.c math/logq.c math/sqrtq.c math/expm1q.c math/lroundq.c \ ++ math/tanhq.c math/expq.c math/modfq.c math/tanq.c math/fabsq.c \ ++ math/nanq.c math/tgammaq.c math/finiteq.c math/nextafterq.c \ ++ math/truncq.c math/floorq.c math/powq.c math/fmaq.c math/logbq.c \ ++ 
math/exp2q.c math/issignalingq.c math/lgammaq_neg.c math/lgammaq_product.c \ ++ math/tanq_kernel.c math/tgammaq_product.c math/casinhq_kernel.c \ ++ math/cacoshq.c math/cacosq.c math/casinhq.c math/casinq.c \ ++ math/catanhq.c math/catanq.c math/cimagq.c math/conjq.c math/cprojq.c \ ++ math/crealq.c math/fdimq.c math/fmaxq.c math/fminq.c math/ilogbq.c \ ++ math/llrintq.c math/log2q.c math/lrintq.c math/nearbyintq.c math/remquoq.c \ ++ math/ccoshq.c math/cexpq.c math/clog10q.c math/clogq.c math/csinq.c \ ++ math/csinhq.c math/csqrtq.c math/ctanq.c math/ctanhq.c \ ++ printf/addmul_1.c printf/add_n.c printf/cmp.c printf/divrem.c \ ++ printf/flt1282mpn.c printf/fpioconst.c printf/lshift.c printf/mul_1.c \ ++ printf/mul_n.c printf/mul.c printf/printf_fphex.c printf/printf_fp.c \ ++ printf/quadmath-printf.c printf/rshift.c printf/submul_1.c printf/sub_n.c \ ++ strtod/strtoflt128.c strtod/mpn2flt128.c strtod/tens_in_limb.c + + + # Work around what appears to be a GNU make bug handling MAKEFLAGS + # values defined in terms of make variables, as is the case for CC and + # friends when we are called from the top level Makefile. 
+-@BUILD_LIBQUADMATH_TRUE@AM_MAKEFLAGS = \ +-@BUILD_LIBQUADMATH_TRUE@ "AR_FLAGS=$(AR_FLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS=$(CFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CXXFLAGS=$(CXXFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL=$(INSTALL)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_DATA=$(INSTALL_DATA)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ +-@BUILD_LIBQUADMATH_TRUE@ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ +-@BUILD_LIBQUADMATH_TRUE@ "JC1FLAGS=$(JC1FLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LDFLAGS=$(LDFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ +-@BUILD_LIBQUADMATH_TRUE@ "MAKE=$(MAKE)" \ +-@BUILD_LIBQUADMATH_TRUE@ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ +-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ +-@BUILD_LIBQUADMATH_TRUE@ "SHELL=$(SHELL)" \ +-@BUILD_LIBQUADMATH_TRUE@ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "exec_prefix=$(exec_prefix)" \ +-@BUILD_LIBQUADMATH_TRUE@ "infodir=$(infodir)" \ +-@BUILD_LIBQUADMATH_TRUE@ "libdir=$(libdir)" \ +-@BUILD_LIBQUADMATH_TRUE@ "prefix=$(prefix)" \ +-@BUILD_LIBQUADMATH_TRUE@ "includedir=$(includedir)" \ +-@BUILD_LIBQUADMATH_TRUE@ "AR=$(AR)" \ +-@BUILD_LIBQUADMATH_TRUE@ "AS=$(AS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CC=$(CC)" \ +-@BUILD_LIBQUADMATH_TRUE@ "CXX=$(CXX)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LD=$(LD)" \ +-@BUILD_LIBQUADMATH_TRUE@ "LIBCFLAGS=$(LIBCFLAGS)" \ +-@BUILD_LIBQUADMATH_TRUE@ "NM=$(NM)" \ +-@BUILD_LIBQUADMATH_TRUE@ "PICFLAG=$(PICFLAG)" \ +-@BUILD_LIBQUADMATH_TRUE@ "RANLIB=$(RANLIB)" \ +-@BUILD_LIBQUADMATH_TRUE@ "DESTDIR=$(DESTDIR)" ++AM_MAKEFLAGS = \ ++ "AR_FLAGS=$(AR_FLAGS)" \ ++ "CC_FOR_BUILD=$(CC_FOR_BUILD)" \ ++ 
"CFLAGS=$(CFLAGS)" \ ++ "CXXFLAGS=$(CXXFLAGS)" \ ++ "CFLAGS_FOR_BUILD=$(CFLAGS_FOR_BUILD)" \ ++ "CFLAGS_FOR_TARGET=$(CFLAGS_FOR_TARGET)" \ ++ "INSTALL=$(INSTALL)" \ ++ "INSTALL_DATA=$(INSTALL_DATA)" \ ++ "INSTALL_PROGRAM=$(INSTALL_PROGRAM)" \ ++ "INSTALL_SCRIPT=$(INSTALL_SCRIPT)" \ ++ "JC1FLAGS=$(JC1FLAGS)" \ ++ "LDFLAGS=$(LDFLAGS)" \ ++ "LIBCFLAGS=$(LIBCFLAGS)" \ ++ "LIBCFLAGS_FOR_TARGET=$(LIBCFLAGS_FOR_TARGET)" \ ++ "MAKE=$(MAKE)" \ ++ "MAKEINFO=$(MAKEINFO) $(MAKEINFOFLAGS)" \ ++ "PICFLAG=$(PICFLAG)" \ ++ "PICFLAG_FOR_TARGET=$(PICFLAG_FOR_TARGET)" \ ++ "SHELL=$(SHELL)" \ ++ "RUNTESTFLAGS=$(RUNTESTFLAGS)" \ ++ "exec_prefix=$(exec_prefix)" \ ++ "infodir=$(infodir)" \ ++ "libdir=$(libdir)" \ ++ "prefix=$(prefix)" \ ++ "includedir=$(includedir)" \ ++ "AR=$(AR)" \ ++ "AS=$(AS)" \ ++ "CC=$(CC)" \ ++ "CXX=$(CXX)" \ ++ "LD=$(LD)" \ ++ "LIBCFLAGS=$(LIBCFLAGS)" \ ++ "NM=$(NM)" \ ++ "PICFLAG=$(PICFLAG)" \ ++ "RANLIB=$(RANLIB)" \ ++ "DESTDIR=$(DESTDIR)" + + + # Subdir rules rely on $(FLAGS_TO_PASS) +-@BUILD_LIBQUADMATH_TRUE@FLAGS_TO_PASS = $(AM_MAKEFLAGS) +-@BUILD_LIBQUADMATH_TRUE@MAKEOVERRIDES = +-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_FALSE@STAMP_GENINSRC = ++FLAGS_TO_PASS = $(AM_MAKEFLAGS) ++MAKEOVERRIDES = ++@GENINSRC_FALSE@STAMP_GENINSRC = + + # AM_CONDITIONAL on configure option --generated-files-in-srcdir +-@BUILD_LIBQUADMATH_TRUE@@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc +-@BUILD_LIBQUADMATH_TRUE@ALL_LOCAL_DEPS = $(STAMP_GENINSRC) +-@BUILD_INFO_FALSE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = ++@GENINSRC_TRUE@STAMP_GENINSRC = stamp-geninsrc ++ALL_LOCAL_DEPS = $(STAMP_GENINSRC) ++@BUILD_INFO_FALSE@STAMP_BUILD_INFO = + + # AM_CONDITIONAL on configure check ACX_CHECK_PROG_VER([MAKEINFO]) +-@BUILD_INFO_TRUE@@BUILD_LIBQUADMATH_TRUE@STAMP_BUILD_INFO = stamp-build-info +-@BUILD_LIBQUADMATH_TRUE@CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) +-@BUILD_LIBQUADMATH_TRUE@MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info ++@BUILD_INFO_TRUE@STAMP_BUILD_INFO = stamp-build-info 
++CLEANFILES = $(STAMP_GENINSRC) $(STAMP_BUILD_INFO) ++MAINTAINERCLEANFILES = $(srcdir)/libquadmath.info + + # Automake Documentation: + # If your package has Texinfo files in many directories, you can use the +@@ -564,8 +564,8 @@ TEXINFO_TEX = ../gcc/doc/include/texinfo.tex + + # Defines info, dvi, pdf and html targets + MAKEINFOFLAGS = -I $(srcdir)/../gcc/doc/include +-@BUILD_LIBQUADMATH_FALSE@info_TEXINFOS = +-@BUILD_LIBQUADMATH_TRUE@info_TEXINFOS = libquadmath.texi ++info_TEXINFOS = ++info_TEXINFOS = libquadmath.texi + libquadmath_TEXINFOS = libquadmath-vers.texi + MULTISRCTOP = + MULTIBUILDTOP = +@@ -1187,6 +1187,7 @@ distclean-tags: + -rm -f cscope.out cscope.in.out cscope.po.out cscope.files + check-am: all-am + check: check-am ++#all-local + all-am: Makefile $(INFO_DEPS) $(LTLIBRARIES) $(HEADERS) config.h \ + all-local + installdirs: +@@ -1425,22 +1426,22 @@ uninstall-am: uninstall-dvi-am uninstall-html-am uninstall-info-am \ + + .PRECIOUS: Makefile + +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ +-@BUILD_LIBQUADMATH_TRUE@@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) +- 
+-@BUILD_LIBQUADMATH_TRUE@stamp-geninsrc: libquadmath.info +-@BUILD_LIBQUADMATH_TRUE@ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info +-@BUILD_LIBQUADMATH_TRUE@ @touch $@ +- +-@BUILD_LIBQUADMATH_TRUE@stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) +-@BUILD_LIBQUADMATH_TRUE@ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi +-@BUILD_LIBQUADMATH_TRUE@ @touch $@ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@quadmath.map-sun : $(srcdir)/quadmath.map \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(top_srcdir)/../contrib/make_sunver.pl \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ perl $(top_srcdir)/../contrib/make_sunver.pl \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ $(srcdir)/quadmath.map \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ `echo $(libquadmath_la_OBJECTS) $(libquadmath_la_LIBADD) | \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ sed 's,\([^/ ]*\)\.l\([ao]\),.libs/\1.\2,g'` \ ++@LIBQUAD_USE_SYMVER_SUN_TRUE@@LIBQUAD_USE_SYMVER_TRUE@ > $@ || (rm -f $@ ; exit 1) ++ ++stamp-geninsrc: libquadmath.info ++ cp -p $(top_builddir)/libquadmath.info $(srcdir)/libquadmath.info ++ @touch $@ ++ ++stamp-build-info: libquadmath.texi $(libquadmath_TEXINFOS) ++ $(MAKEINFO) $(AM_MAKEINFOFLAGS) $(MAKEINFOFLAGS) -I $(srcdir) -o libquadmath.info $(srcdir)/libquadmath.texi ++ @touch $@ + + all-local: $(ALL_LOCAL_DEPS) + +diff --git a/libquadmath/quadmath.h b/libquadmath/quadmath.h +index 81eb957d2fa..faa5977cbc9 100644 +--- a/libquadmath/quadmath.h ++++ b/libquadmath/quadmath.h +@@ -27,6 +27,9 @@ Boston, MA 02110-1301, USA. 
*/ + extern "C" { + #endif + ++#ifdef AARCH64_QUADMATH ++typedef long double __float128; ++#endif + /* Define the complex type corresponding to __float128 + ("_Complex __float128" is not allowed) */ + #if (!defined(_ARCH_PPC)) || defined(__LONG_DOUBLE_IEEE128__) +@@ -160,10 +163,9 @@ extern int quadmath_snprintf (char *str, size_t size, + #define FLT128_MAX_10_EXP 4932 + + +-#define HUGE_VALQ __builtin_huge_valq() + /* The following alternative is valid, but brings the warning: + (floating constant exceeds range of ‘__float128’) */ +-/* #define HUGE_VALQ (__extension__ 0x1.0p32767Q) */ ++ #define HUGE_VALQ (__extension__ 0x1.0p32767Q) + + #define M_Eq 2.718281828459045235360287471352662498Q /* e */ + #define M_LOG2Eq 1.442695040888963407359924681001892137Q /* log_2 e */ +-- +2.21.0.windows.1 + diff --git a/0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch b/0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch new file mode 100644 index 0000000..c5df4d9 --- /dev/null +++ b/0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch @@ -0,0 +1,318 @@ +From d1e1ec0cd539f96be5a86b369b8c20b36ce9567f Mon Sep 17 00:00:00 2001 +From: yangyang +Date: Thu, 8 Jul 2021 14:38:39 +0800 +Subject: [PATCH 02/13] [Backport] cselim: Extend to check non-trapping for + more references + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=54ecfb182bc32140722022c1d9818dee4bdc0e45 + +If there is a dominating store, a store to the same reference can not be +trapped. But previously, it only supports such check on MEM_REFs. +So this patch extends it to support ARRAY_REFs and COMPONENT_REFs. + +This patch also supports a special case: if there is a dominating load of +local variable without address escape, a store is not trapped, as local +stack is always writable. Other loads are ignored for simplicity, as they +don't help to check if a store can be trapped (the memory may be read-only). 
+ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +index ce242ba569b..8ee1850ac63 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +index 90ae36bfce2..9b96875ac7a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +index c633cbe947d..b2d04119381 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +@@ -13,4 +13,4 @@ int test(int b, int k) { + return a.data[0] + a.data[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +index 7cad563128d..8d3c4f7cc6a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +@@ -16,4 +16,4 @@ int test(int b, int k) { + return a.data[0].x + a.data[1].x; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ 
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c +new file mode 100644 +index 00000000000..c35a2afc70b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-7-comp-ref.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-cselim-details" } */ ++ ++typedef union { ++ int i; ++ float f; ++} U; ++ ++int foo(U *u, int b, int i) ++{ ++ u->i = 0; ++ if (b) ++ u->i = i; ++ return u->i; ++} ++ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c +new file mode 100644 +index 00000000000..f9e66aefb13 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-8-mem-ref-size.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-cselim-details" } */ ++ ++int *t; ++ ++int f1 (int tt) ++{ ++ int *t1 = t; ++ *t1 = -5; ++ if (*t1 < tt) ++ *((unsigned *) t1) = 5; ++ return *t1; ++} ++ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +index 09313716598..a06f339f0bb 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-pre-17.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -fdump-tree-pre-stats" } */ ++/* { dg-options "-O2 -fdump-tree-pre-stats -fno-tree-cselim" } */ + + typedef union { + int i; +diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index b1e0dce93d8..3b5b6907679 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -1986,26 +1986,33 @@ abs_replacement (basic_block cond_bb, basic_block middle_bb, + + ??? We currently are very conservative and assume that a load might + trap even if a store doesn't (write-only memory). This probably is +- overly conservative. 
*/ ++ overly conservative. + +-/* A hash-table of SSA_NAMEs, and in which basic block an MEM_REF +- through it was seen, which would constitute a no-trap region for +- same accesses. */ +-struct name_to_bb ++ We currently support a special case that for !TREE_ADDRESSABLE automatic ++ variables, it could ignore whether something is a load or store because the ++ local stack should be always writable. */ ++ ++/* A hash-table of references (MEM_REF/ARRAY_REF/COMPONENT_REF), and in which ++ basic block an *_REF through it was seen, which would constitute a ++ no-trap region for same accesses. ++ ++ Size is needed to support 2 MEM_REFs of different types, like ++ MEM(s_1) and MEM(s_1), which would compare equal with ++ OEP_ADDRESS_OF. */ ++struct ref_to_bb + { +- unsigned int ssa_name_ver; ++ tree exp; ++ HOST_WIDE_INT size; + unsigned int phase; +- bool store; +- HOST_WIDE_INT offset, size; + basic_block bb; + }; + + /* Hashtable helpers. */ + +-struct ssa_names_hasher : free_ptr_hash ++struct refs_hasher : free_ptr_hash + { +- static inline hashval_t hash (const name_to_bb *); +- static inline bool equal (const name_to_bb *, const name_to_bb *); ++ static inline hashval_t hash (const ref_to_bb *); ++ static inline bool equal (const ref_to_bb *, const ref_to_bb *); + }; + + /* Used for quick clearing of the hash-table when we see calls. +@@ -2015,28 +2022,29 @@ static unsigned int nt_call_phase; + /* The hash function. */ + + inline hashval_t +-ssa_names_hasher::hash (const name_to_bb *n) ++refs_hasher::hash (const ref_to_bb *n) + { +- return n->ssa_name_ver ^ (((hashval_t) n->store) << 31) +- ^ (n->offset << 6) ^ (n->size << 3); ++ inchash::hash hstate; ++ inchash::add_expr (n->exp, hstate, OEP_ADDRESS_OF); ++ hstate.add_hwi (n->size); ++ return hstate.end (); + } + + /* The equality function of *P1 and *P2. 
*/ + + inline bool +-ssa_names_hasher::equal (const name_to_bb *n1, const name_to_bb *n2) ++refs_hasher::equal (const ref_to_bb *n1, const ref_to_bb *n2) + { +- return n1->ssa_name_ver == n2->ssa_name_ver +- && n1->store == n2->store +- && n1->offset == n2->offset +- && n1->size == n2->size; ++ return operand_equal_p (n1->exp, n2->exp, OEP_ADDRESS_OF) ++ && n1->size == n2->size; + } + + class nontrapping_dom_walker : public dom_walker + { + public: + nontrapping_dom_walker (cdi_direction direction, hash_set *ps) +- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} ++ : dom_walker (direction), m_nontrapping (ps), m_seen_refs (128) ++ {} + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); +@@ -2053,7 +2061,7 @@ private: + hash_set *m_nontrapping; + + /* The hash table for remembering what we've seen. */ +- hash_table m_seen_ssa_names; ++ hash_table m_seen_refs; + }; + + /* Called by walk_dominator_tree, when entering the block BB. */ +@@ -2102,65 +2110,68 @@ nontrapping_dom_walker::after_dom_children (basic_block bb) + } + + /* We see the expression EXP in basic block BB. If it's an interesting +- expression (an MEM_REF through an SSA_NAME) possibly insert the +- expression into the set NONTRAP or the hash table of seen expressions. +- STORE is true if this expression is on the LHS, otherwise it's on +- the RHS. */ ++ expression of: ++ 1) MEM_REF ++ 2) ARRAY_REF ++ 3) COMPONENT_REF ++ possibly insert the expression into the set NONTRAP or the hash table ++ of seen expressions. STORE is true if this expression is on the LHS, ++ otherwise it's on the RHS. 
*/ + void + nontrapping_dom_walker::add_or_mark_expr (basic_block bb, tree exp, bool store) + { + HOST_WIDE_INT size; + +- if (TREE_CODE (exp) == MEM_REF +- && TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME +- && tree_fits_shwi_p (TREE_OPERAND (exp, 1)) ++ if ((TREE_CODE (exp) == MEM_REF || TREE_CODE (exp) == ARRAY_REF ++ || TREE_CODE (exp) == COMPONENT_REF) + && (size = int_size_in_bytes (TREE_TYPE (exp))) > 0) + { +- tree name = TREE_OPERAND (exp, 0); +- struct name_to_bb map; +- name_to_bb **slot; +- struct name_to_bb *n2bb; ++ struct ref_to_bb map; ++ ref_to_bb **slot; ++ struct ref_to_bb *r2bb; + basic_block found_bb = 0; + +- /* Try to find the last seen MEM_REF through the same +- SSA_NAME, which can trap. */ +- map.ssa_name_ver = SSA_NAME_VERSION (name); +- map.phase = 0; +- map.bb = 0; +- map.store = store; +- map.offset = tree_to_shwi (TREE_OPERAND (exp, 1)); +- map.size = size; ++ if (!store) ++ { ++ tree base = get_base_address (exp); ++ /* Only record a LOAD of a local variable without address-taken, as ++ the local stack is always writable. This allows cselim on a STORE ++ with a dominating LOAD. */ ++ if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) ++ return; ++ } + +- slot = m_seen_ssa_names.find_slot (&map, INSERT); +- n2bb = *slot; +- if (n2bb && n2bb->phase >= nt_call_phase) +- found_bb = n2bb->bb; ++ /* Try to find the last seen *_REF, which can trap. */ ++ map.exp = exp; ++ map.size = size; ++ slot = m_seen_refs.find_slot (&map, INSERT); ++ r2bb = *slot; ++ if (r2bb && r2bb->phase >= nt_call_phase) ++ found_bb = r2bb->bb; + +- /* If we've found a trapping MEM_REF, _and_ it dominates EXP +- (it's in a basic block on the path from us to the dominator root) ++ /* If we've found a trapping *_REF, _and_ it dominates EXP ++ (it's in a basic block on the path from us to the dominator root) + then we can't trap. 
*/ + if (found_bb && (((size_t)found_bb->aux) & 1) == 1) + { + m_nontrapping->add (exp); + } + else +- { ++ { + /* EXP might trap, so insert it into the hash table. */ +- if (n2bb) ++ if (r2bb) + { +- n2bb->phase = nt_call_phase; +- n2bb->bb = bb; ++ r2bb->phase = nt_call_phase; ++ r2bb->bb = bb; + } + else + { +- n2bb = XNEW (struct name_to_bb); +- n2bb->ssa_name_ver = SSA_NAME_VERSION (name); +- n2bb->phase = nt_call_phase; +- n2bb->bb = bb; +- n2bb->store = store; +- n2bb->offset = map.offset; +- n2bb->size = size; +- *slot = n2bb; ++ r2bb = XNEW (struct ref_to_bb); ++ r2bb->phase = nt_call_phase; ++ r2bb->bb = bb; ++ r2bb->exp = exp; ++ r2bb->size = size; ++ *slot = r2bb; + } + } + } +-- +2.21.0.windows.1 + diff --git a/0003-version-Set-version-to-10.3.1.patch b/0003-version-Set-version-to-10.3.1.patch new file mode 100644 index 0000000..d069bdd --- /dev/null +++ b/0003-version-Set-version-to-10.3.1.patch @@ -0,0 +1,31 @@ +From 309f459021a3681d728e5cf644a288ecf2b95175 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Mon, 12 Jul 2021 09:42:11 +0800 +Subject: [PATCH 03/13] [version] Set version to 10.3.1 + +Set version to 10.3.1 and clear DATESTAMP_s. + +diff --git a/gcc/BASE-VER b/gcc/BASE-VER +index 0719d810258..a9368325816 100644 +--- a/gcc/BASE-VER ++++ b/gcc/BASE-VER +@@ -1 +1 @@ +-10.3.0 ++10.3.1 +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 646db219460..fdc2857d44a 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -885,8 +885,7 @@ PATCHLEVEL_c := \ + # significant - do not remove it. 
+ BASEVER_s := "\"$(BASEVER_c)\"" + DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" +-DATESTAMP_s := \ +- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" ++DATESTAMP_s := "\"\"" + PKGVERSION_s:= "\"@PKGVERSION@\"" + BUGURL_s := "\"@REPORT_BUGS_TO@\"" + +-- +2.21.0.windows.1 + diff --git a/0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch b/0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch new file mode 100644 index 0000000..54b4116 --- /dev/null +++ b/0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch @@ -0,0 +1,138 @@ +From bdb0f40cea4aa1a92ead381b645363ae0571c065 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Mon, 12 Jul 2021 10:36:15 +0800 +Subject: [PATCH 04/13] [Backport]tree-optimization: Avoid issueing loads in SM + when possible + +Reference:https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9e1ea10e657af9fb02fafecf1a600740fd34409 + +Currently store-motion emits a load of the value in the loop +preheader even when the original loop does not contain any read +of the reference. This avoids doing this. In the conditional +store-motion case we need to mark the sunk stores with no-warning +since the control dependence is too tricky to figure out for +the uninit warning. 
+ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c +new file mode 100755 +index 00000000000..884f905148f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */ ++ ++void foo(int *); ++void f2(int dst[3], int R) ++{ ++ int i, inter[2]; ++ ++ for (i = 1; i < R; i++) { ++ if (i & 8) ++ { ++ inter[0] = 1; ++ inter[1] = 1; ++ } ++ } ++ ++ foo(inter); ++} ++ ++/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */ ++/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */ +diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +index abd5f702b91..b3fd1647fbd 100644 +--- a/gcc/tree-ssa-loop-im.c ++++ b/gcc/tree-ssa-loop-im.c +@@ -127,6 +127,8 @@ public: + + bitmap stored; /* The set of loops in that this memory location + is stored to. */ ++ bitmap loaded; /* The set of loops in that this memory location ++ is loaded from. */ + vec accesses_in_loop; + /* The locations of the accesses. Vector + indexed by the loop number. */ +@@ -1395,6 +1397,7 @@ mem_ref_alloc (ao_ref *mem, unsigned hash, unsigned id) + ref->ref_decomposed = false; + ref->hash = hash; + ref->stored = NULL; ++ ref->loaded = NULL; + bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack); + bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack); + ref->accesses_in_loop.create (1); +@@ -1435,6 +1438,27 @@ mark_ref_stored (im_mem_ref *ref, class loop *loop) + loop = loop_outer (loop); + } + ++/* Set the LOOP bit in REF loaded bitmap and allocate that if ++ necessary. Return whether a bit was changed. */ ++ ++static bool ++set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) ++{ ++ if (!ref->loaded) ++ ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack); ++ return bitmap_set_bit (ref->loaded, loop->num); ++} ++ ++/* Marks reference REF as loaded in LOOP. 
*/ ++ ++static void ++mark_ref_loaded (im_mem_ref *ref, class loop *loop) ++{ ++ while (loop != current_loops->tree_root ++ && set_ref_loaded_in_loop (ref, loop)) ++ loop = loop_outer (loop); ++} ++ + /* Gathers memory references in statement STMT in LOOP, storing the + information about them in the memory_accesses structure. Marks + the vops accessed through unrecognized statements there as +@@ -1571,6 +1595,8 @@ gather_mem_refs_stmt (class loop *loop, gimple *stmt) + bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); + mark_ref_stored (ref, loop); + } ++ else ++ mark_ref_loaded (ref, loop); + init_lim_data (stmt)->ref = ref->id; + return; + } +@@ -1968,6 +1994,8 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); ++ /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ ++ gimple_set_no_warning (stmt, true); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + edge e1 = single_succ_edge (new_bb); +@@ -2115,14 +2143,17 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + by move_computations after all dependencies. */ + gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); + +- /* FIXME/TODO: For the multi-threaded variant, we could avoid this +- load altogether, since the store is predicated by a flag. We +- could, do the load only if it was originally in the loop. */ +- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); +- lim_data = init_lim_data (load); +- lim_data->max_loop = loop; +- lim_data->tgt_loop = loop; +- gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ /* Avoid doing a load if there was no load of the ref in the loop. ++ Esp. when the ref is not always stored we cannot optimize it ++ away later. 
*/ ++ if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) ++ { ++ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); ++ lim_data = init_lim_data (load); ++ lim_data->max_loop = loop; ++ lim_data->tgt_loop = loop; ++ gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ } + + if (multi_threaded_model_p) + { +-- +2.21.0.windows.1 + diff --git a/0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch b/0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch new file mode 100644 index 0000000..0ab01ea --- /dev/null +++ b/0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch @@ -0,0 +1,66 @@ +From dc238e97a75835231939e77e8568ccd9bc5187d5 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Mon, 12 Jul 2021 10:46:16 +0800 +Subject: [PATCH 05/13] [Backport]tree-optimization: Fix load eliding in SM + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0424a5ece5307cc22bbc0fe97edf4707d7a798ed + +This fixes the case of not using the multithreaded model when +only conditionally storing to the destination. We cannot elide +the load in this case. 
+ +diff --git a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c +new file mode 100755 +index 00000000000..6182d77b3cd +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr94949.c +@@ -0,0 +1,17 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fallow-store-data-races" } */ ++ ++static int x = 1; ++static volatile int y = -1; ++int ++main() ++{ ++ for (int i = 0; i < 128; ++i) ++ { ++ if (i == y) ++ x = i; ++ } ++ if (x != 1) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +index b3fd1647fbd..8c33735b1fa 100644 +--- a/gcc/tree-ssa-loop-im.c ++++ b/gcc/tree-ssa-loop-im.c +@@ -2128,9 +2128,9 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + fmt_data.orig_loop = loop; + for_each_index (&ref->mem.ref, force_move_till, &fmt_data); + ++ bool always_stored = ref_always_accessed_p (loop, ref, true); + if (bb_in_transaction (loop_preheader_edge (loop)->src) +- || (! flag_store_data_races +- && ! ref_always_accessed_p (loop, ref, true))) ++ || (! flag_store_data_races && ! always_stored)) + multi_threaded_model_p = true; + + if (multi_threaded_model_p) +@@ -2145,8 +2145,10 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + + /* Avoid doing a load if there was no load of the ref in the loop. + Esp. when the ref is not always stored we cannot optimize it +- away later. */ +- if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) ++ away later. But when it is not always stored we must use a conditional ++ store then. 
*/ ++ if ((!always_stored && !multi_threaded_model_p) ++ || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) + { + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); + lim_data = init_lim_data (load); +-- +2.21.0.windows.1 + diff --git a/0006-simdmath-Enable-simdmath-on-kunpeng.patch b/0006-simdmath-Enable-simdmath-on-kunpeng.patch new file mode 100644 index 0000000..9d7bb57 --- /dev/null +++ b/0006-simdmath-Enable-simdmath-on-kunpeng.patch @@ -0,0 +1,289 @@ +From cfd6920125f7968f0c1f5cb225f9fbd5bc8988b9 Mon Sep 17 00:00:00 2001 +From: bule +Date: Tue, 13 Jul 2021 15:26:54 +0800 +Subject: [PATCH 06/13] [simdmath] Enable simdmath on kunpeng + +This enable simd math function supported by libmathlib on fortran/c/c++. +Use -fsimdmath to turn on the generation of simdmath function. The +supported functions can be found in simdmath.h. Add more simd declaration +if you need more kinds of math functions. -msimdmath-64 is used to turn +on 64-bit simd math functions which is not supported by libmathlib. +Therefore, this option is default to off. + +diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c +index c51d6d34726..dc1a8984871 100644 +--- a/gcc/c-family/c-opts.c ++++ b/gcc/c-family/c-opts.c +@@ -780,6 +780,10 @@ c_common_post_options (const char **pfilename) + if (cpp_opts->deps.style == DEPS_NONE) + check_deps_environment_vars (); + ++ if (flag_simdmath) ++ { ++ defer_opt (OPT_include, "simdmath.h"); ++ } + handle_deferred_opts (); + + sanitize_cpp_opts (); +diff --git a/gcc/common.opt b/gcc/common.opt +index ec5235c3a41..8eb05570418 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1977,6 +1977,10 @@ fmath-errno + Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined + Set errno after built-in math functions. + ++fsimdmath ++Common Report Var(flag_simdmath) Init(0) Optimization ++Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. 
++ + fmax-errors= + Common Joined RejectNegative UInteger Var(flag_max_errors) + -fmax-errors= Maximum number of errors to report. +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 9b400c49ac6..79dc8f186f4 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -23077,8 +23077,12 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node, + elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + if (clonei->simdlen == 0) + { +- count = 2; +- vec_bits = (num == 0 ? 64 : 128); ++ /* Currently mathlib or sleef hasn't provide function for V2SF mode ++ simdclone of single precision functions. (e.g._ZCVnN2v_expf) ++ Therefore this mode is disabled by default to avoid link error. ++ Use -msimdmath-64 option to enable this mode. */ ++ count = flag_simdmath_64 ? 2 : 1; ++ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); + clonei->simdlen = vec_bits / elt_bits; + } + else +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index 1b3d942e0f5..4539156d6f4 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -190,6 +190,12 @@ precision of square root results to about 16 bits for + single precision and to 32 bits for double precision. + If enabled, it implies -mlow-precision-recip-sqrt. + ++msimdmath-64 ++Target Var(flag_simdmath_64) Optimization ++Allow compiler to generate V2SF 64 bits simdclone of math functions, ++which is not currently supported in mathlib or sleef. ++Therefore this option is disabled by default. ++ + mlow-precision-div + Target Var(flag_mlow_precision_div) Optimization + Enable the division approximation. 
Enabling this reduces +diff --git a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c +index 6f93508f934..42fd5a8be1e 100644 +--- a/gcc/fortran/scanner.c ++++ b/gcc/fortran/scanner.c +@@ -2737,6 +2737,10 @@ gfc_new_file (void) + && !load_file (flag_pre_include, NULL, false)) + exit (FATAL_EXIT_CODE); + ++ if (flag_simdmath ++ && !load_file ("simdmath_f.h", NULL, false)) ++ exit (FATAL_EXIT_CODE); ++ + if (gfc_cpp_enabled ()) + { + result = gfc_cpp_preprocess (gfc_source_file); +diff --git a/gcc/opts.c b/gcc/opts.c +index 73162528938..e31aa560564 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -189,6 +189,7 @@ static const char use_diagnosed_msg[] = N_("Uses of this option are diagnosed.") + + typedef char *char_p; /* For DEF_VEC_P. */ + ++static void set_simdmath_flags (struct gcc_options *opts, int set); + static void set_debug_level (enum debug_info_type type, int extended, + const char *arg, struct gcc_options *opts, + struct gcc_options *opts_set, +@@ -2469,6 +2470,10 @@ common_handle_option (struct gcc_options *opts, + dc->min_margin_width = value; + break; + ++ case OPT_fsimdmath: ++ set_simdmath_flags (opts, value); ++ break; ++ + case OPT_fdump_: + /* Deferred. */ + break; +@@ -2847,6 +2852,18 @@ common_handle_option (struct gcc_options *opts, + return true; + } + ++/* The following routines are used to set -fno-math-errno and -fopenmp-simd ++ to enable vector mathlib. */ ++static void ++set_simdmath_flags (struct gcc_options *opts, int set) ++{ ++ if (set) ++ { ++ opts->x_flag_errno_math = 0; ++ opts->x_flag_openmp_simd = 1; ++ } ++} ++ + /* Used to set the level of strict aliasing warnings in OPTS, + when no level is specified (i.e., when -Wstrict-aliasing, and not + -Wstrict-aliasing=level was given). 
+diff --git a/libgomp/Makefile.am b/libgomp/Makefile.am +index 669b9e4defd..0d9cc96481c 100644 +--- a/libgomp/Makefile.am ++++ b/libgomp/Makefile.am +@@ -74,10 +74,10 @@ libgomp_la_SOURCES += openacc.f90 + endif + + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + if USE_FORTRAN + nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +- openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod simdmath_f.h + endif + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) +diff --git a/libgomp/Makefile.in b/libgomp/Makefile.in +index ae5d9d54705..dd4b334895e 100644 +--- a/libgomp/Makefile.in ++++ b/libgomp/Makefile.in +@@ -148,7 +148,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ + configure.lineno config.status.lineno + mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs + CONFIG_HEADER = config.h +-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \ ++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \ + libgomp.spec + CONFIG_CLEAN_VPATH_FILES = + am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +@@ -609,9 +609,9 @@ libgomp_la_SOURCES = alloc.c atomic.c barrier.c critical.c env.c \ + @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBADD = libgomp.la $(PLUGIN_GCN_LIBS) + @PLUGIN_GCN_TRUE@libgomp_plugin_gcn_la_LIBTOOLFLAGS = --tag=disable-static + nodist_noinst_HEADERS = libgomp_f.h +-nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h ++nodist_libsubinclude_HEADERS = omp.h openacc.h acc_prof.h simdmath.h + @USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \ +-@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod ++@USE_FORTRAN_TRUE@ openacc_lib.h openacc.f90 openacc.mod 
openacc_kinds.mod simdmath_f.h + + LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS)) + LINK = $(LIBTOOL) --tag CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=link \ +@@ -702,6 +702,10 @@ omp.h: $(top_builddir)/config.status $(srcdir)/omp.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in + cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ ++simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in ++ cd $(top_builddir) && $(SHELL) ./config.status $@ + omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in + cd $(top_builddir) && $(SHELL) ./config.status $@ + libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in +diff --git a/libgomp/configure b/libgomp/configure +index 5240f7e9d39..b03036c2738 100644 +--- a/libgomp/configure ++++ b/libgomp/configure +@@ -17050,7 +17050,7 @@ fi + + + +-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h" ++ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h" + + ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec" + +@@ -18205,6 +18205,8 @@ do + "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; + "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;; + "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;; ++ "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;; ++ "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;; + "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;; + "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;; + "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; +diff --git a/libgomp/configure.ac b/libgomp/configure.ac +index ef5d293c31e..569c2065a66 100644 +--- a/libgomp/configure.ac ++++ b/libgomp/configure.ac +@@ -433,7 +433,7 @@ CFLAGS="$save_CFLAGS" + 
# Determine what GCC version number to use in filesystem paths. + GCC_BASE_VER + +-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h) ++AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h) + AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec) + AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in]) + AC_CONFIG_FILES([testsuite/libgomp-site-extra.exp]) +diff --git a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in +new file mode 100644 +index 00000000000..ab91a4ec317 +--- /dev/null ++++ b/libgomp/simdmath.h.in +@@ -0,0 +1,40 @@ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double cos (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float cosf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double sin (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float sinf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double exp (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float expf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double log (double x); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float logf (float x); ++ ++#pragma omp declare simd simdlen(2) notinbranch ++double pow (double x, double y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float powf (float x, float y); ++ ++#pragma omp declare simd simdlen(4) notinbranch ++float exp2f (float x); ++ ++#ifdef __cplusplus ++} // extern "C" ++#endif +diff --git a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in +new file mode 100644 +index 00000000000..550595015db +--- /dev/null ++++ b/libgomp/simdmath_f.h.in +@@ -0,0 +1,11 @@ ++!GCC$ builtin (cos) attributes simd (notinbranch) ++!GCC$ builtin (cosf) attributes simd (notinbranch) ++!GCC$ builtin (sin) attributes simd (notinbranch) ++!GCC$ builtin (sinf) attributes simd (notinbranch) ++!GCC$ builtin (exp) 
attributes simd (notinbranch) ++!GCC$ builtin (expf) attributes simd (notinbranch) ++!GCC$ builtin (exp2f) attributes simd (notinbranch) ++!GCC$ builtin (log) attributes simd (notinbranch) ++!GCC$ builtin (logf) attributes simd (notinbranch) ++!GCC$ builtin (pow) attributes simd (notinbranch) ++!GCC$ builtin (powf) attributes simd (notinbranch) +-- +2.21.0.windows.1 + diff --git a/0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch b/0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch new file mode 100644 index 0000000..ffe5327 --- /dev/null +++ b/0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch @@ -0,0 +1,68 @@ +From 07033bcc5b9e4c03846cd84b4587cd493fcf7d53 Mon Sep 17 00:00:00 2001 +From: zhoukaipeng +Date: Wed, 14 Jul 2021 11:24:06 +0800 +Subject: [PATCH 07/13] [Vect] Enable skipping vectorization on reduction + chains + +Sometimes either vectorization on reduction chains or reductions is +possible. But the latter is better. The option "-ftree-vect-analyze +-slp-group" skips the former. + +diff --git a/gcc/common.opt b/gcc/common.opt +index 8eb05570418..55d4eb5a351 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2968,6 +2968,10 @@ ftree-slp-vectorize + Common Report Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. + ++ftree-vect-analyze-slp-group ++Common Report Var(flag_tree_slp_group) Init(0) ++Disable SLP vectorization for reduction chain on tree. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization. 
+diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c +new file mode 100644 +index 00000000000..913f1ef28df +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */ ++void f(double *a, double *res, double m) { ++ double res1, res0; ++ res1 = 0; ++ res0 = 0; ++ for (int i = 0; i < 1000; i+=8) { ++ res0 += a[i] * m; ++ res1 += a[i+1] * m; ++ res0 += a[i+2] * m; ++ res1 += a[i+3] * m; ++ res0 += a[i+4] * m; ++ res1 += a[i+5] * m; ++ res0 += a[i+6] * m; ++ res1 += a[i+7] * m; ++ } ++ res[0] += res0; ++ res[1] += res1; ++} ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ +diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +index adc579ff544..476b3237054 100644 +--- a/gcc/tree-vect-slp.c ++++ b/gcc/tree-vect-slp.c +@@ -2480,7 +2480,8 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) + { + /* Find SLP sequences starting from reduction chains. */ + FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) +- if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, ++ if (flag_tree_slp_group ++ || ! vect_analyze_slp_instance (vinfo, bst_map, first_element, + max_tree_size)) + { + /* Dissolve reduction chain group. 
*/ +-- +2.21.0.windows.1 + diff --git a/0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch b/0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch new file mode 100644 index 0000000..9b8c4f8 --- /dev/null +++ b/0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch @@ -0,0 +1,97 @@ +From 79d1ed2d7f166a498662f6111a4defc55f0061c7 Mon Sep 17 00:00:00 2001 +From: yangyang +Date: Thu, 15 Jul 2021 09:27:27 +0800 +Subject: [PATCH 08/13] [Backport]tree-optimization: Add checks to avoid + spoiling if-conversion + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=33d114f570b4a3583421c700396fd5945acebc28 + +Add some checks in pass_splits_paths, so that pass_split_paths can recognize +the missed if-conversion opportunity and do not duplicate the corresponding +block. + +diff --git a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c +index b3efd43c7ef..9c32da76369 100644 +--- a/gcc/gimple-ssa-split-paths.c ++++ b/gcc/gimple-ssa-split-paths.c +@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-ssa.h" + #include "tree-phinodes.h" + #include "ssa-iterators.h" ++#include "fold-const.h" + + /* Given LATCH, the latch block in a loop, see if the shape of the + path reaching LATCH is suitable for being split by duplication. +@@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb) + } + } + ++ /* Canonicalize the form. */ ++ if (single_pred_p (pred1) && single_pred (pred1) == pred2 ++ && num_stmts_in_pred1 == 0) ++ std::swap (pred1, pred2); ++ ++ /* This is meant to catch another kind of cases that are likely opportunities ++ for if-conversion. After canonicalizing, PRED2 must be an empty block and ++ PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed ++ to end with a cond_stmt which has the same args with the PHI in BB. 
*/ ++ if (single_pred_p (pred2) && single_pred (pred2) == pred1 ++ && num_stmts_in_pred2 == 0) ++ { ++ gimple *cond_stmt = last_stmt (pred1); ++ if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND) ++ { ++ tree lhs = gimple_cond_lhs (cond_stmt); ++ tree rhs = gimple_cond_rhs (cond_stmt); ++ ++ gimple_stmt_iterator gsi; ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *phi = gsi_stmt (gsi); ++ if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs) ++ && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs)) ++ || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs) ++ && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs)))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "Block %d appears to be optimized to a join " ++ "point for if-convertable half-diamond.\n", ++ bb->index); ++ return false; ++ } ++ } ++ } ++ } ++ + /* If the joiner has no PHIs with useful uses there is zero chance + of CSE/DCE/jump-threading possibilities exposed by duplicating it. */ + bool found_useful_phi = false; +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c +new file mode 100644 +index 00000000000..19a130d9bf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */ ++ ++double ++foo(double *d1, double *d2, double *d3, int num, double *ip) ++{ ++ double dmax[3]; ++ ++ for (int i = 0; i < num; i++) { ++ dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i]; ++ dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i]; ++ dmax[2] = d3[i] < dmax[2] ? 
dmax[2] : d3[i]; ++ ip[i] = dmax[2]; ++ } ++ ++ return dmax[0] + dmax[1] + dmax[2]; ++} ++ ++/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */ +-- +2.21.0.windows.1 + diff --git a/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch new file mode 100644 index 0000000..434c669 --- /dev/null +++ b/0009-Backport-expand-Simplify-removing-subregs-when-expan.patch @@ -0,0 +1,141 @@ +From 7bc78d0ab13c37e2b11adb385d9916181ec4cc20 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Thu, 15 Jul 2021 09:04:55 +0800 +Subject: [PATCH 09/13] [Backport]expand: Simplify removing subregs when + expanding a copy [PR95254] + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9a182ef9ee011935d827ab5c6c9a7cd8e22257d8 + +In rtl expand, if we have a copy that matches one of the following patterns: + (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) + (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) + (set (subreg:M1 (reg:M2 ...)) (constant C)) +where mode M1 is equal in size to M2, try to detect whether the mode change +involves an implicit round trip through memory. If so, see if we can avoid +that by removing the subregs and doing the move in mode M2 instead. 
+ +diff --git a/gcc/expr.c b/gcc/expr.c +index 991b26f3341..d66fdd4e93d 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -3814,6 +3814,78 @@ emit_move_insn (rtx x, rtx y) + gcc_assert (mode != BLKmode + && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + ++ /* If we have a copy that looks like one of the following patterns: ++ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) ++ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (constant C)) ++ where mode M1 is equal in size to M2, try to detect whether the ++ mode change involves an implicit round trip through memory. ++ If so, see if we can avoid that by removing the subregs and ++ doing the move in mode M2 instead. */ ++ ++ rtx x_inner = NULL_RTX; ++ rtx y_inner = NULL_RTX; ++ ++#define CANDIDATE_SUBREG_P(subreg) \ ++ (REG_P (SUBREG_REG (subreg)) \ ++ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \ ++ GET_MODE_SIZE (GET_MODE (subreg))) \ ++ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \ ++ != CODE_FOR_nothing) ++ ++#define CANDIDATE_MEM_P(innermode, mem) \ ++ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \ ++ && !push_operand ((mem), GET_MODE (mem)) \ ++ /* Not a candiate if innermode requires too much alignment. 
*/ \ ++ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \ ++ || targetm.slow_unaligned_access (GET_MODE (mem), \ ++ MEM_ALIGN (mem)) \ ++ || !targetm.slow_unaligned_access ((innermode), \ ++ MEM_ALIGN (mem)))) ++ ++ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x)) ++ x_inner = SUBREG_REG (x); ++ ++ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y)) ++ y_inner = SUBREG_REG (y); ++ ++ if (x_inner != NULL_RTX ++ && y_inner != NULL_RTX ++ && GET_MODE (x_inner) == GET_MODE (y_inner) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS)) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && MEM_P (y) ++ && CANDIDATE_MEM_P (GET_MODE (x_inner), y)) ++ { ++ x = x_inner; ++ y = adjust_address (y, GET_MODE (x_inner), 0); ++ mode = GET_MODE (x_inner); ++ } ++ else if (y_inner != NULL_RTX ++ && MEM_P (x) ++ && CANDIDATE_MEM_P (GET_MODE (y_inner), x)) ++ { ++ x = adjust_address (x, GET_MODE (y_inner), 0); ++ y = y_inner; ++ mode = GET_MODE (y_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && CONSTANT_P (y) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), ++ mode, ALL_REGS) ++ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0))) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ + if (CONSTANT_P (y)) + { + if (optimize +diff --git a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c +new file mode 100644 +index 00000000000..10bfc868197 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */ ++ ++typedef short __attribute__((vector_size (8))) v4hi; ++ ++typedef union U4HI { v4hi v; short a[4]; } u4hi; ++ ++short b[4]; ++ ++void pass_v4hi (v4hi v) ++{ ++ int i; ++ u4hi u; ++ u.v = v; ++ for (i = 0; i < 4; i++) ++ b[i] = u.a[i]; ++}; ++ ++/* { dg-final { scan-assembler-not "ptrue" } } */ 
+diff --git a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c +index 518071bdd86..398cdba5d5f 100644 +--- a/gcc/testsuite/gcc.target/i386/pr67609.c ++++ b/gcc/testsuite/gcc.target/i386/pr67609.c +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -msse2" } */ + /* { dg-require-effective-target lp64 } */ +-/* { dg-final { scan-assembler "movdqa" } } */ ++/* { dg-final { scan-assembler "movq\t%xmm0" } } */ + + #include + __m128d reg; +-- +2.21.0.windows.1 + diff --git a/0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch b/0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch new file mode 100644 index 0000000..e5dbbf2 --- /dev/null +++ b/0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch @@ -0,0 +1,98 @@ +From b8b3e29e4cceae2bab6e0774b1af994dbe713d97 Mon Sep 17 00:00:00 2001 +From: zhanghaijian +Date: Thu, 15 Jul 2021 09:13:11 +0800 +Subject: [PATCH 10/13] [Backport]tree-optimization/94963 - avoid bogus uninit + warning with store-motion + +Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=371905d12259c180efb9b1f1b5716e969feb60f9 + +Eliding the load for store-motion causes an uninitialized variable +flowing into the loop, conditionally initialized and used. The +uninit warning cannot relate the flag used to guard the initialization +and use with the actual initialization so the following robustifies +the previous approach of marking the conditional store as not to +be warned on by instead initializing the variable on loop entry +from an uninitialized variable we mark as not to be warned for. 
+ +diff --git a/gcc/testsuite/gcc.dg/pr94963.c b/gcc/testsuite/gcc.dg/pr94963.c +new file mode 100644 +index 00000000000..09c0524fb3a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr94963.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -Wall" } */ ++ ++typedef struct ++{ ++ int p1; ++ int p2; ++ int p3; ++} P; ++struct S ++{ ++ int field; ++}; ++extern int v2; ++extern void foo (struct S *map); ++static struct S var; ++const P *pv; ++int ps; ++void ++f (void) ++{ ++ if (pv != 0) ++ for (const P *ph = pv; ph < &pv[ps]; ++ph) ++ switch (ph->p1) ++ { ++ case 1: ++ v2 = ph->p2; ++ break; ++ case 2: ++ var.field = ph->p3; ++ break; ++ } ++ if (var.field != 0) /* { dg-bogus "uninitialized" } */ ++ foo (&var); ++} +diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +index 8c33735b1fa..d74a46ef352 100644 +--- a/gcc/tree-ssa-loop-im.c ++++ b/gcc/tree-ssa-loop-im.c +@@ -1994,8 +1994,6 @@ execute_sm_if_changed (edge ex, tree mem, tree tmp_var, tree flag, + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); +- /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ +- gimple_set_no_warning (stmt, true); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + edge e1 = single_succ_edge (new_bb); +@@ -2149,13 +2147,19 @@ execute_sm (class loop *loop, vec exits, im_mem_ref *ref) + store then. */ + if ((!always_stored && !multi_threaded_model_p) + || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) ++ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); ++ else + { +- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); +- lim_data = init_lim_data (load); +- lim_data->max_loop = loop; +- lim_data->tgt_loop = loop; +- gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ /* If not emitting a load mark the uninitialized state on the ++ loop entry as not to be warned for. 
*/ ++ tree uninit = create_tmp_reg (TREE_TYPE (tmp_var)); ++ TREE_NO_WARNING (uninit) = 1; ++ load = gimple_build_assign (tmp_var, uninit); + } ++ lim_data = init_lim_data (load); ++ lim_data->max_loop = loop; ++ lim_data->tgt_loop = loop; ++ gsi_insert_before (&gsi, load, GSI_SAME_STMT); + + if (multi_threaded_model_p) + { +-- +2.21.0.windows.1 + diff --git a/0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch b/0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch new file mode 100644 index 0000000..b9f642c --- /dev/null +++ b/0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch @@ -0,0 +1,23 @@ +From 78cf3b95d7b895cfe8d6f1c2a48ebc08a662eef0 Mon Sep 17 00:00:00 2001 +From: bule +Date: Sat, 17 Jul 2021 16:38:10 +0800 +Subject: [PATCH 11/13] [simdmath] Enable 64-bits simd when test + simd_pcs_attribute-3 + +Enable 64-bits simd when test simd_pcs_attribute-3. The 64-bits simd +is default to off without specify the -msimdmath-64. + +diff --git a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +index 95f6a6803e8..e0e0efa9d7e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c ++++ b/gcc/testsuite/gcc.target/aarch64/simd_pcs_attribute-3.c +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-Ofast" } */ ++/* { dg-options "-Ofast -msimdmath-64" } */ + + __attribute__ ((__simd__)) + __attribute__ ((__nothrow__ , __leaf__ , __const__)) +-- +2.21.0.windows.1 + diff --git a/0012-fp-model-Enable-fp-model-on-kunpeng.patch b/0012-fp-model-Enable-fp-model-on-kunpeng.patch new file mode 100644 index 0000000..3e99f88 --- /dev/null +++ b/0012-fp-model-Enable-fp-model-on-kunpeng.patch @@ -0,0 +1,397 @@ +From 26ea42402eede6a441c9d74ec6b6086e5bf0bf79 Mon Sep 17 00:00:00 2001 +From: bule +Date: Mon, 19 Jul 2021 12:04:08 +0800 +Subject: [PATCH 12/13] [fp-model] Enable fp-model on kunpeng + +Enable fp-model options on kunpeng for precision control. 
+ +diff --git a/gcc/common.opt b/gcc/common.opt +index 55d4eb5a351..79c9ef6615b 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1545,6 +1545,32 @@ ffp-int-builtin-inexact + Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization + Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. + ++fftz ++Common Report Var(flag_ftz) Optimization ++Control fpcr register for flush to zero. ++ ++fp-model= ++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization ++-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. ++ ++Enum ++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) ++ ++EnumValue ++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) ++ ++EnumValue ++Enum(fp_model) String(fast) Value(FP_MODEL_FAST) ++ ++EnumValue ++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) ++ ++EnumValue ++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) ++ ++EnumValue ++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) ++ + ; Nonzero means don't put addresses of constant functions in registers. + ; Used for compiling the Unix kernel, where strange substitutions are + ; done on the assembly output. 
+diff --git a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +index e587e2e9ad6..331b12c8702 100644 +--- a/gcc/config/aarch64/aarch64-linux.h ++++ b/gcc/config/aarch64/aarch64-linux.h +@@ -50,7 +50,8 @@ + #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" ++ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ ++ %{!fno-ftz:crtfastmath.o%s}}" + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +diff --git a/gcc/flag-types.h b/gcc/flag-types.h +index 852ea76eaa2..5832298251e 100644 +--- a/gcc/flag-types.h ++++ b/gcc/flag-types.h +@@ -223,6 +223,15 @@ enum fp_contract_mode { + FP_CONTRACT_FAST = 2 + }; + ++/* Floating-point precision mode. */ ++enum fp_model { ++ FP_MODEL_NORMAL = 0, ++ FP_MODEL_FAST = 1, ++ FP_MODEL_PRECISE = 2, ++ FP_MODEL_EXCEPT = 3, ++ FP_MODEL_STRICT = 4 ++}; ++ + /* Scalar storage order kind. */ + enum scalar_storage_order_kind { + SSO_NATIVE = 0, +diff --git a/gcc/fortran/options.c b/gcc/fortran/options.c +index 4cc8a908417..c59dcf63781 100644 +--- a/gcc/fortran/options.c ++++ b/gcc/fortran/options.c +@@ -250,6 +250,7 @@ form_from_filename (const char *filename) + return f_form; + } + ++static void gfc_handle_fpe_option (const char *arg, bool trap); + + /* Finalize commandline options. */ + +@@ -277,6 +278,13 @@ gfc_post_options (const char **pfilename) + if (flag_protect_parens == -1) + flag_protect_parens = !optimize_fast; + ++ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */ ++ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) ++ { ++ gfc_handle_fpe_option ("all", false); ++ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); ++ } ++ + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for + stack arrays. 
*/ + if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) +diff --git a/gcc/opts-common.c b/gcc/opts-common.c +index de9510abd64..bf82b05c8a2 100644 +--- a/gcc/opts-common.c ++++ b/gcc/opts-common.c +@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. If not see + #include "diagnostic.h" + #include "spellcheck.h" + +-static void prune_options (struct cl_decoded_option **, unsigned int *); ++static void prune_options (struct cl_decoded_option **, unsigned int *, ++ unsigned int); + + /* An option that is undocumented, that takes a joined argument, and + that doesn't fit any of the classes of uses (language/common, +@@ -988,7 +989,7 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; +- prune_options (decoded_options, decoded_options_count); ++ prune_options (decoded_options, decoded_options_count, lang_mask); + } + + /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the +@@ -1009,11 +1010,109 @@ cancel_option (int opt_idx, int next_opt_idx, int orig_next_opt_idx) + return false; + } + ++/* Check whether opt_idx exists in decoded_options array bewteen index ++ start and end. If found, return its index in decoded_options, ++ else return end. */ ++static unsigned int ++find_opt_idx (const struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int start, unsigned int end, unsigned int opt_idx) ++{ ++ gcc_assert (end <= decoded_options_count); ++ gcc_assert (opt_idx < cl_options_count); ++ unsigned int k; ++ for (k = start; k < end; k++) ++ { ++ if (decoded_options[k].opt_index == opt_idx) ++ { ++ return k; ++ } ++ } ++ return k; ++} ++ ++/* remove the opt_index element from decoded_options array. 
*/ ++static unsigned int ++remove_option (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int opt_index) ++{ ++ gcc_assert (opt_index < decoded_options_count); ++ unsigned int i; ++ for (i = opt_index; i < decoded_options_count - 1; i++) ++ { ++ decoded_options[i] = decoded_options[i + 1]; ++ } ++ return decoded_options_count - 1; ++} ++ ++/* Handle the priority between fp-model, Ofast, and ++ ffast-math. */ ++static unsigned int ++handle_fp_model_driver (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int fp_model_index, ++ unsigned int lang_mask) ++{ ++ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; ++ enum fp_model model = (enum fp_model) fp_model_opt.value; ++ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) ++ { ++ /* If found Ofast, override Ofast with O3. */ ++ unsigned int Ofast_index; ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ while (Ofast_index != decoded_options_count) ++ { ++ const char *tmp_argv = "-O3"; ++ decode_cmdline_option (&tmp_argv, lang_mask, ++ &decoded_options[Ofast_index]); ++ warning (0, "%<-Ofast%> is degraded to %<-O3%> due to %qs", ++ fp_model_opt.orig_option_with_args_text); ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ } ++ /* If found ffast-math before fp-model=precise/strict ++ it, cancel it. 
*/ ++ unsigned int ffast_math_index; ++ ffast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, 0, ++ fp_model_index, OPT_ffast_math); ++ if (ffast_math_index != fp_model_index) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ ffast_math_index); ++ warning (0, "%<-ffast-math%> before %qs is canceled", ++ fp_model_opt.orig_option_with_args_text); ++ } ++ } ++ if (model == FP_MODEL_FAST) ++ { ++ /* If found -fno-fast-math after fp-model=fast, cancel this one. */ ++ unsigned int fno_fast_math_index; ++ fno_fast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, ++ decoded_options_count, OPT_ffast_math); ++ if (fno_fast_math_index != decoded_options_count ++ && decoded_options[fno_fast_math_index].value == 0) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ fp_model_index); ++ warning (0, ++ "%<-fp-model=fast%> before %<-fno-fast-math%> is canceled"); ++ } ++ } ++ return decoded_options_count; ++} ++ + /* Filter out options canceled by the ones after them. */ + + static void + prune_options (struct cl_decoded_option **decoded_options, +- unsigned int *decoded_options_count) ++ unsigned int *decoded_options_count, ++ unsigned int lang_mask) + { + unsigned int old_decoded_options_count = *decoded_options_count; + struct cl_decoded_option *old_decoded_options = *decoded_options; +@@ -1024,7 +1123,12 @@ prune_options (struct cl_decoded_option **decoded_options, + const struct cl_option *option; + unsigned int fdiagnostics_color_idx = 0; + ++ if (!diagnostic_ready_p ()) ++ diagnostic_initialize (global_dc, 0); ++ + /* Remove arguments which are negated by others after them. 
*/ ++ ++ unsigned int fp_model_index = old_decoded_options_count; + new_decoded_options_count = 0; + for (i = 0; i < old_decoded_options_count; i++) + { +@@ -1048,6 +1152,34 @@ prune_options (struct cl_decoded_option **decoded_options, + fdiagnostics_color_idx = i; + continue; + ++ case OPT_fp_model_: ++ /* Only the last fp-model option will take effect. */ ++ unsigned int next_fp_model_idx; ++ next_fp_model_idx = find_opt_idx (old_decoded_options, ++ old_decoded_options_count, ++ i + 1, ++ old_decoded_options_count, ++ OPT_fp_model_); ++ if (next_fp_model_idx != old_decoded_options_count) ++ { ++ /* Found more than one fp-model, cancel this one. */ ++ if (old_decoded_options[i].value ++ != old_decoded_options[next_fp_model_idx].value) ++ { ++ warning (0, "%qs is overrided by %qs", ++ old_decoded_options[i]. ++ orig_option_with_args_text, ++ old_decoded_options[next_fp_model_idx]. ++ orig_option_with_args_text); ++ } ++ break; ++ } ++ else ++ { ++ /* Found the last fp-model option. */ ++ fp_model_index = new_decoded_options_count; ++ } ++ /* FALLTHRU. 
*/ + default: + gcc_assert (opt_idx < cl_options_count); + option = &cl_options[opt_idx]; +@@ -1087,6 +1219,14 @@ keep: + break; + } + } ++ if (fp_model_index < new_decoded_options_count) ++ { ++ new_decoded_options_count ++ = handle_fp_model_driver (new_decoded_options, ++ new_decoded_options_count, ++ fp_model_index, ++ lang_mask); ++ } + + if (fdiagnostics_color_idx >= 1) + { +diff --git a/gcc/opts.c b/gcc/opts.c +index e31aa560564..6924a973a5b 100644 +--- a/gcc/opts.c ++++ b/gcc/opts.c +@@ -195,6 +195,7 @@ static void set_debug_level (enum debug_info_type type, int extended, + struct gcc_options *opts_set, + location_t loc); + static void set_fast_math_flags (struct gcc_options *opts, int set); ++static void set_fp_model_flags (struct gcc_options *opts, int set); + static void decode_d_option (const char *arg, struct gcc_options *opts, + location_t loc, diagnostic_context *dc); + static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, +@@ -2482,6 +2483,10 @@ common_handle_option (struct gcc_options *opts, + set_fast_math_flags (opts, value); + break; + ++ case OPT_fp_model_: ++ set_fp_model_flags (opts, value); ++ break; ++ + case OPT_funsafe_math_optimizations: + set_unsafe_math_optimizations_flags (opts, value); + break; +@@ -2908,6 +2913,69 @@ set_fast_math_flags (struct gcc_options *opts, int set) + } + } + ++/* Handle fp-model options. */ ++static void ++set_fp_model_flags (struct gcc_options *opts, int set) ++{ ++ enum fp_model model = (enum fp_model) set; ++ switch (model) ++ { ++ case FP_MODEL_FAST: ++ /* Equivalent to open ffast-math. */ ++ set_fast_math_flags (opts, 1); ++ break; ++ ++ case FP_MODEL_PRECISE: ++ /* Equivalent to close ffast-math. */ ++ set_fast_math_flags (opts, 0); ++ /* Turn on -frounding-math -fsignaling-nans. 
*/ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_EXCEPT: ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ /* Also turn on ffpe-trap in fortran. */ ++ break; ++ ++ case FP_MODEL_STRICT: ++ /* Turn on both precise and except. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_NORMAL: ++ /* Do nothing. */ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* When -funsafe-math-optimizations is set the following + flags are set as well. 
*/ + static void +-- +2.21.0.windows.1 + diff --git a/0013-LoopElim-Redundant-loop-elimination-optimization.patch b/0013-LoopElim-Redundant-loop-elimination-optimization.patch new file mode 100644 index 0000000..d50107e --- /dev/null +++ b/0013-LoopElim-Redundant-loop-elimination-optimization.patch @@ -0,0 +1,499 @@ +From 0d14a2b7a3defc82ed16c99a18c2bc2e6be9f5b1 Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Fri, 16 Jul 2021 23:21:38 -0400 +Subject: [PATCH 13/13] [LoopElim] Redundant loop elimination optimization + +Introduce redundant loop elimination optimization controlled +by -floop-elim. And it's often used with -ffinite-loops. + +diff --git a/gcc/common.opt b/gcc/common.opt +index 79c9ef6615b..b2b0aac7fdf 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1169,6 +1169,10 @@ fcompare-elim + Common Report Var(flag_compare_elim_after_reload) Optimization + Perform comparison elimination after register allocation has finished. + ++floop-elim ++Common Report Var(flag_loop_elim) Init(0) Optimization ++Perform redundant loop elimination. ++ + fconserve-stack + Common Var(flag_conserve_stack) Optimization + Do not perform optimizations increasing noticeably stack usage. 
+diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +index 3b5b6907679..591b6435f78 100644 +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -69,6 +69,7 @@ static hash_set * get_non_trapping (); + static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); + static void hoist_adjacent_loads (basic_block, basic_block, + basic_block, basic_block); ++static bool do_phiopt_pattern (basic_block, basic_block, basic_block); + static bool gate_hoist_loads (void); + + /* This pass tries to transform conditional stores into unconditional +@@ -257,6 +258,10 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool do_hoist_loads, bool early_p) + hoist_adjacent_loads (bb, bb1, bb2, bb3); + continue; + } ++ else if (flag_loop_elim && do_phiopt_pattern (bb, bb1, bb2)) ++ { ++ continue; ++ } + else + continue; + +@@ -2819,6 +2824,449 @@ hoist_adjacent_loads (basic_block bb0, basic_block bb1, + } + } + ++static bool check_uses (tree, hash_set *); ++ ++/* Check SSA_NAME is used in ++ if (SSA_NAME == 0) ++ ... ++ or ++ if (SSA_NAME != 0) ++ ... 
++*/ ++static bool ++check_uses_cond (const_tree ssa_name, gimple *stmt, ++ hash_set *hset ATTRIBUTE_UNUSED) ++{ ++ tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR) ++ { ++ return false; ++ } ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ if ((lhs == ssa_name && integer_zerop (rhs)) ++ || (rhs == ssa_name && integer_zerop (lhs))) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ _tmp = SSA_NAME == 0; ++ or ++ _tmp = SSA_NAME != 0; ++ or ++ _tmp = SSA_NAME | _tmp2; ++*/ ++static bool ++check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ tree_code code = gimple_assign_rhs_code (stmt); ++ tree lhs, rhs1, rhs2; ++ ++ switch (code) ++ { ++ case EQ_EXPR: ++ case NE_EXPR: ++ rhs1 = gimple_assign_rhs1 (stmt); ++ rhs2 = gimple_assign_rhs2 (stmt); ++ if ((rhs1 == ssa_name && integer_zerop (rhs2)) ++ || (rhs2 == ssa_name && integer_zerop (rhs1))) ++ { ++ return true; ++ } ++ break; ++ ++ case BIT_IOR_EXPR: ++ lhs = gimple_assign_lhs (stmt); ++ if (hset->contains (lhs)) ++ { ++ return false; ++ } ++ /* We should check the use of _tmp further. */ ++ return check_uses (lhs, hset); ++ ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Check SSA_NAME is used in ++ # result = PHI ++*/ ++static bool ++check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) ++{ ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ { ++ tree arg = gimple_phi_arg_def (stmt, i); ++ if (!integer_zerop (arg) && arg != ssa_name) ++ { ++ return false; ++ } ++ } ++ ++ tree result = gimple_phi_result (stmt); ++ ++ /* It is used to avoid infinite recursion, ++ ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp2 = PHI <0 (bb 1), _tmp3 (bb 3)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ # _tmp3 = PHI <0 (bb 1), _tmp2 (bb 2)> ++ {BODY} ++ if (cond) ++ goto ++ else ++ goto ++ ++ ++ ... 
++ */ ++ if (hset->contains (result)) ++ { ++ return false; ++ } ++ ++ return check_uses (result, hset); ++} ++ ++/* Check the use of SSA_NAME, it should only be used in comparison ++ operation and PHI node. HSET is used to record the ssa_names ++ that have been already checked. */ ++static bool ++check_uses (tree ssa_name, hash_set *hset) ++{ ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ if (TREE_CODE (ssa_name) != SSA_NAME) ++ { ++ return false; ++ } ++ ++ if (SSA_NAME_VAR (ssa_name) ++ && is_global_var (SSA_NAME_VAR (ssa_name))) ++ { ++ return false; ++ } ++ ++ hset->add (ssa_name); ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ /* Ignore debug gimple statements. */ ++ if (is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_COND: ++ if (!check_uses_cond (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_ASSIGN: ++ if (!check_uses_assign (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ case GIMPLE_PHI: ++ if (!check_uses_phi (ssa_name, stmt, hset)) ++ { ++ return false; ++ } ++ break; ++ ++ default: ++ return false; ++ } ++ } ++ return true; ++} ++ ++static bool ++check_def_gimple (gimple *def1, gimple *def2, const_tree result) ++{ ++ /* def1 and def2 should be POINTER_PLUS_EXPR. */ ++ if (!is_gimple_assign (def1) || !is_gimple_assign (def2) ++ || gimple_assign_rhs_code (def1) != POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (def2) != POINTER_PLUS_EXPR) ++ { ++ return false; ++ } ++ ++ tree rhs12 = gimple_assign_rhs2 (def1); ++ ++ tree rhs21 = gimple_assign_rhs1 (def2); ++ tree rhs22 = gimple_assign_rhs2 (def2); ++ ++ if (rhs21 != result) ++ { ++ return false; ++ } ++ ++ /* We should have a positive pointer-plus constant to ensure ++ that the pointer value is continuously increasing. 
*/ ++ if (TREE_CODE (rhs12) != INTEGER_CST || TREE_CODE (rhs22) != INTEGER_CST ++ || compare_tree_int (rhs12, 0) <= 0 || compare_tree_int (rhs22, 0) <= 0) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++static bool ++check_loop_body (basic_block bb0, basic_block bb2, const_tree result) ++{ ++ gimple *g01 = first_stmt (bb0); ++ if (!g01 || !is_gimple_assign (g01) ++ || gimple_assign_rhs_code (g01) != MEM_REF ++ || TREE_OPERAND (gimple_assign_rhs1 (g01), 0) != result) ++ { ++ return false; ++ } ++ ++ gimple *g02 = g01->next; ++ /* GIMPLE_COND would be the last gimple in a basic block, ++ and have no other side effects on RESULT. */ ++ if (!g02 || gimple_code (g02) != GIMPLE_COND) ++ { ++ return false; ++ } ++ ++ if (first_stmt (bb2) != last_stmt (bb2)) ++ { ++ return false; ++ } ++ ++ return true; ++} ++ ++/* Pattern is like ++
++   arg1 = base (rhs11) + cst (rhs12); [def1]
++   goto 
++
++   
++   arg2 = result (rhs21) + cst (rhs22); [def2]
++
++   
++   # result = PHI 
++   _v = *result;  [g01]
++   if (_v == 0)   [g02]
++     goto 
++   else
++     goto 
++
++   
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_bb_order (basic_block bb0, basic_block &bb1, basic_block &bb2,
++		gphi *phi_stmt, gimple *&output)
++{
++  /* Start check from PHI node in BB0.  */
++  if (gimple_phi_num_args (phi_stmt) != 2
++      || virtual_operand_p (gimple_phi_result (phi_stmt)))
++    {
++      return false;
++    }
++
++  tree result = gimple_phi_result (phi_stmt);
++  tree arg1 = gimple_phi_arg_def (phi_stmt, 0);
++  tree arg2 = gimple_phi_arg_def (phi_stmt, 1);
++
++  if (TREE_CODE (arg1) != SSA_NAME
++      || TREE_CODE (arg2) != SSA_NAME
++      || SSA_NAME_IS_DEFAULT_DEF (arg1)
++      || SSA_NAME_IS_DEFAULT_DEF (arg2))
++    {
++      return false;
++    }
++
++  gimple *def1 = SSA_NAME_DEF_STMT (arg1);
++  gimple *def2 = SSA_NAME_DEF_STMT (arg2);
++
++  /* Swap bb1 and bb2 if pattern is like
++     if (_v != 0)
++       goto 
++     else
++       goto 
++  */
++  if (gimple_bb (def2) == bb1 && EDGE_SUCC (bb1, 0)->dest == bb0)
++    {
++      std::swap (bb1, bb2);
++    }
++
++  /* prebb[def1] --> bb0 <-- bb2[def2] */
++  if (!gimple_bb (def1)
++      || EDGE_SUCC (gimple_bb (def1), 0)->dest != bb0
++      || gimple_bb (def2) != bb2 || EDGE_SUCC (bb2, 0)->dest != bb0)
++    {
++      return false;
++    }
++
++  /* Check whether define gimple meets the pattern requirements.  */
++  if (!check_def_gimple (def1, def2, result))
++    {
++      return false;
++    }
++
++  if (!check_loop_body (bb0, bb2, result))
++    {
++      return false;
++    }
++
++  output = def1;
++  return true;
++}
++
++/* Check pattern
++   
++   _1 = result - base;     [g1]
++   _2 = _1 /[ex] cst;      [g2]
++   _3 = (unsigned int) _2; [g3]
++   if (_3 == 0)
++   ...
++*/
++static bool
++check_gimple_order (basic_block bb1, const_tree base, const_tree cst,
++		    const_tree result, gimple *&output)
++{
++  gimple *g1 = first_stmt (bb1);
++  if (!g1 || !is_gimple_assign (g1)
++      || gimple_assign_rhs_code (g1) != POINTER_DIFF_EXPR
++      || gimple_assign_rhs1 (g1) != result
++      || gimple_assign_rhs2 (g1) != base)
++    {
++      return false;
++    }
++
++  gimple *g2 = g1->next;
++  if (!g2 || !is_gimple_assign (g2)
++      || gimple_assign_rhs_code (g2) != EXACT_DIV_EXPR
++      || gimple_assign_lhs (g1) != gimple_assign_rhs1 (g2)
++      || TREE_CODE (gimple_assign_rhs2 (g2)) != INTEGER_CST)
++    {
++      return false;
++    }
++
++  /* INTEGER_CST cst in gimple def1.  */
++  HOST_WIDE_INT num1 = TREE_INT_CST_LOW (cst);
++  /* INTEGER_CST cst in gimple g2.  */
++  HOST_WIDE_INT num2 = TREE_INT_CST_LOW (gimple_assign_rhs2 (g2));
++  /* _2 must be at least a positive number.  */
++  if (num2 == 0 || num1 / num2 <= 0)
++    {
++      return false;
++    }
++
++  gimple *g3 = g2->next;
++  if (!g3 || !is_gimple_assign (g3)
++      || gimple_assign_rhs_code (g3) != NOP_EXPR
++      || gimple_assign_lhs (g2) != gimple_assign_rhs1 (g3)
++      || TREE_CODE (gimple_assign_lhs (g3)) != SSA_NAME)
++    {
++      return false;
++    }
++
++  /* _3 should only be used in comparison operation or PHI node.  */
++  hash_set *hset = new hash_set;
++  if (!check_uses (gimple_assign_lhs (g3), hset))
++    {
++      delete hset;
++      return false;
++    }
++  delete hset;
++
++  output = g3;
++  return true;
++}
++
++static bool
++do_phiopt_pattern (basic_block bb0, basic_block bb1, basic_block bb2)
++{
++  gphi_iterator gsi;
++
++  for (gsi = gsi_start_phis (bb0); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gphi *phi_stmt = gsi.phi ();
++      gimple *def1 = NULL;
++      tree base, cst, result;
++
++      if (!check_bb_order (bb0, bb1, bb2, phi_stmt, def1))
++	{
++	  continue;
++	}
++
++      base = gimple_assign_rhs1 (def1);
++      cst = gimple_assign_rhs2 (def1);
++      result = gimple_phi_result (phi_stmt);
++
++      gimple *stmt = NULL;
++      if (!check_gimple_order (bb1, base, cst, result, stmt))
++	{
++	  continue;
++	}
++
++      gcc_assert (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "PHIOPT pattern optimization (1) - Rewrite:\n");
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "to\n");
++	}
++
++      /* Rewrite statement
++	   _3 = (unsigned int) _2;
++	 to
++	   _3 = (unsigned int) 1;
++      */
++      tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
++      gimple_assign_set_rhs1 (stmt, build_int_cst (type, 1));
++      update_stmt (stmt);
++
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  print_gimple_stmt (dump_file, stmt, 0);
++	  fprintf (dump_file, "\n");
++	}
++
++      return true;
++    }
++  return false;
++}
++
+ /* Determine whether we should attempt to hoist adjacent loads out of
+    diamond patterns in pass_phiopt.  Always hoist loads if
+    -fhoist-adjacent-loads is specified and the target machine has
+-- 
+2.21.0.windows.1
+
diff --git a/gcc.spec b/gcc.spec
index 5818b77..e66145e 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -1,7 +1,7 @@
-%global DATE 20210727
+%global DATE 20210728
 
-%global gcc_version 10.3.0
-%global gcc_major 10.3.0
+%global gcc_version 10.3.1
+%global gcc_major 10.3.1
 %global _unpackaged_files_terminate_build 0
 %global _performance_build 1
 
@@ -13,7 +13,7 @@
 %global build_go 0
 %global build_d 0
 %global build_check 0
-%ifarch %{ix86} x86_64 ia64 ppc64le
+%ifarch %{ix86} x86_64 ia64 ppc64le aarch64
 %global build_libquadmath 1
 %else
 %global build_libquadmath 0
@@ -59,7 +59,7 @@
 Summary: Various compilers (C, C++, Objective-C, ...)
 Name: gcc
 Version: %{gcc_version}
-Release: %{DATE}.1
+Release: %{DATE}.2
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
 URL: https://gcc.gnu.org
 
@@ -114,6 +114,19 @@ Provides: bundled(libiberty)
 Provides: gcc(major) = %{gcc_major}
 
 
+Patch1: 0001-libquadmath-Enable-libquadmath-on-kunpeng.patch
+Patch2: 0002-Backport-cselim-Extend-to-check-non-trapping-for-mor.patch
+Patch3: 0003-version-Set-version-to-10.3.1.patch
+Patch4: 0004-Backport-tree-optimization-Avoid-issueing-loads-in-S.patch
+Patch5: 0005-Backport-tree-optimization-Fix-load-eliding-in-SM.patch
+Patch6: 0006-simdmath-Enable-simdmath-on-kunpeng.patch
+Patch7: 0007-Vect-Enable-skipping-vectorization-on-reduction-chai.patch
+Patch8: 0008-Backport-tree-optimization-Add-checks-to-avoid-spoil.patch
+Patch9: 0009-Backport-expand-Simplify-removing-subregs-when-expan.patch
+Patch10: 0010-Backport-tree-optimization-94963-avoid-bogus-uninit-.patch
+Patch11: 0011-simdmath-Enable-64-bits-simd-when-test-simd_pcs_attr.patch
+Patch12: 0012-fp-model-Enable-fp-model-on-kunpeng.patch
+Patch13: 0013-LoopElim-Redundant-loop-elimination-optimization.patch
 
 %global gcc_target_platform %{_arch}-linux-gnu
 
@@ -555,6 +568,19 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %setup -q -n gcc-10.3.0
 /bin/pwd
 
+%patch1 -p1
+%patch2 -p1
+%patch3 -p1
+%patch4 -p1
+%patch5 -p1
+%patch6 -p1
+%patch7 -p1
+%patch8 -p1
+%patch9 -p1
+%patch10 -p1
+%patch11 -p1
+%patch12 -p1
+%patch13 -p1
 
 
 %build
@@ -1602,6 +1628,7 @@ end
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdnoreturn.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdatomic.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/gcov.h
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/simdmath.h
 %ifarch %{ix86} x86_64
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h
@@ -2038,6 +2065,7 @@ end
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_arithmetic.mod
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_exceptions.mod
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_features.mod
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/simdmath_f.h
 %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/f951
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libgfortran.spec
 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libcaf_single.a
@@ -2518,6 +2546,12 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Wed Jul 28 2021 eastb233  - 10.3.1-20210728.2
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Sync patch from openeuler/gcc
+
 * Tue Jul 27 2021 eastb233  - 10.3.0-20210727.1
 - Type:Init
 - ID:NA
-- 
Gitee