Ai
23 Star 29 Fork 166

src-openEuler/gcc
关闭

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0341-Align-ix86_-move_max-store_max-with-vectorizer.patch 9.04 KB
一键复制 编辑 原始数据 按行查看 历史
Lin 提交于 2025-02-17 14:46 +08:00 . [Sync] Sync patches from openeuler/gcc
From 002e45c7f46a0f8dd2b5381cd1ee1341f8987fca Mon Sep 17 00:00:00 2001
From: liuhongt <hongtao.liu@intel.com>
Date: Thu, 15 Aug 2024 12:54:07 +0800
Subject: [PATCH 04/14] Align ix86_{move_max,store_max} with vectorizer.
When none of mprefer-vector-width, avx256_optimal/avx128_optimal,
avx256_store_by_pieces/avx512_store_by_pieces is specified, GCC will
set ix86_{move_max,store_max} as max available vector length except
for AVX part.
if (TARGET_AVX512F_P (opts->x_ix86_isa_flags)
&& TARGET_EVEX512_P (opts->x_ix86_isa_flags2))
opts->x_ix86_move_max = PVW_AVX512;
else
opts->x_ix86_move_max = PVW_AVX128;
So for -mavx2, vectorizer will choose 256-bit for vectorization, but
128-bit is used for struct copy, there could be a potential STLF issue
due to this "misalign".
The patch fixes that.
gcc/ChangeLog:
* config/i386/i386-options.cc (ix86_option_override_internal):
set ix86_{move_max,store_max} to PVW_AVX256 when TARGET_AVX
instead of PVW_AVX128.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pieces-memcpy-10.c: Add -mprefer-vector-width=128.
* gcc.target/i386/pieces-memcpy-6.c: Ditto.
* gcc.target/i386/pieces-memset-38.c: Ditto.
* gcc.target/i386/pieces-memset-40.c: Ditto.
* gcc.target/i386/pieces-memset-41.c: Ditto.
* gcc.target/i386/pieces-memset-42.c: Ditto.
* gcc.target/i386/pieces-memset-43.c: Ditto.
* gcc.target/i386/pieces-strcpy-2.c: Ditto.
* gcc.target/i386/pieces-memcpy-22.c: New test.
* gcc.target/i386/pieces-memset-51.c: New test.
* gcc.target/i386/pieces-strcpy-3.c: New test.
(cherry picked from commit aea374238cec1a1e53fb79575d2f998e16926999)
---
gcc/config/i386/i386-options.cc | 6 ++++++
gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c | 12 ++++++++++++
gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memset-38.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memset-40.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memset-41.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memset-42.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memset-43.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-memset-51.c | 12 ++++++++++++
gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c | 2 +-
gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c | 15 +++++++++++++++
12 files changed, 53 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-memset-51.c
create mode 100644 gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index 991661fe4a2..061a1584318 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -2802,6 +2802,9 @@ ix86_option_override_internal (bool main_args_p,
{
if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_move_max = PVW_AVX512;
+ /* Align with vectorizer to avoid potential STLF issue. */
+ else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+ opts->x_ix86_move_max = PVW_AVX256;
else
opts->x_ix86_move_max = PVW_AVX128;
}
@@ -2823,6 +2826,9 @@ ix86_option_override_internal (bool main_args_p,
{
if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
opts->x_ix86_store_max = PVW_AVX512;
+ /* Align with vectorizer to avoid potential STLF issue. */
+ else if (TARGET_AVX_P (opts->x_ix86_isa_flags))
+ opts->x_ix86_store_max = PVW_AVX256;
else
opts->x_ix86_store_max = PVW_AVX128;
}
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
index 5faee21f9b9..53ad0b3be44 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-10.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *dst, *src;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
new file mode 100644
index 00000000000..605b3623ffc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-22.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+ __builtin_memcpy (dst, src, 33);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
index 5f99cc98c47..cfd2a86cf33 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memcpy-6.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *dst, *src;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
index ed4a24a54fd..ddd194debd5 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-38.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
index 4eda73ead59..9c206465d46 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-40.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx512f -mavx2 -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx512f -mavx2 -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
index 93df8101e4d..b0756182e35 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-41.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge -mno-stackrealign" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge -mno-stackrealign" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
index df0c122aae7..103da699ae5 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-42.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
index 2f2179c2df9..f1494e17610 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-43.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *dst;
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-51.c b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
new file mode 100644
index 00000000000..192ec0d1647
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-memset-51.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *dst;
+
+void
+foo (int x)
+{
+ __builtin_memset (dst, x, 64);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
index 90446edb4f3..9bb94b7419b 100644
--- a/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { ! ia32 } } } */
-/* { dg-options "-O2 -mno-avx2 -mavx -mtune=sandybridge" } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mprefer-vector-width=128 -mtune=sandybridge" } */
extern char *strcpy (char *, const char *);
diff --git a/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
new file mode 100644
index 00000000000..df7571b547f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pieces-strcpy-3.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mno-avx2 -mavx -mtune=generic" } */
+
+extern char *strcpy (char *, const char *);
+
+void
+foo (char *s)
+{
+ strcpy (s,
+ "1234567890abcdef123456abcdef5678123456abcdef567abcdef678"
+ "1234567");
+}
+
+/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%ymm" 2 } } */
--
2.31.1
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-openeuler/gcc.git
git@gitee.com:src-openeuler/gcc.git
src-openeuler
gcc
gcc
master

搜索帮助