21 Star 27 Fork 151

src-openEuler/gcc

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0344-i386-Fix-vfpclassph-non-optimizied-intrin.patch 3.93 KB
一键复制 编辑 原始数据 按行查看 历史
Lin 提交于 2025-02-17 14:46 +08:00 . [Sync] Sync patches from openeuler/gcc
From 9cb8d824a580c1ea79718300deed14b8ec5cc1e2 Mon Sep 17 00:00:00 2001
From: Haochen Jiang <haochen.jiang@intel.com>
Date: Mon, 2 Sep 2024 15:00:22 +0800
Subject: [PATCH 07/14] i386: Fix vfpclassph non-optimizied intrin
The non-optimized variant of the intrinsic has a typo in its mask type, which
causes the high bits of __mmask32 to be unexpectedly zeroed.
The test does not fail under O0 with current 1b since the testcase is
wrong. We need to include avx512-mask-type.h after SIZE is defined, or
it will always be __mmask8. That problem also happened in AVX10.2 testcases.
I will write a separate patch to fix that.
gcc/ChangeLog:
* config/i386/avx512fp16intrin.h
(_mm512_mask_fpclass_ph_mask): Correct mask type to __mmask32.
(_mm512_fpclass_ph_mask): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fp16-vfpclassph-1c.c: New test.
(cherry picked from commit 6e59b188c4a051d4f2de5220d30681e6963d96c0) (gcc-12)
---
gcc/config/i386/avx512fp16intrin.h | 4 +-
.../i386/avx512fp16-vfpclassph-1c.c | 77 +++++++++++++++++++
2 files changed, 79 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index b16ccfcb7f1..6330e57ebb8 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -2321,11 +2321,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
#else
#define _mm512_mask_fpclass_ph_mask(u, x, c) \
((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)(u)))
+ (int) (c),(__mmask32)(u)))
#define _mm512_fpclass_ph_mask(x, c) \
((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)-1))
+ (int) (c),(__mmask32)-1))
#endif /* __OPIMTIZE__ */
/* Intrinsics vgetexpph, vgetexpsh. */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
new file mode 100644
index 00000000000..4739f1228e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512fp16-vfpclassph-1c.c
@@ -0,0 +1,77 @@
+/* { dg-do run } */
+/* { dg-options "-O0 -mavx512fp16" } */
+/* { dg-require-effective-target avx512fp16 } */
+
+#define AVX512FP16
+#include "avx512f-helper.h"
+
+#include <math.h>
+#include <limits.h>
+#include <float.h>
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+#ifndef __FPCLASSPH__
+#define __FPCLASSPH__
+int check_fp_class_hp (_Float16 src, int imm) /* Reference check: nonzero when src is in any category whose bit is set in imm.  */
+{
+ int qNaN_res = isnan (src);
+ int sNaN_res = isnan (src); /* Same isnan test as qNaN_res: quiet and signaling NaNs are not told apart here.  */
+ int Pzero_res = (src == 0.0);
+ int Nzero_res = (src == -0.0); /* Value comparison, so this is true exactly when Pzero_res is; the sign of zero is not examined.  */
+ int PInf_res = (isinf (src) == 1); /* NOTE(review): expects isinf to return +1 for +Inf -- confirm for the libm in use.  */
+ int NInf_res = (isinf (src) == -1); /* NOTE(review): likewise expects -1 for -Inf.  */
+ int Denorm_res = (fpclassify (src) == FP_SUBNORMAL);
+ int FinNeg_res = __builtin_finite (src) && (src < 0);
+ /* imm bits, low to high: qNaN, +0, -0, +Inf, -Inf, denormal, finite negative, sNaN.  */
+ int result = (((imm & 1) && qNaN_res)
+ || (((imm >> 1) & 1) && Pzero_res)
+ || (((imm >> 2) & 1) && Nzero_res)
+ || (((imm >> 3) & 1) && PInf_res)
+ || (((imm >> 4) & 1) && NInf_res)
+ || (((imm >> 5) & 1) && Denorm_res)
+ || (((imm >> 6) & 1) && FinNeg_res)
+ || (((imm >> 7) & 1) && sNaN_res));
+ return result; /* 1 if any selected category matched, otherwise 0.  */
+}
+#endif
+
+MASK_TYPE
+CALC (_Float16 *s1, int imm) /* Reference mask: bit i is set when s1[i] matches a category selected by imm.  */
+{
+ int i;
+ MASK_TYPE res = 0;
+
+ for (i = 0; i < SIZE; i++) /* Reads the first SIZE elements of s1.  */
+ if (check_fp_class_hp(s1[i], imm))
+ res = res | ((MASK_TYPE) 1 << i); /* Shift a MASK_TYPE one: an int 1 << i is undefined once i reaches the sign bit of int.  */
+
+ return res;
+}
+
+void
+TEST (void) /* Compares the unmasked and masked fpclass intrinsics against the scalar reference from CALC.  */
+{
+ int i;
+ UNION_TYPE (AVX512F_LEN, h) src; /* Helper-defined union; used below through its .a (element array) and .x (vector) members.  */
+ MASK_TYPE res1, res2, res_ref = 0;
+ MASK_TYPE mask = MASK_VALUE; /* MASK_VALUE comes from the included helper headers -- presumably a fixed bit pattern; verify there.  */
+
+ src.a[SIZE - 1] = NAN; /* Last lane: NaN.  */
+ src.a[SIZE - 2] = 1.0 / 0.0; /* Second-to-last lane: 1.0 / 0.0 (positive infinity under IEEE arithmetic).  */
+ for (i = 0; i < SIZE - 2; i++) /* Remaining lanes: values starting at -24.43 and rising by 0.6 per lane.  */
+ {
+ src.a[i] = -24.43 + 0.6 * i;
+ }
+
+ res1 = INTRINSIC (_fpclass_ph_mask) (src.x, 0xFF); /* 0xFF sets all eight category bits of the immediate.  */
+ res2 = INTRINSIC (_mask_fpclass_ph_mask) (mask, src.x, 0xFF);
+
+ res_ref = CALC (src.a, 0xFF);
+
+ if (res_ref != res1) /* Unmasked result must equal the reference mask.  */
+ abort ();
+
+ if ((mask & res_ref) != res2) /* Masked result must equal the reference restricted to the bits set in mask.  */
+ abort ();
+}
--
2.31.1
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-openeuler/gcc.git
git@gitee.com:src-openeuler/gcc.git
src-openeuler
gcc
gcc
master

搜索帮助