21 Star 27 Fork 151

src-openEuler/gcc

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch 59.84 KB
一键复制 编辑 原始数据 按行查看 历史
huangzifeng 提交于 2024-11-21 11:35 +08:00 . Sync patches from branch openEuler-24.09
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824
From 737d2a5f1c5e725b7e5a20075270016ebf56b44c Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 13 Sep 2022 09:28:49 +0100
Subject: [PATCH 058/157] [Backport][SME] aarch64: Vector move fixes for
+nosimd
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=721c0fb3aca31d3bf8ad6e929eab32e29a427e60
This patch fixes various issues around the handling of vectors
and (particularly) vector structures with +nosimd. Previously,
passing and returning structures would trigger an ICE, since:
* we didn't allow the structure modes to be stored in FPRs
* we didn't provide +nosimd move patterns
* splitting the moves into word-sized pieces (the default
strategy without move patterns) doesn't work because the
registers are doubleword sized.
The patch is a bit of a hodge-podge since a lot of the handling of
moves, register costs, and register legitimacy is so interconnected.
It didn't seem feasible to split things further.
Some notes:
* The patch recognises vector and tuple modes based on TARGET_FLOAT
rather than TARGET_SIMD, and instead adds TARGET_SIMD to places
that really do need the vector ISA. This is necessary for the
modes to be handled correctly in register arguments and returns.
* The 64-bit (DREG) STP peephole required TARGET_SIMD but the
LDP peephole didn't. I think the LDP one is right, since
DREG moves could involve GPRs as well as FPRs.
* The patch keeps the existing choices of instructions for
TARGET_SIMD, just in case they happen to be better than FMOV
on some uarches.
* Before the patch, +nosimd Q<->Q moves of 128-bit scalars went via
a GPR, thanks to a secondary reload pattern. This approach might
not be ideal, but there's no reason that 128-bit vectors should
behave differently from 128-bit scalars. The patch therefore
extends the current scalar approach to vectors.
* Multi-vector LD1 and ST1 require TARGET_SIMD, so the TARGET_FLOAT
structure moves need to use LDP/STP and LDR/STR combinations
instead. That's also what we do for big-endian even with
TARGET_SIMD, so most of the code was already there. The patterns
for structures of 64-bit vectors are identical, but the patterns
for structures of 128-bit vectors need to cope with the lack of
128-bit Q<->Q moves.
It isn't feasible to move multi-vector tuples via GPRs, so the
patch moves them via memory instead. This contaminates the port
with its first secondary memory reload.
gcc/
* config/aarch64/aarch64.cc (aarch64_classify_vector_mode): Use
TARGET_FLOAT instead of TARGET_SIMD.
(aarch64_vectorize_related_mode): Restrict ADVSIMD handling to
TARGET_SIMD.
(aarch64_hard_regno_mode_ok): Don't allow tuples of 2 64-bit vectors
in GPRs.
(aarch64_classify_address): Treat little-endian structure moves
like big-endian for TARGET_FLOAT && !TARGET_SIMD.
(aarch64_secondary_memory_needed): New function.
(aarch64_secondary_reload): Handle 128-bit Advanced SIMD vectors
in the same way as TF, TI and TD.
(aarch64_rtx_mult_cost): Restrict ADVSIMD handling to TARGET_SIMD.
(aarch64_rtx_costs): Likewise.
(aarch64_register_move_cost): Treat a pair of 64-bit vectors
separately from a single 128-bit vector. Handle the cost implied
by aarch64_secondary_memory_needed.
(aarch64_simd_valid_immediate): Restrict ADVSIMD handling to
TARGET_SIMD.
(aarch64_expand_vec_perm_const_1): Likewise.
(TARGET_SECONDARY_MEMORY_NEEDED): New macro.
* config/aarch64/iterators.md (VTX): New iterator.
* config/aarch64/aarch64.md (arches): Add fp_q as a synonym of simd.
(arch_enabled): Adjust accordingly.
(@aarch64_reload_mov<TX:mode>): Extend to...
(@aarch64_reload_mov<VTX:mode>): ...this.
* config/aarch64/aarch64-simd.md (mov<mode>): Require TARGET_FLOAT
rather than TARGET_SIMD.
(movmisalign<mode>): Likewise.
(load_pair<DREG:mode><DREG2:mode>): Likewise.
(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
(load_pair<VQ:mode><VQ2:mode>): Likewise.
(vec_store_pair<VQ:mode><VQ2:mode>): Likewise.
(@aarch64_split_simd_mov<mode>): Likewise.
(aarch64_get_low<mode>): Likewise.
(aarch64_get_high<mode>): Likewise.
(aarch64_get_half<mode>): Likewise. Canonicalize to a move for
lowpart extracts.
(*aarch64_simd_mov<VDMOV:mode>): Require TARGET_FLOAT rather than
TARGET_SIMD. Use different w<-w and r<-w instructions for
!TARGET_SIMD. Disable immediate moves for !TARGET_SIMD but
add an alternative specifically for w<-Z.
(*aarch64_simd_mov<VQMOV:mode>): Require TARGET_FLOAT rather than
TARGET_SIMD. Likewise for the associated define_splits. Disable
FPR moves and immediate moves for !TARGET_SIMD but add an alternative
specifically for w<-Z.
(aarch64_simd_mov_from_<mode>high): Require TARGET_FLOAT rather than
TARGET_SIMD. Restrict the existing alternatives to TARGET_SIMD
but add a new r<-w one for !TARGET_SIMD.
(*aarch64_get_high<mode>): New pattern.
(load_pair_lanes<mode>): Require TARGET_FLOAT rather than TARGET_SIMD.
(store_pair_lanes<mode>): Likewise.
(*aarch64_combine_internal<mode>): Likewise. Restrict existing
w<-w, w<-r and w<-m alternatives to TARGET_SIMD but add a new w<-r
alternative for !TARGET_SIMD.
(*aarch64_combine_internal_be<mode>): Likewise.
(aarch64_combinez<mode>): Require TARGET_FLOAT rather than TARGET_SIMD.
Remove bogus arch attribute.
(*aarch64_combinez_be<mode>): Likewise.
(@aarch64_vec_concat<mode>): Require TARGET_FLOAT rather than
TARGET_SIMD.
(aarch64_combine<mode>): Likewise.
(aarch64_rev_reglist<mode>): Likewise.
(mov<mode>): Likewise.
(*aarch64_be_mov<VSTRUCT_2D:mode>): Extend to TARGET_FLOAT &&
!TARGET_SIMD, regardless of endianness. Extend associated
define_splits in the same way, both for this pattern and the
ones below.
(*aarch64_be_mov<VSTRUCT_2Qmode>): Likewise. Restrict w<-w
alternative to TARGET_SIMD.
(*aarch64_be_movoi): Likewise.
(*aarch64_be_movci): Likewise.
(*aarch64_be_movxi): Likewise.
(*aarch64_be_mov<VSTRUCT_4QD:mode>): Extend to TARGET_FLOAT
&& !TARGET_SIMD, regardless of endianness. Restrict w<-w alternative
to TARGET_SIMD for tuples of 128-bit vectors.
(*aarch64_be_mov<VSTRUCT_4QD:mode>): Likewise.
* config/aarch64/aarch64-ldpstp.md: Remove TARGET_SIMD condition
from DREG STP peephole. Change TARGET_SIMD to TARGET_FLOAT in
the VQ and VP_2E LDP and STP peepholes.
gcc/testsuite/
* gcc.target/aarch64/ldp_stp_20.c: New test.
* gcc.target/aarch64/ldp_stp_21.c: Likewise.
* gcc.target/aarch64/ldp_stp_22.c: Likewise.
* gcc.target/aarch64/ldp_stp_23.c: Likewise.
* gcc.target/aarch64/ldp_stp_24.c: Likewise.
* gcc.target/aarch64/movv16qi_1.c (gpr_to_gpr): New function.
* gcc.target/aarch64/movv8qi_1.c (gpr_to_gpr): Likewise.
* gcc.target/aarch64/movv16qi_2.c: New test.
* gcc.target/aarch64/movv16qi_3.c: Likewise.
* gcc.target/aarch64/movv2di_1.c: Likewise.
* gcc.target/aarch64/movv2x16qi_1.c: Likewise.
* gcc.target/aarch64/movv2x8qi_1.c: Likewise.
* gcc.target/aarch64/movv3x16qi_1.c: Likewise.
* gcc.target/aarch64/movv3x8qi_1.c: Likewise.
* gcc.target/aarch64/movv4x16qi_1.c: Likewise.
* gcc.target/aarch64/movv4x8qi_1.c: Likewise.
* gcc.target/aarch64/movv8qi_2.c: Likewise.
* gcc.target/aarch64/movv8qi_3.c: Likewise.
* gcc.target/aarch64/vect_unary_2.c: Likewise.
---
gcc/config/aarch64/aarch64-ldpstp.md | 11 +-
gcc/config/aarch64/aarch64-simd.md | 199 +++++++++++-------
gcc/config/aarch64/aarch64.cc | 94 ++++++---
gcc/config/aarch64/aarch64.md | 11 +-
gcc/config/aarch64/iterators.md | 2 +
gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c | 7 +
gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c | 7 +
gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c | 13 ++
gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c | 16 ++
gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c | 16 ++
gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 21 ++
gcc/testsuite/gcc.target/aarch64/movv16qi_2.c | 27 +++
gcc/testsuite/gcc.target/aarch64/movv16qi_3.c | 30 +++
gcc/testsuite/gcc.target/aarch64/movv2di_1.c | 103 +++++++++
.../gcc.target/aarch64/movv2x16qi_1.c | 40 ++++
.../gcc.target/aarch64/movv2x8qi_1.c | 38 ++++
.../gcc.target/aarch64/movv3x16qi_1.c | 44 ++++
.../gcc.target/aarch64/movv3x8qi_1.c | 41 ++++
.../gcc.target/aarch64/movv4x16qi_1.c | 44 ++++
.../gcc.target/aarch64/movv4x8qi_1.c | 42 ++++
gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 15 ++
gcc/testsuite/gcc.target/aarch64/movv8qi_2.c | 27 +++
gcc/testsuite/gcc.target/aarch64/movv8qi_3.c | 30 +++
.../gcc.target/aarch64/vect_unary_2.c | 5 +
24 files changed, 774 insertions(+), 109 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_3.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2di_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_3.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/vect_unary_2.c
diff --git a/gcc/config/aarch64/aarch64-ldpstp.md b/gcc/config/aarch64/aarch64-ldpstp.md
index ba76a1b78..f8446e212 100644
--- a/gcc/config/aarch64/aarch64-ldpstp.md
+++ b/gcc/config/aarch64/aarch64-ldpstp.md
@@ -83,8 +83,7 @@
(match_operand:DREG 1 "register_operand" ""))
(set (match_operand:DREG2 2 "memory_operand" "")
(match_operand:DREG2 3 "register_operand" ""))]
- "TARGET_SIMD
- && aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)"
+ "aarch64_operands_ok_for_ldpstp (operands, false, <DREG:MODE>mode)"
[(parallel [(set (match_dup 0) (match_dup 1))
(set (match_dup 2) (match_dup 3))])]
{
@@ -96,7 +95,7 @@
(match_operand:VQ 1 "memory_operand" ""))
(set (match_operand:VQ2 2 "register_operand" "")
(match_operand:VQ2 3 "memory_operand" ""))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_operands_ok_for_ldpstp (operands, true, <VQ:MODE>mode)
&& (aarch64_tune_params.extra_tuning_flags
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0"
@@ -111,7 +110,7 @@
(match_operand:VQ 1 "register_operand" ""))
(set (match_operand:VQ2 2 "memory_operand" "")
(match_operand:VQ2 3 "register_operand" ""))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_operands_ok_for_ldpstp (operands, false, <VQ:MODE>mode)
&& (aarch64_tune_params.extra_tuning_flags
& AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS) == 0"
@@ -306,7 +305,7 @@
(set (match_operand:VP_2E 6 "memory_operand" "")
(match_operand:VP_2E 7 "aarch64_reg_or_zero" ""))
(match_dup 8)]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
[(const_int 0)]
{
@@ -327,7 +326,7 @@
(set (match_operand:VP_2E 6 "register_operand" "")
(match_operand:VP_2E 7 "memory_operand" ""))
(match_dup 8)]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
[(const_int 0)]
{
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index a47b39281..ef7fc4ecb 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -21,7 +21,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"
/* Force the operand into a register if it is not an
immediate whose use can be replaced with xzr.
@@ -52,7 +52,7 @@
(define_expand "movmisalign<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
- "TARGET_SIMD && !STRICT_ALIGNMENT"
+ "TARGET_FLOAT && !STRICT_ALIGNMENT"
{
/* This pattern is not permitted to fail during expansion: if both arguments
are non-registers (e.g. memory := constant, which can be created by the
@@ -116,10 +116,10 @@
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, m, m, w, ?r, ?w, ?r, w")
+ "=w, m, m, w, ?r, ?w, ?r, w, w")
(match_operand:VDMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
+ "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
@@ -128,26 +128,34 @@
case 0: return "ldr\t%d0, %1";
case 1: return "str\txzr, %0";
case 2: return "str\t%d1, %0";
- case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
- case 4: return "umov\t%0, %1.d[0]";
+ case 3:
+ if (TARGET_SIMD)
+ return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+ return "fmov\t%d0, %d1";
+ case 4:
+ if (TARGET_SIMD)
+ return "umov\t%0, %1.d[0]";
+ return "fmov\t%x0, %d1";
case 5: return "fmov\t%d0, %1";
case 6: return "mov\t%0, %1";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 64);
+ case 8: return "fmov\t%d0, xzr";
default: gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
neon_logic<q>, neon_to_gp<q>, f_mcr,\
- mov_reg, neon_move<q>")]
+ mov_reg, neon_move<q>, f_mcr")
+ (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
)
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
[(set (match_operand:VQMOV 0 "nonimmediate_operand"
- "=w, Umn, m, w, ?r, ?w, ?r, w")
+ "=w, Umn, m, w, ?r, ?w, ?r, w, w")
(match_operand:VQMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
+ "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
@@ -167,14 +175,17 @@
return "#";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 128);
+ case 8:
+ return "fmov\t%d0, xzr";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
neon_logic<q>, multiple, multiple,\
- multiple, neon_move<q>")
- (set_attr "length" "4,4,4,4,8,8,8,4")]
+ multiple, neon_move<q>, fmov")
+ (set_attr "length" "4,4,4,4,8,8,8,4,4")
+ (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
)
;; When storing lane zero we can use the normal STR and its more permissive
@@ -195,7 +206,7 @@
(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:DREG2 2 "register_operand" "=w")
(match_operand:DREG2 3 "memory_operand" "m"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
@@ -209,7 +220,7 @@
(match_operand:DREG 1 "register_operand" "w"))
(set (match_operand:DREG2 2 "memory_operand" "=m")
(match_operand:DREG2 3 "register_operand" "w"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
@@ -223,7 +234,7 @@
(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:VQ2 2 "register_operand" "=w")
(match_operand:VQ2 3 "memory_operand" "m"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
@@ -237,10 +248,11 @@
(match_operand:VQ 1 "register_operand" "w"))
(set (match_operand:VQ2 2 "memory_operand" "=m")
(match_operand:VQ2 3 "register_operand" "w"))]
- "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (<VQ:MODE>mode)))"
+ "TARGET_FLOAT
+ && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<VQ:MODE>mode)))"
"stp\\t%q1, %q3, %z0"
[(set_attr "type" "neon_stp_q")]
)
@@ -248,8 +260,9 @@
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_SIMD && reload_completed
+ (match_operand:VQMOV 1 "register_operand" ""))]
+ "TARGET_FLOAT
+ && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
[(const_int 0)]
@@ -261,7 +274,8 @@
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
(match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_SIMD && reload_completed
+ "TARGET_FLOAT
+ && reload_completed
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
[(const_int 0)]
@@ -273,7 +287,7 @@
(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQMOV 0)
(match_operand:VQMOV 1))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx dst = operands[0];
rtx src = operands[1];
@@ -306,13 +320,20 @@
(vec_select:<VHALF>
(match_operand:VQMOV 1 "register_operand")
(match_operand 2 "ascending_int_parallel")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
+ {
+ if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
+ {
+ emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
+ DONE;
+ }
+ }
)
(define_expand "aarch64_get_low<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
@@ -323,7 +344,7 @@
(define_expand "aarch64_get_high<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
@@ -350,15 +371,17 @@
)
(define_insn "aarch64_simd_mov_from_<mode>high"
- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
(vec_select:<VHALF>
- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"@
- dup\\t%d0, %1.d[1]
- umov\t%0, %1.d[1]"
- [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
+ dup\t%d0, %1.d[1]
+ umov\t%0, %1.d[1]
+ fmov\t%0, %1.d[1]"
+ [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
+ (set_attr "arch" "simd,simd,*")
(set_attr "length" "4")]
)
@@ -4322,12 +4345,22 @@
[(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
+(define_insn "*aarch64_get_high<mode>"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
+ (vec_select:<VEL>
+ (match_operand:VQ_2E 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand")])))]
+ "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
+ "fmov\t%0, %1.d[1]"
+ [(set_attr "type" "f_mrc")]
+)
+
(define_insn "load_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "memory_operand" "Utq")
(match_operand:VDCSIF 2 "memory_operand" "m")))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
"ldr\\t%<single_dtype>0, %1"
[(set_attr "type" "neon_load1_1reg<dblq>")]
@@ -4357,7 +4390,7 @@
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "register_operand" "w, r")
(match_operand:VDCSIF 2 "register_operand" "w, r")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"@
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
@@ -4372,39 +4405,44 @@
;; the register alternatives either don't accept or themselves disparage.
(define_insn "*aarch64_combine_internal<mode>"
- [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
+ [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
- "TARGET_SIMD
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
+ "TARGET_FLOAT
&& !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
+ fmov\t%0.d[1], %2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
- [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
+ neon_load1_one_lane<dblq>, neon_stp, store_16")
+ (set_attr "arch" "simd,simd,*,simd,*,*")]
)
(define_insn "*aarch64_combine_internal_be<mode>"
- [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
+ [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
- "TARGET_SIMD
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))]
+ "TARGET_FLOAT
&& BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
+ fmov\t%0.d[1], %2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>2, %<single_type>1, %y0
stp\t%<single_wx>2, %<single_wx>1, %y0"
- [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
+ (set_attr "arch" "simd,simd,*,simd,*,*")]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
@@ -4415,13 +4453,12 @@
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
- [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "arch" "simd,fp,simd")]
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
)
(define_insn "*aarch64_combinez_be<mode>"
@@ -4429,13 +4466,12 @@
(vec_concat:<VDBL>
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "TARGET_FLOAT && BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
- [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "arch" "simd,fp,simd")]
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
)
;; Form a vector whose first half (in array order) comes from operand 1
@@ -4446,7 +4482,7 @@
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "general_operand")
(match_operand:VDCSIF 2 "general_operand")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
@@ -4464,7 +4500,7 @@
}
else
{
- /* Use *aarch64_combine_general<mode>. */
+ /* Use *aarch64_combine_internal<mode>. */
operands[lo] = force_reg (<MODE>mode, operands[lo]);
if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
{
@@ -4486,7 +4522,7 @@
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDC 1 "general_operand")
(match_operand:VDC 2 "general_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (BYTES_BIG_ENDIAN)
std::swap (operands[1], operands[2]);
@@ -7367,7 +7403,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_QD 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (can_create_pseudo_p ())
{
@@ -7379,7 +7415,7 @@
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
(match_operand:VSTRUCT 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (can_create_pseudo_p ())
{
@@ -7559,7 +7595,8 @@
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
@@ -7573,7 +7610,8 @@
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
@@ -7581,13 +7619,15 @@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
+ (set_attr "arch" "simd,*,*")
(set_attr "length" "8,4,4")]
)
(define_insn "*aarch64_be_movoi"
[(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
(match_operand:OI 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], OImode)
|| register_operand (operands[1], OImode))"
"@
@@ -7595,57 +7635,66 @@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
+ (set_attr "arch" "simd,*,*")
(set_attr "length" "8,4,4")]
)
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
+ (set_attr "arch" "fp<q>,*,*")
(set_attr "length" "12,8,8")]
)
(define_insn "*aarch64_be_movci"
[(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:CI 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], CImode)
|| register_operand (operands[1], CImode))"
"#"
[(set_attr "type" "multiple")
- (set_attr "length" "12,4,4")]
+ (set_attr "arch" "simd,*,*")
+ (set_attr "length" "12,8,8")]
)
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
+ (set_attr "arch" "fp<q>,*,*")
(set_attr "length" "16,8,8")]
)
(define_insn "*aarch64_be_movxi"
[(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:XI 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
"#"
[(set_attr "type" "multiple")
- (set_attr "length" "16,4,4")]
+ (set_attr "arch" "simd,*,*")
+ (set_attr "length" "16,8,8")]
)
(define_split
[(set (match_operand:VSTRUCT_2QD 0 "register_operand")
(match_operand:VSTRUCT_2QD 1 "register_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
@@ -7655,7 +7704,7 @@
(define_split
[(set (match_operand:OI 0 "register_operand")
(match_operand:OI 1 "register_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
@@ -7665,7 +7714,7 @@
(define_split
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_3QD 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
@@ -7674,7 +7723,7 @@
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -7701,7 +7750,7 @@
(define_split
[(set (match_operand:CI 0 "nonimmediate_operand")
(match_operand:CI 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], CImode)
@@ -7710,7 +7759,7 @@
aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
simplify_gen_subreg (OImode, operands[1], CImode, 0));
@@ -7729,7 +7778,7 @@
(define_split
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_4QD 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
@@ -7738,7 +7787,7 @@
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -7759,7 +7808,7 @@
(define_split
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], XImode)
@@ -7768,7 +7817,7 @@
aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
simplify_gen_subreg (OImode, operands[1], XImode, 0));
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 02210ed13..b4b646fa0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3748,7 +3748,7 @@ aarch64_classify_vector_mode (machine_mode mode)
case E_OImode:
case E_CImode:
case E_XImode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
/* Structures of 64-bit Advanced SIMD vectors. */
case E_V2x8QImode:
@@ -3775,7 +3775,7 @@ aarch64_classify_vector_mode (machine_mode mode)
case E_V4x4HFmode:
case E_V4x2SFmode:
case E_V4x1DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
/* Structures of 128-bit Advanced SIMD vectors. */
case E_V2x16QImode:
@@ -3802,7 +3802,7 @@ aarch64_classify_vector_mode (machine_mode mode)
case E_V4x8HFmode:
case E_V4x4SFmode:
case E_V4x2DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
/* 64-bit Advanced SIMD vectors. */
case E_V8QImode:
@@ -3822,7 +3822,7 @@ aarch64_classify_vector_mode (machine_mode mode)
case E_V8BFmode:
case E_V4SFmode:
case E_V2DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD : 0;
default:
return 0;
@@ -4110,7 +4110,8 @@ aarch64_vectorize_related_mode (machine_mode vector_mode,
}
/* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
- if ((vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD
+ && (vec_flags & VEC_ADVSIMD)
&& known_eq (nunits, 0U)
&& known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
&& maybe_ge (GET_MODE_BITSIZE (element_mode)
@@ -4208,7 +4209,7 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
if (GP_REGNUM_P (regno))
{
- if (vec_flags & VEC_ANY_SVE)
+ if (vec_flags & (VEC_ANY_SVE | VEC_STRUCT))
return false;
if (known_le (GET_MODE_SIZE (mode), 8))
return true;
@@ -10884,7 +10885,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
|| mode == TImode
|| mode == TFmode
|| mode == TDmode
- || (BYTES_BIG_ENDIAN && advsimd_struct_p));
+ || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ && advsimd_struct_p));
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
corresponds to the actual size of the memory being loaded/stored and the
mode of the corresponding addressing mode is half of that. */
@@ -10914,6 +10916,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
/* On LE, for AdvSIMD, don't support anything other than POST_INC or
REG addressing. */
if (advsimd_struct_p
+ && TARGET_SIMD
&& !BYTES_BIG_ENDIAN
&& (code != POST_INC && code != REG))
return false;
@@ -10976,7 +10979,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
&& aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48));
/* A 7bit offset check because OImode will emit a ldp/stp
- instruction (only big endian will get here).
+ instruction (only !TARGET_SIMD or big endian will get here).
For ldp/stp instructions, the offset is scaled for the size of a
single element of the pair. */
if (aarch64_advsimd_partial_struct_mode_p (mode)
@@ -10987,7 +10990,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
/* Three 9/12 bit offsets checks because CImode will emit three
- ldr/str instructions (only big endian will get here). */
+ ldr/str instructions (only !TARGET_SIMD or big endian will
+ get here). */
if (aarch64_advsimd_partial_struct_mode_p (mode)
&& known_eq (GET_MODE_SIZE (mode), 24))
return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
@@ -12716,18 +12720,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
/* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
LDR and STR. See the comment at the head of aarch64-sve.md for
more details about the big-endian handling. */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (reg_class_subset_p (rclass, FP_REGS)
&& !((REG_P (x) && HARD_REGISTER_P (x))
|| aarch64_simd_valid_immediate (x, NULL))
- && mode != VNx16QImode)
+ && mode != VNx16QImode
+ && (vec_flags & VEC_SVE_DATA)
+ && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
{
- unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if ((vec_flags & VEC_SVE_DATA)
- && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
- {
- sri->icode = CODE_FOR_aarch64_sve_reload_mem;
- return NO_REGS;
- }
+ sri->icode = CODE_FOR_aarch64_sve_reload_mem;
+ return NO_REGS;
}
/* If we have to disable direct literal pool loads and stores because the
@@ -12744,9 +12746,13 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
/* Without the TARGET_SIMD instructions we cannot move a Q register
to a Q register directly. We need a scratch. */
if (REG_P (x)
- && (mode == TFmode || mode == TImode || mode == TDmode)
+ && (mode == TFmode
+ || mode == TImode
+ || mode == TDmode
+ || (vec_flags == VEC_ADVSIMD && known_eq (GET_MODE_SIZE (mode), 16)))
&& mode == GET_MODE (x)
- && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
+ && !TARGET_SIMD
+ && FP_REGNUM_P (REGNO (x))
&& reg_class_subset_p (rclass, FP_REGS))
{
sri->icode = code_for_aarch64_reload_mov (mode);
@@ -12768,6 +12774,28 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
return NO_REGS;
}
+/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
+
+static bool
+aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1,
+ reg_class_t class2)
+{
+ if (!TARGET_SIMD
+ && reg_classes_intersect_p (class1, FP_REGS)
+ && reg_classes_intersect_p (class2, FP_REGS))
+ {
+ /* We can't do a 128-bit FPR-to-FPR move without TARGET_SIMD,
+ so we can't easily split a move involving tuples of 128-bit
+ vectors. Force the copy through memory instead.
+
+ (Tuples of 64-bit vectors are fine.) */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+ return true;
+ }
+ return false;
+}
+
static bool
aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
@@ -13311,7 +13339,7 @@ aarch64_rtx_mult_cost (rtx x, enum rtx_code code, int outer, bool speed)
if (VECTOR_MODE_P (mode))
{
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the instruction have the
same cost as the three vector version - don't add the costs of the
@@ -14257,7 +14285,7 @@ cost_minus:
{
/* SUBL2 and SUBW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the sub instruction
have the same cost as the regular three vector version -
@@ -14359,7 +14387,7 @@ cost_plus:
{
/* ADDL2 and ADDW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the add instruction
have the same cost as the regular three vector version -
@@ -15284,7 +15312,9 @@ aarch64_register_move_cost (machine_mode mode,
return aarch64_register_move_cost (mode, from, GENERAL_REGS)
+ aarch64_register_move_cost (mode, GENERAL_REGS, to);
- if (known_eq (GET_MODE_SIZE (mode), 16))
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags != (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL)
+ && known_eq (GET_MODE_SIZE (mode), 16))
{
/* 128-bit operations on general registers require 2 instructions. */
if (from == GENERAL_REGS && to == GENERAL_REGS)
@@ -15312,6 +15342,16 @@ aarch64_register_move_cost (machine_mode mode,
else if (to == GENERAL_REGS)
return regmove_cost->FP2GP;
+ if (!TARGET_SIMD && vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+ {
+ /* Needs a round-trip through memory, which can use LDP/STP for pairs.
+ The cost must be greater than 2 units to indicate that direct
+ moves aren't possible. */
+ auto per_vector = (aarch64_tune_params.memmov_cost.load_fp
+ + aarch64_tune_params.memmov_cost.store_fp);
+ return MIN (CEIL (per_vector, 2), 4);
+ }
+
return regmove_cost->FP2FP;
}
@@ -21504,6 +21544,9 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
return false;
+ if ((vec_flags & VEC_ADVSIMD) && !TARGET_SIMD)
+ return false;
+
if (vec_flags & VEC_SVE_PRED)
return aarch64_sve_pred_valid_immediate (op, info);
@@ -24430,7 +24473,7 @@ aarch64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
std::swap (d->op0, d->op1);
}
- if ((d->vec_flags == VEC_ADVSIMD
+ if (((d->vec_flags == VEC_ADVSIMD && TARGET_SIMD)
|| d->vec_flags == VEC_SVE_DATA
|| d->vec_flags == (VEC_SVE_DATA | VEC_PARTIAL)
|| d->vec_flags == VEC_SVE_PRED)
@@ -27977,6 +28020,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
+#undef TARGET_SECONDARY_MEMORY_NEEDED
+#define TARGET_SECONDARY_MEMORY_NEEDED aarch64_secondary_memory_needed
+
#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 8757a962f..c0cc91756 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -374,8 +374,11 @@
;; Attributes of the architecture required to support the instruction (or
;; alternative). This attribute is used to compute attribute "enabled", use type
;; "any" to enable an alternative in all cases.
+;;
+;; As a convenience, "fp_q" means "fp" + the ability to move between
+;; Q registers and is equivalent to "simd".
-(define_enum "arches" [ any rcpc8_4 fp simd sve fp16])
+(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16])
(define_enum_attr "arch" "arches" (const_string "any"))
@@ -403,7 +406,7 @@
(and (eq_attr "arch" "fp")
(match_test "TARGET_FLOAT"))
- (and (eq_attr "arch" "simd")
+ (and (eq_attr "arch" "fp_q, simd")
(match_test "TARGET_SIMD"))
(and (eq_attr "arch" "fp16")
@@ -6768,8 +6771,8 @@
)
(define_expand "@aarch64_reload_mov<mode>"
- [(set (match_operand:TX 0 "register_operand" "=w")
- (match_operand:TX 1 "register_operand" "w"))
+ [(set (match_operand:VTX 0 "register_operand" "=w")
+ (match_operand:VTX 1 "register_operand" "w"))
(clobber (match_operand:DI 2 "register_operand" "=&r"))
]
"TARGET_FLOAT"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index d0cd1b788..a8a39b65a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -313,6 +313,8 @@
(define_mode_iterator TX [TI TF TD])
+(define_mode_iterator VTX [TI TF TD V16QI V8HI V4SI V2DI V8HF V4SF V2DF V8BF])
+
;; Advanced SIMD opaque structure modes.
(define_mode_iterator VSTRUCT [OI CI XI])
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c
new file mode 100644
index 000000000..7e705e119
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_20.c
@@ -0,0 +1,7 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosimd+fp"
+
+#include "ldp_stp_6.c"
+
+/* { dg-final { scan-assembler "stp\td\[0-9\]+, d\[0-9\]+, \\\[x\[0-9\]+\\\]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c
new file mode 100644
index 000000000..462e3c9aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_21.c
@@ -0,0 +1,7 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosimd+fp"
+
+#include "ldp_stp_8.c"
+
+/* { dg-final { scan-assembler-times "ldp\td\[0-9\], d\[0-9\]+, \\\[x\[0-9\]+\\\]" 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c
new file mode 100644
index 000000000..283c56dd2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_22.c
@@ -0,0 +1,13 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosimd+fp"
+
+void
+foo (__Float32x4_t *ptr)
+{
+ ptr[0] = ptr[2];
+ ptr[1] = ptr[3];
+}
+
+/* { dg-final { scan-assembler {\tldp\tq[0-9]+, q[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstp\tq[0-9]+, q[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c
new file mode 100644
index 000000000..b14976cfe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_23.c
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosimd+fp"
+
+void
+foo (char *char_ptr)
+{
+ __Float64x2_t *ptr = (__Float64x2_t *)(char_ptr + 1);
+ asm volatile ("" ::
+ "w" (ptr[1]),
+ "w" (ptr[2]),
+ "w" (ptr[3]),
+ "w" (ptr[4]));
+}
+
+/* { dg-final { scan-assembler-times {\tldp\tq[0-9]+, q[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c b/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c
new file mode 100644
index 000000000..a99426eb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/ldp_stp_24.c
@@ -0,0 +1,16 @@
+/* { dg-options "-O2" } */
+
+#pragma GCC target "+nosimd+fp"
+
+void
+foo (char *char_ptr)
+{
+ __Float64x2_t *ptr = (__Float64x2_t *)(char_ptr + 1);
+ asm volatile ("" :
+ "=w" (ptr[1]),
+ "=w" (ptr[2]),
+ "=w" (ptr[3]),
+ "=w" (ptr[4]));
+}
+
+/* { dg-final { scan-assembler-times {\tstp\tq[0-9]+, q[0-9]+} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
index 8a6afb13b..cac4241b0 100644
--- a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
@@ -80,3 +80,24 @@ fpr_to_gpr (v16qi q0)
x0 = q0;
asm volatile ("" :: "r" (x0));
}
+
+/*
+** gpr_to_gpr:
+** (
+** mov x0, x2
+** mov x1, x3
+** |
+** mov x1, x3
+** mov x0, x2
+** )
+** ret
+*/
+void
+gpr_to_gpr ()
+{
+ register v16qi x0 asm ("x0");
+ register v16qi x2 asm ("x2");
+ asm volatile ("" : "=r" (x2));
+ x0 = x2;
+ asm volatile ("" :: "r" (x0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c
new file mode 100644
index 000000000..08a0a19b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_2.c
@@ -0,0 +1,27 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_GENERAL(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE zero_##TYPE () { return (TYPE) {}; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_GENERAL (__Int8x16_t)
+TEST_GENERAL (__Int16x8_t)
+TEST_GENERAL (__Int32x4_t)
+TEST_GENERAL (__Int64x2_t)
+TEST_GENERAL (__Bfloat16x8_t)
+TEST_GENERAL (__Float16x8_t)
+TEST_GENERAL (__Float32x4_t)
+TEST_GENERAL (__Float64x2_t)
+
+__Int8x16_t const_s8x8 () { return (__Int8x16_t) { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }; }
+__Int16x8_t const_s16x4 () { return (__Int16x8_t) { 1, 0, 1, 0, 1, 0, 1, 0 }; }
+__Int32x4_t const_s32x2 () { return (__Int32x4_t) { 1, 2, 3, 4 }; }
+__Int64x2_t const_s64x1 () { return (__Int64x2_t) { 100, 100 }; }
+__Float16x8_t const_f16x4 () { return (__Float16x8_t) { 2, 2, 2, 2, 2, 2, 2, 2 }; }
+__Float32x4_t const_f32x2 () { return (__Float32x4_t) { 1, 2, 1, 2 }; }
+__Float64x2_t const_f64x1 () { return (__Float64x2_t) { 32, 32 }; }
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c
new file mode 100644
index 000000000..d43b994c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_3.c
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE \
+ test_##TYPE (void) \
+ { \
+ typedef TYPE v __attribute__((aligned(1))); \
+ register v *ptr asm ("x0"); \
+ asm volatile ("" : "=r" (ptr)); \
+ return *ptr; \
+ }
+
+TEST_VECTOR (__Int8x16_t)
+TEST_VECTOR (__Int16x8_t)
+TEST_VECTOR (__Int32x4_t)
+TEST_VECTOR (__Int64x2_t)
+TEST_VECTOR (__Bfloat16x8_t)
+TEST_VECTOR (__Float16x8_t)
+TEST_VECTOR (__Float32x4_t)
+TEST_VECTOR (__Float64x2_t)
+
+/*
+** test___Int8x16_t:
+** ldr q0, \[x0\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv2di_1.c b/gcc/testsuite/gcc.target/aarch64/movv2di_1.c
new file mode 100644
index 000000000..e3b55fd52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv2di_1.c
@@ -0,0 +1,103 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+typedef long long v2di __attribute__((vector_size(16)));
+
+/*
+** fpr_to_fpr:
+** sub sp, sp, #16
+** str q1, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, #?16
+** ret
+*/
+v2di
+fpr_to_fpr (v2di q0, v2di q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr: { target aarch64_little_endian }
+** fmov d0, x0
+** fmov v0.d\[1\], x1
+** ret
+*/
+/*
+** gpr_to_fpr: { target aarch64_big_endian }
+** fmov d0, x1
+** fmov v0.d\[1\], x0
+** ret
+*/
+v2di
+gpr_to_fpr ()
+{
+ register v2di x0 asm ("x0");
+ asm volatile ("" : "=r" (x0));
+ return x0;
+}
+
+/*
+** zero_to_fpr:
+** fmov d0, xzr
+** ret
+*/
+v2di
+zero_to_fpr ()
+{
+ return (v2di) {};
+}
+
+/*
+** fpr_to_gpr: { target aarch64_little_endian }
+** (
+** fmov x0, d0
+** fmov x1, v0.d\[1\]
+** |
+** fmov x1, v0.d\[1\]
+** fmov x0, d0
+** )
+** ret
+*/
+/*
+** fpr_to_gpr: { target aarch64_big_endian }
+** (
+** fmov x1, d0
+** fmov x0, v0.d\[1\]
+** |
+** fmov x0, v0.d\[1\]
+** fmov x1, d0
+** )
+** ret
+*/
+void
+fpr_to_gpr (v2di q0)
+{
+ register v2di x0 asm ("x0");
+ x0 = q0;
+ asm volatile ("" :: "r" (x0));
+}
+
+/*
+** gpr_to_gpr:
+** (
+** mov x0, x2
+** mov x1, x3
+** |
+** mov x1, x3
+** mov x0, x2
+** )
+** ret
+*/
+void
+gpr_to_gpr ()
+{
+ register v2di x0 asm ("x0");
+ register v2di x2 asm ("x2");
+ asm volatile ("" : "=r" (x2));
+ x0 = x2;
+ asm volatile ("" :: "r" (x0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c
new file mode 100644
index 000000000..90e3b426d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv2x16qi_1.c
@@ -0,0 +1,40 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC aarch64 "arm_neon.h"
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_VECTOR (int8x16x2_t)
+TEST_VECTOR (int16x8x2_t)
+TEST_VECTOR (int32x4x2_t)
+TEST_VECTOR (int64x2x2_t)
+TEST_VECTOR (float16x8x2_t)
+TEST_VECTOR (bfloat16x8x2_t)
+TEST_VECTOR (float32x4x2_t)
+TEST_VECTOR (float64x2x2_t)
+
+/*
+** mov_int8x16x2_t:
+** sub sp, sp, #32
+** stp q2, q3, \[sp\]
+** ldp q0, q1, \[sp\]
+** add sp, sp, #?32
+** ret
+*/
+/*
+** load_int8x16x2_t:
+** ldp q0, q1, \[x0\]
+** ret
+*/
+/*
+** store_int8x16x2_t: { xfail *-*-* }
+** stp q0, q1, \[x0\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c
new file mode 100644
index 000000000..883a0ea71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv2x8qi_1.c
@@ -0,0 +1,38 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC aarch64 "arm_neon.h"
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_VECTOR (int8x8x2_t)
+TEST_VECTOR (int16x4x2_t)
+TEST_VECTOR (int32x2x2_t)
+TEST_VECTOR (int64x1x2_t)
+TEST_VECTOR (float16x4x2_t)
+TEST_VECTOR (bfloat16x4x2_t)
+TEST_VECTOR (float32x2x2_t)
+TEST_VECTOR (float64x1x2_t)
+
+/*
+** mov_int8x8x2_t:
+** fmov d0, d2
+** fmov d1, d3
+** ret
+*/
+/*
+** load_int8x8x2_t:
+** ldp d0, d1, \[x0\]
+** ret
+*/
+/*
+** store_int8x8x2_t:
+** stp d0, d1, \[x0\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c
new file mode 100644
index 000000000..070a596bf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv3x16qi_1.c
@@ -0,0 +1,44 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC aarch64 "arm_neon.h"
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_VECTOR (int8x16x3_t)
+TEST_VECTOR (int16x8x3_t)
+TEST_VECTOR (int32x4x3_t)
+TEST_VECTOR (int64x2x3_t)
+TEST_VECTOR (float16x8x3_t)
+TEST_VECTOR (bfloat16x8x3_t)
+TEST_VECTOR (float32x4x3_t)
+TEST_VECTOR (float64x2x3_t)
+
+/*
+** mov_int8x16x3_t:
+** sub sp, sp, #48
+** stp q3, q4, \[sp\]
+** str q5, \[sp, #?32\]
+** ldp q0, q1, \[sp\]
+** ldr q2, \[sp, #?32\]
+** add sp, sp, #?48
+** ret
+*/
+/*
+** load_int8x16x3_t:
+** ldp q0, q1, \[x0\]
+** ldr q2, \[x0, #?32\]
+** ret
+*/
+/*
+** store_int8x16x3_t: { xfail *-*-* }
+** stp q0, q1, \[x0\]
+** stp q2, \[x0, #?32\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c
new file mode 100644
index 000000000..4b873d749
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv3x8qi_1.c
@@ -0,0 +1,41 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC aarch64 "arm_neon.h"
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_VECTOR (int8x8x3_t)
+TEST_VECTOR (int16x4x3_t)
+TEST_VECTOR (int32x2x3_t)
+TEST_VECTOR (int64x1x3_t)
+TEST_VECTOR (float16x4x3_t)
+TEST_VECTOR (bfloat16x4x3_t)
+TEST_VECTOR (float32x2x3_t)
+TEST_VECTOR (float64x1x3_t)
+
+/*
+** mov_int8x8x3_t:
+** fmov d0, d3
+** fmov d1, d4
+** fmov d2, d5
+** ret
+*/
+/*
+** load_int8x8x3_t:
+** ldp d0, d1, \[x0\]
+** ldr d2, \[x0, #?16\]
+** ret
+*/
+/*
+** store_int8x8x3_t:
+** stp d0, d1, \[x0\]
+** str d2, \[x0, #?16\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c
new file mode 100644
index 000000000..6a517b4fe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv4x16qi_1.c
@@ -0,0 +1,44 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC aarch64 "arm_neon.h"
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_VECTOR (int8x16x4_t)
+TEST_VECTOR (int16x8x4_t)
+TEST_VECTOR (int32x4x4_t)
+TEST_VECTOR (int64x2x4_t)
+TEST_VECTOR (float16x8x4_t)
+TEST_VECTOR (bfloat16x8x4_t)
+TEST_VECTOR (float32x4x4_t)
+TEST_VECTOR (float64x2x4_t)
+
+/*
+** mov_int8x16x4_t:
+** sub sp, sp, #64
+** stp q4, q5, \[sp\]
+** stp q6, q7, \[sp, #?32\]
+** ldp q0, q1, \[sp\]
+** ldp q2, q3, \[sp, #?32\]
+** add sp, sp, #?64
+** ret
+*/
+/*
+** load_int8x16x4_t:
+** ldp q0, q1, \[x0\]
+** ldp q2, q3, \[x0, #?32\]
+** ret
+*/
+/*
+** store_int8x16x4_t: { xfail *-*-* }
+** stp q0, q1, \[x0\]
+** stp q2, q3, \[x0, #?32\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
new file mode 100644
index 000000000..f096be4a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv4x8qi_1.c
@@ -0,0 +1,42 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC aarch64 "arm_neon.h"
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_VECTOR (int8x8x4_t)
+TEST_VECTOR (int16x4x4_t)
+TEST_VECTOR (int32x2x4_t)
+TEST_VECTOR (int64x1x4_t)
+TEST_VECTOR (float16x4x4_t)
+TEST_VECTOR (bfloat16x4x4_t)
+TEST_VECTOR (float32x2x4_t)
+TEST_VECTOR (float64x1x4_t)
+
+/*
+** mov_int8x8x4_t:
+** fmov d0, d4
+** fmov d1, d5
+** fmov d2, d6
+** fmov d3, d7
+** ret
+*/
+/*
+** load_int8x8x4_t:
+** ldp d0, d1, \[x0\]
+** ldp d2, d3, \[x0, #?16\]
+** ret
+*/
+/*
+** store_int8x8x4_t:
+** stp d0, d1, \[x0\]
+** stp d2, d3, \[x0, #?16\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
index 4c97e6fbc..d2b5d8025 100644
--- a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
@@ -53,3 +53,18 @@ fpr_to_gpr (v8qi q0)
x0 = q0;
asm volatile ("" :: "r" (x0));
}
+
+/*
+** gpr_to_gpr:
+** mov x0, x1
+** ret
+*/
+void
+gpr_to_gpr ()
+{
+ register v8qi x0 asm ("x0");
+ register v8qi x1 asm ("x1");
+ asm volatile ("" : "=r" (x1));
+ x0 = x1;
+ asm volatile ("" :: "r" (x0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c
new file mode 100644
index 000000000..0d8576ffe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_2.c
@@ -0,0 +1,27 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_GENERAL(TYPE) \
+ TYPE mov_##TYPE (TYPE a, TYPE b) { return b; } \
+ TYPE zero_##TYPE () { return (TYPE) {}; } \
+ TYPE load_##TYPE (TYPE *ptr) { return *ptr; } \
+ void store_##TYPE (TYPE *ptr, TYPE a) { *ptr = a; }
+
+TEST_GENERAL (__Int8x8_t)
+TEST_GENERAL (__Int16x4_t)
+TEST_GENERAL (__Int32x2_t)
+TEST_GENERAL (__Int64x1_t)
+TEST_GENERAL (__Bfloat16x4_t)
+TEST_GENERAL (__Float16x4_t)
+TEST_GENERAL (__Float32x2_t)
+TEST_GENERAL (__Float64x1_t)
+
+__Int8x8_t const_s8x8 () { return (__Int8x8_t) { 1, 1, 1, 1, 1, 1, 1, 1 }; }
+__Int16x4_t const_s16x4 () { return (__Int16x4_t) { 1, 0, 1, 0 }; }
+__Int32x2_t const_s32x2 () { return (__Int32x2_t) { 1, 2 }; }
+__Int64x1_t const_s64x1 () { return (__Int64x1_t) { 100 }; }
+__Float16x4_t const_f16x4 () { return (__Float16x4_t) { 2, 2, 2, 2 }; }
+__Float32x2_t const_f32x2 () { return (__Float32x2_t) { 1, 2 }; }
+__Float64x1_t const_f64x1 () { return (__Float64x1_t) { 32 }; }
diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c
new file mode 100644
index 000000000..1caa1a788
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_3.c
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nosimd+fp"
+
+#define TEST_VECTOR(TYPE) \
+ TYPE \
+ test_##TYPE (void) \
+ { \
+ typedef TYPE v __attribute__((aligned(1))); \
+ register v *ptr asm ("x0"); \
+ asm volatile ("" : "=r" (ptr)); \
+ return *ptr; \
+ }
+
+TEST_VECTOR (__Int8x8_t)
+TEST_VECTOR (__Int16x4_t)
+TEST_VECTOR (__Int32x2_t)
+TEST_VECTOR (__Int64x1_t)
+TEST_VECTOR (__Bfloat16x4_t)
+TEST_VECTOR (__Float16x4_t)
+TEST_VECTOR (__Float32x2_t)
+TEST_VECTOR (__Float64x1_t)
+
+/*
+** test___Int8x8_t:
+** ldr d0, \[x0\]
+** ret
+*/
diff --git a/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c b/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c
new file mode 100644
index 000000000..454ac2771
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect_unary_2.c
@@ -0,0 +1,5 @@
+/* { dg-options "-O3 -fno-math-errno --save-temps" } */
+
+#pragma GCC target "+nosimd+fp"
+
+#include "vect_unary_1.c"
--
2.33.0
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
1
https://gitee.com/src-openeuler/gcc.git
git@gitee.com:src-openeuler/gcc.git
src-openeuler
gcc
gcc
master

搜索帮助