169 lines
5.4 KiB
Diff
169 lines
5.4 KiB
Diff
From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001
|
|
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
|
Date: Thu, 1 Jun 2023 09:37:06 +0100
|
|
Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r
|
|
alternatives to 64-bit vector move patterns
|
|
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
|
|
|
|
We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives
|
|
to the mov patterns. This straightforward patch does that and for the pair variants too.
|
|
For the testcase in the code we now generate the optimal assembly without any superfluous
|
|
GP<->SIMD moves.
|
|
|
|
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
|
|
Add =r,m and =m,r alternatives.
|
|
(load_pair<DREG:mode><DREG2:mode>): Likewise.
|
|
(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/aarch64/xreg-vec-modes_1.c: New test.
|
|
---
|
|
gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++--------
|
|
.../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++
|
|
2 files changed, 65 insertions(+), 17 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
|
index 2d688edf5..b5c52ba16 100644
|
|
--- a/gcc/config/aarch64/aarch64-simd.md
|
|
+++ b/gcc/config/aarch64/aarch64-simd.md
|
|
@@ -116,26 +116,28 @@
|
|
|
|
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
|
|
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
|
|
- "=w, m, m, w, ?r, ?w, ?r, w, w")
|
|
+ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
|
|
(match_operand:VDMOV 1 "general_operand"
|
|
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
|
|
+ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
|
|
"TARGET_FLOAT
|
|
&& (register_operand (operands[0], <MODE>mode)
|
|
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
|
"@
|
|
ldr\t%d0, %1
|
|
+ ldr\t%x0, %1
|
|
str\txzr, %0
|
|
str\t%d1, %0
|
|
+ str\t%x1, %0
|
|
* return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
|
|
* return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
|
|
fmov\t%d0, %1
|
|
mov\t%0, %1
|
|
* return aarch64_output_simd_mov_immediate (operands[1], 64);
|
|
fmov\t%d0, xzr"
|
|
- [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
|
|
- neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
|
+ [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
|
|
+ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
|
mov_reg, neon_move<q>, f_mcr")
|
|
- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
|
|
+ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
|
|
)
|
|
|
|
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
|
|
@@ -177,31 +179,35 @@
|
|
)
|
|
|
|
(define_insn "load_pair<DREG:mode><DREG2:mode>"
|
|
- [(set (match_operand:DREG 0 "register_operand" "=w")
|
|
- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
|
|
- (set (match_operand:DREG2 2 "register_operand" "=w")
|
|
- (match_operand:DREG2 3 "memory_operand" "m"))]
|
|
+ [(set (match_operand:DREG 0 "register_operand" "=w,r")
|
|
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
|
|
+ (set (match_operand:DREG2 2 "register_operand" "=w,r")
|
|
+ (match_operand:DREG2 3 "memory_operand" "m,m"))]
|
|
"TARGET_FLOAT
|
|
&& rtx_equal_p (XEXP (operands[3], 0),
|
|
plus_constant (Pmode,
|
|
XEXP (operands[1], 0),
|
|
GET_MODE_SIZE (<DREG:MODE>mode)))"
|
|
- "ldp\\t%d0, %d2, %z1"
|
|
- [(set_attr "type" "neon_ldp")]
|
|
+ "@
|
|
+ ldp\t%d0, %d2, %z1
|
|
+ ldp\t%x0, %x2, %z1"
|
|
+ [(set_attr "type" "neon_ldp,load_16")]
|
|
)
|
|
|
|
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
|
|
- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
|
|
- (match_operand:DREG 1 "register_operand" "w"))
|
|
- (set (match_operand:DREG2 2 "memory_operand" "=m")
|
|
- (match_operand:DREG2 3 "register_operand" "w"))]
|
|
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
|
|
+ (match_operand:DREG 1 "register_operand" "w,r"))
|
|
+ (set (match_operand:DREG2 2 "memory_operand" "=m,m")
|
|
+ (match_operand:DREG2 3 "register_operand" "w,r"))]
|
|
"TARGET_FLOAT
|
|
&& rtx_equal_p (XEXP (operands[2], 0),
|
|
plus_constant (Pmode,
|
|
XEXP (operands[0], 0),
|
|
GET_MODE_SIZE (<DREG:MODE>mode)))"
|
|
- "stp\\t%d1, %d3, %z0"
|
|
- [(set_attr "type" "neon_stp")]
|
|
+ "@
|
|
+ stp\t%d1, %d3, %z0
|
|
+ stp\t%x1, %x3, %z0"
|
|
+ [(set_attr "type" "neon_stp,store_16")]
|
|
)
|
|
|
|
(define_insn "load_pair<VQ:mode><VQ2:mode>"
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
|
|
new file mode 100644
|
|
index 000000000..fc4dcb1ad
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
|
|
@@ -0,0 +1,42 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2" } */
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
|
+
|
|
+typedef unsigned int v2si __attribute__((vector_size (8)));
|
|
+
|
|
+#define force_gp(V1) asm volatile ("" \
|
|
+ : "=r"(V1) \
|
|
+ : "r"(V1) \
|
|
+ : /* No clobbers */);
|
|
+
|
|
+/*
|
|
+** foo:
|
|
+** ldr (x[0-9]+), \[x1\]
|
|
+** str \1, \[x0\]
|
|
+** ret
|
|
+*/
|
|
+
|
|
+void
|
|
+foo (v2si *a, v2si *b)
|
|
+{
|
|
+ v2si tmp = *b;
|
|
+ force_gp (tmp);
|
|
+ *a = tmp;
|
|
+}
|
|
+
|
|
+/*
|
|
+** foo2:
|
|
+** ldp (x[0-9]+), (x[0-9]+), \[x0\]
|
|
+** stp \1, \2, \[x1\]
|
|
+** ret
|
|
+*/
|
|
+void
|
|
+foo2 (v2si *a, v2si *b)
|
|
+{
|
|
+ v2si t1 = *a;
|
|
+ v2si t2 = a[1];
|
|
+ force_gp (t1);
|
|
+ force_gp (t2);
|
|
+ *b = t1;
|
|
+ b[1] = t2;
|
|
+}
|
|
--
|
|
2.33.0
|
|
|