1553 lines
40 KiB
Diff
1553 lines
40 KiB
Diff
|
|
From 4a0e91dc27b30ae673ba132bf2be17a74bc89f31 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Richard Sandiford <richard.sandiford@arm.com>
|
||
|
|
Date: Tue, 5 Dec 2023 10:11:24 +0000
|
||
|
|
Subject: [PATCH 080/157] [Backport][SME] aarch64: Distinguish
|
||
|
|
streaming-compatible AdvSIMD insns
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c86ee4f683e05e5809597d96b5eeb261c9c92cac
|
||
|
|
|
||
|
|
The vast majority of Advanced SIMD instructions are not
|
||
|
|
available in streaming mode, but some of the load/store/move
|
||
|
|
instructions are. This patch adds a new target feature macro
|
||
|
|
called TARGET_BASE_SIMD for this streaming-compatible subset.
|
||
|
|
|
||
|
|
The vector-to-vector move instructions are not streaming-compatible,
|
||
|
|
so we need to use the SVE move instructions where enabled, or fall
|
||
|
|
back to the nofp16 handling otherwise.
|
||
|
|
|
||
|
|
I haven't found a good way of testing the SVE EXT alternative
|
||
|
|
in aarch64_simd_mov_from_<mode>high, but I'd rather provide it
|
||
|
|
than not.
|
||
|
|
|
||
|
|
gcc/
|
||
|
|
* config/aarch64/aarch64.h (TARGET_BASE_SIMD): New macro.
|
||
|
|
(TARGET_SIMD): Require PSTATE.SM to be 0.
|
||
|
|
(AARCH64_ISA_SM_OFF): New macro.
|
||
|
|
* config/aarch64/aarch64.cc (aarch64_array_mode_supported_p):
|
||
|
|
Allow Advanced SIMD structure modes for TARGET_BASE_SIMD.
|
||
|
|
(aarch64_print_operand): Support '%Z'.
|
||
|
|
(aarch64_secondary_reload): Expect SVE moves to be used for
|
||
|
|
Advanced SIMD modes if SVE is enabled and non-streaming
|
||
|
|
Advanced SIMD isn't.
|
||
|
|
(aarch64_register_move_cost): Likewise.
|
||
|
|
(aarch64_simd_container_mode): Extend Advanced SIMD mode
|
||
|
|
handling to TARGET_BASE_SIMD.
|
||
|
|
(aarch64_expand_cpymem): Expand commentary.
|
||
|
|
* config/aarch64/aarch64.md (arches): Add base_simd and nobase_simd.
|
||
|
|
(arch_enabled): Handle it.
|
||
|
|
(*mov<mode>_aarch64): Extend UMOV alternative to TARGET_BASE_SIMD.
|
||
|
|
(*movti_aarch64): Use an SVE move instruction if non-streaming
|
||
|
|
SIMD isn't available.
|
||
|
|
(*mov<TFD:mode>_aarch64): Likewise.
|
||
|
|
(load_pair_dw_tftf): Extend to TARGET_BASE_SIMD.
|
||
|
|
(store_pair_dw_tftf): Likewise.
|
||
|
|
(loadwb_pair<TX:mode>_<P:mode>): Likewise.
|
||
|
|
(storewb_pair<TX:mode>_<P:mode>): Likewise.
|
||
|
|
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
|
||
|
|
Allow UMOV in streaming mode.
|
||
|
|
(*aarch64_simd_mov<VQMOV:mode>): Use an SVE move instruction
|
||
|
|
if non-streaming SIMD isn't available.
|
||
|
|
(aarch64_store_lane0<mode>): Depend on TARGET_FLOAT rather than
|
||
|
|
TARGET_SIMD.
|
||
|
|
(aarch64_simd_mov_from_<mode>low): Likewise. Use fmov if
|
||
|
|
Advanced SIMD is completely disabled.
|
||
|
|
(aarch64_simd_mov_from_<mode>high): Use SVE EXT instructions if
|
||
|
|
non-streaming SIMD isn't available.
|
||
|
|
|
||
|
|
gcc/testsuite/
|
||
|
|
* gcc.target/aarch64/movdf_2.c: New test.
|
||
|
|
* gcc.target/aarch64/movdi_3.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movhf_2.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movhi_2.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movqi_2.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movsf_2.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movsi_2.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movtf_3.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movtf_4.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movti_3.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movti_4.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movv16qi_4.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movv16qi_5.c: Likewise.
|
||
|
|
* gcc.target/aarch64/movv8qi_4.c: Likewise.
|
||
|
|
* gcc.target/aarch64/sme/arm_neon_1.c: Likewise.
|
||
|
|
* gcc.target/aarch64/sme/arm_neon_2.c: Likewise.
|
||
|
|
* gcc.target/aarch64/sme/arm_neon_3.c: Likewise.
|
||
|
|
---
|
||
|
|
gcc/config/aarch64/aarch64-simd.md | 50 ++++++-----
|
||
|
|
gcc/config/aarch64/aarch64.cc | 16 ++--
|
||
|
|
gcc/config/aarch64/aarch64.h | 12 ++-
|
||
|
|
gcc/config/aarch64/aarch64.md | 77 +++++++++--------
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movdf_2.c | 51 +++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movdi_3.c | 59 +++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movhf_2.c | 53 ++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movhi_2.c | 61 +++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movqi_2.c | 59 +++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movsf_2.c | 51 +++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movsi_2.c | 59 +++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movtf_3.c | 81 +++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movtf_4.c | 78 +++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movti_3.c | 86 +++++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movti_4.c | 83 ++++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movv16qi_4.c | 82 ++++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movv16qi_5.c | 79 +++++++++++++++++
|
||
|
|
gcc/testsuite/gcc.target/aarch64/movv8qi_4.c | 55 ++++++++++++
|
||
|
|
.../gcc.target/aarch64/sme/arm_neon_1.c | 13 +++
|
||
|
|
.../gcc.target/aarch64/sme/arm_neon_2.c | 11 +++
|
||
|
|
.../gcc.target/aarch64/sme/arm_neon_3.c | 11 +++
|
||
|
|
21 files changed, 1062 insertions(+), 65 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_3.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_3.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_4.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_3.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_4.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_4.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_5.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_4.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c
|
||
|
|
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||
|
|
index 1f4b30642..62493cdfa 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64-simd.md
|
||
|
|
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||
|
|
@@ -121,19 +121,19 @@
|
||
|
|
&& (register_operand (operands[0], <MODE>mode)
|
||
|
|
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
||
|
|
{@ [cons: =0, 1; attrs: type, arch]
|
||
|
|
- [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
|
||
|
|
- [r , m ; load_8 , * ] ldr\t%x0, %1
|
||
|
|
- [m , Dz; store_8 , * ] str\txzr, %0
|
||
|
|
- [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
|
||
|
|
- [m , r ; store_8 , * ] str\t%x1, %0
|
||
|
|
- [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
|
||
|
|
- [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
|
||
|
|
- [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
|
||
|
|
- [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
|
||
|
|
- [?w, r ; f_mcr , * ] fmov\t%d0, %1
|
||
|
|
- [?r, r ; mov_reg , * ] mov\t%0, %1
|
||
|
|
- [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
|
||
|
|
- [w , Dz; f_mcr , * ] fmov\t%d0, xzr
|
||
|
|
+ [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
|
||
|
|
+ [r , m ; load_8 , * ] ldr\t%x0, %1
|
||
|
|
+ [m , Dz; store_8 , * ] str\txzr, %0
|
||
|
|
+ [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
|
||
|
|
+ [m , r ; store_8 , * ] str\t%x1, %0
|
||
|
|
+ [w , w ; neon_logic<q> , simd ] mov\t%0.<Vbtype>, %1.<Vbtype>
|
||
|
|
+ [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
|
||
|
|
+ [?r, w ; neon_to_gp<q> , base_simd] umov\t%0, %1.d[0]
|
||
|
|
+ [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
|
||
|
|
+ [?w, r ; f_mcr , * ] fmov\t%d0, %1
|
||
|
|
+ [?r, r ; mov_reg , * ] mov\t%0, %1
|
||
|
|
+ [w , Dn; neon_move<q> , simd ] << aarch64_output_simd_mov_immediate (operands[1], 64);
|
||
|
|
+ [w , Dz; f_mcr , * ] fmov\t%d0, xzr
|
||
|
|
}
|
||
|
|
)
|
||
|
|
|
||
|
|
@@ -148,6 +148,7 @@
|
||
|
|
[Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
|
||
|
|
[m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
|
||
|
|
[w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
|
||
|
|
+ [w , w ; * , sve , 4] mov\t%Z0.d, %Z1.d
|
||
|
|
[?r , w ; multiple , * , 8] #
|
||
|
|
[?w , r ; multiple , * , 8] #
|
||
|
|
[?r , r ; multiple , * , 8] #
|
||
|
|
@@ -177,7 +178,7 @@
|
||
|
|
[(set (match_operand:<VEL> 0 "memory_operand" "=m")
|
||
|
|
(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
|
||
|
|
(parallel [(match_operand 2 "const_int_operand" "n")])))]
|
||
|
|
- "TARGET_SIMD
|
||
|
|
+ "TARGET_FLOAT
|
||
|
|
&& ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
|
||
|
|
"str\\t%<Vetype>1, %0"
|
||
|
|
[(set_attr "type" "neon_store1_1reg<q>")]
|
||
|
|
@@ -312,35 +313,38 @@
|
||
|
|
)
|
||
|
|
|
||
|
|
(define_insn_and_split "aarch64_simd_mov_from_<mode>low"
|
||
|
|
- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
|
||
|
|
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
|
||
|
|
(vec_select:<VHALF>
|
||
|
|
- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
|
||
|
|
+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
|
||
|
|
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_lo_half" "")))]
|
||
|
|
- "TARGET_SIMD"
|
||
|
|
+ "TARGET_FLOAT"
|
||
|
|
"@
|
||
|
|
#
|
||
|
|
- umov\t%0, %1.d[0]"
|
||
|
|
+ umov\t%0, %1.d[0]
|
||
|
|
+ fmov\t%0, %d1"
|
||
|
|
"&& reload_completed && aarch64_simd_register (operands[0], <VHALF>mode)"
|
||
|
|
[(set (match_dup 0) (match_dup 1))]
|
||
|
|
{
|
||
|
|
operands[1] = aarch64_replace_reg_mode (operands[1], <VHALF>mode);
|
||
|
|
}
|
||
|
|
- [(set_attr "type" "mov_reg,neon_to_gp<q>")
|
||
|
|
+ [(set_attr "type" "mov_reg,neon_to_gp<q>,f_mrc")
|
||
|
|
+ (set_attr "arch" "simd,base_simd,*")
|
||
|
|
(set_attr "length" "4")]
|
||
|
|
)
|
||
|
|
|
||
|
|
(define_insn "aarch64_simd_mov_from_<mode>high"
|
||
|
|
- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
|
||
|
|
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,w,?r,?r")
|
||
|
|
(vec_select:<VHALF>
|
||
|
|
- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
|
||
|
|
+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,0,w,w")
|
||
|
|
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
|
||
|
|
"TARGET_FLOAT"
|
||
|
|
"@
|
||
|
|
dup\t%d0, %1.d[1]
|
||
|
|
+ ext\t%Z0.b, %Z0.b, %Z0.b, #8
|
||
|
|
umov\t%0, %1.d[1]
|
||
|
|
fmov\t%0, %1.d[1]"
|
||
|
|
- [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
|
||
|
|
- (set_attr "arch" "simd,simd,*")
|
||
|
|
+ [(set_attr "type" "neon_dup<q>,*,neon_to_gp<q>,f_mrc")
|
||
|
|
+ (set_attr "arch" "simd,sve,simd,*")
|
||
|
|
(set_attr "length" "4")]
|
||
|
|
)
|
||
|
|
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||
|
|
index 8f8395201..08a98f8ba 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.cc
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.cc
|
||
|
|
@@ -3999,7 +3999,7 @@ static bool
|
||
|
|
aarch64_array_mode_supported_p (machine_mode mode,
|
||
|
|
unsigned HOST_WIDE_INT nelems)
|
||
|
|
{
|
||
|
|
- if (TARGET_SIMD
|
||
|
|
+ if (TARGET_BASE_SIMD
|
||
|
|
&& (AARCH64_VALID_SIMD_QREG_MODE (mode)
|
||
|
|
|| AARCH64_VALID_SIMD_DREG_MODE (mode))
|
||
|
|
&& (nelems >= 2 && nelems <= 4))
|
||
|
|
@@ -12955,8 +12955,8 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||
|
|
return NO_REGS;
|
||
|
|
}
|
||
|
|
|
||
|
|
- /* Without the TARGET_SIMD instructions we cannot move a Q register
|
||
|
|
- to a Q register directly. We need a scratch. */
|
||
|
|
+ /* Without the TARGET_SIMD or TARGET_SVE instructions we cannot move a
|
||
|
|
+ Q register to a Q register directly. We need a scratch. */
|
||
|
|
if (REG_P (x)
|
||
|
|
&& (mode == TFmode
|
||
|
|
|| mode == TImode
|
||
|
|
@@ -15540,7 +15540,7 @@ aarch64_register_move_cost (machine_mode mode,
|
||
|
|
secondary reload. A general register is used as a scratch to move
|
||
|
|
the upper DI value and the lower DI value is moved directly,
|
||
|
|
hence the cost is the sum of three moves. */
|
||
|
|
- if (! TARGET_SIMD)
|
||
|
|
+ if (!TARGET_SIMD && !TARGET_SVE)
|
||
|
|
return regmove_cost->GP2FP + regmove_cost->FP2GP + regmove_cost->FP2FP;
|
||
|
|
|
||
|
|
return regmove_cost->FP2FP;
|
||
|
|
@@ -21107,7 +21107,7 @@ aarch64_simd_container_mode (scalar_mode mode, poly_int64 width)
|
||
|
|
return aarch64_full_sve_mode (mode).else_mode (word_mode);
|
||
|
|
|
||
|
|
gcc_assert (known_eq (width, 64) || known_eq (width, 128));
|
||
|
|
- if (TARGET_SIMD)
|
||
|
|
+ if (TARGET_BASE_SIMD)
|
||
|
|
{
|
||
|
|
if (known_eq (width, 128))
|
||
|
|
return aarch64_vq_mode (mode).else_mode (word_mode);
|
||
|
|
@@ -25221,7 +25221,11 @@ aarch64_expand_cpymem (rtx *operands)
|
||
|
|
int copy_bits = 256;
|
||
|
|
|
||
|
|
/* Default to 256-bit LDP/STP on large copies, however small copies, no SIMD
|
||
|
|
- support or slow 256-bit LDP/STP fall back to 128-bit chunks. */
|
||
|
|
+ support or slow 256-bit LDP/STP fall back to 128-bit chunks.
|
||
|
|
+
|
||
|
|
+ ??? Although it would be possible to use LDP/STP Qn in streaming mode
|
||
|
|
+ (so using TARGET_BASE_SIMD instead of TARGET_SIMD), it isn't clear
|
||
|
|
+ whether that would improve performance. */
|
||
|
|
if (size <= 24
|
||
|
|
|| !TARGET_SIMD
|
||
|
|
|| (aarch64_tune_params.extra_tuning_flags
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||
|
|
index dd2de4e88..a3c83a3b1 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.h
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.h
|
||
|
|
@@ -61,8 +61,15 @@
|
||
|
|
#define WORDS_BIG_ENDIAN (BYTES_BIG_ENDIAN)
|
||
|
|
|
||
|
|
/* AdvSIMD is supported in the default configuration, unless disabled by
|
||
|
|
- -mgeneral-regs-only or by the +nosimd extension. */
|
||
|
|
-#define TARGET_SIMD (AARCH64_ISA_SIMD)
|
||
|
|
+ -mgeneral-regs-only or by the +nosimd extension. The set of available
|
||
|
|
+ instructions is then subdivided into:
|
||
|
|
+
|
||
|
|
+ - the "base" set, available both in SME streaming mode and in
|
||
|
|
+ non-streaming mode
|
||
|
|
+
|
||
|
|
+ - the full set, available only in non-streaming mode. */
|
||
|
|
+#define TARGET_BASE_SIMD (AARCH64_ISA_SIMD)
|
||
|
|
+#define TARGET_SIMD (AARCH64_ISA_SIMD && AARCH64_ISA_SM_OFF)
|
||
|
|
#define TARGET_FLOAT (AARCH64_ISA_FP)
|
||
|
|
|
||
|
|
#define UNITS_PER_WORD 8
|
||
|
|
@@ -199,6 +206,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
||
|
|
|
||
|
|
/* Macros to test ISA flags. */
|
||
|
|
|
||
|
|
+#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF)
|
||
|
|
#define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES)
|
||
|
|
#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC)
|
||
|
|
#define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO)
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||
|
|
index 1ec23fae8..079c8a3f9 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.md
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.md
|
||
|
|
@@ -378,7 +378,8 @@
|
||
|
|
;; As a convenience, "fp_q" means "fp" + the ability to move between
|
||
|
|
;; Q registers and is equivalent to "simd".
|
||
|
|
|
||
|
|
-(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16])
|
||
|
|
+(define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd
|
||
|
|
+ simd nosimd sve fp16])
|
||
|
|
|
||
|
|
(define_enum_attr "arch" "arches" (const_string "any"))
|
||
|
|
|
||
|
|
@@ -406,6 +407,12 @@
|
||
|
|
(and (eq_attr "arch" "fp")
|
||
|
|
(match_test "TARGET_FLOAT"))
|
||
|
|
|
||
|
|
+ (and (eq_attr "arch" "base_simd")
|
||
|
|
+ (match_test "TARGET_BASE_SIMD"))
|
||
|
|
+
|
||
|
|
+ (and (eq_attr "arch" "nobase_simd")
|
||
|
|
+ (match_test "!TARGET_BASE_SIMD"))
|
||
|
|
+
|
||
|
|
(and (eq_attr "arch" "fp_q, simd")
|
||
|
|
(match_test "TARGET_SIMD"))
|
||
|
|
|
||
|
|
@@ -1202,22 +1209,22 @@
|
||
|
|
"(register_operand (operands[0], <MODE>mode)
|
||
|
|
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
|
||
|
|
{@ [cons: =0, 1; attrs: type, arch]
|
||
|
|
- [r, r ; mov_reg , * ] mov\t%w0, %w1
|
||
|
|
- [r, M ; mov_imm , * ] mov\t%w0, %1
|
||
|
|
- [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
|
||
|
|
+ [r, r ; mov_reg , * ] mov\t%w0, %w1
|
||
|
|
+ [r, M ; mov_imm , * ] mov\t%w0, %1
|
||
|
|
+ [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
|
||
|
|
/* The "mov_imm" type for CNT is just a placeholder. */
|
||
|
|
- [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||
|
|
- [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]);
|
||
|
|
- [r, m ; load_4 , * ] ldr<size>\t%w0, %1
|
||
|
|
- [w, m ; load_4 , * ] ldr\t%<size>0, %1
|
||
|
|
- [m, r Z ; store_4 , * ] str<size>\\t%w1, %0
|
||
|
|
- [m, w ; store_4 , * ] str\t%<size>1, %0
|
||
|
|
- [r, w ; neon_to_gp<q> , simd ] umov\t%w0, %1.<v>[0]
|
||
|
|
- [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1
|
||
|
|
- [w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1
|
||
|
|
- [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
|
||
|
|
- [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0]
|
||
|
|
- [w, w ; neon_dup , nosimd] fmov\t%s0, %s1
|
||
|
|
+ [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||
|
|
+ [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]);
|
||
|
|
+ [r, m ; load_4 , * ] ldr<size>\t%w0, %1
|
||
|
|
+ [w, m ; load_4 , * ] ldr\t%<size>0, %1
|
||
|
|
+ [m, r Z ; store_4 , * ] str<size>\\t%w1, %0
|
||
|
|
+ [m, w ; store_4 , * ] str\t%<size>1, %0
|
||
|
|
+ [r, w ; neon_to_gp<q> , base_simd ] umov\t%w0, %1.<v>[0]
|
||
|
|
+ [r, w ; neon_to_gp<q> , nobase_simd] fmov\t%w0, %s1
|
||
|
|
+ [w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1
|
||
|
|
+ [w, r Z ; neon_from_gp<q>, nosimd ] fmov\t%s0, %w1
|
||
|
|
+ [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0]
|
||
|
|
+ [w, w ; neon_dup , nosimd ] fmov\t%s0, %s1
|
||
|
|
}
|
||
|
|
)
|
||
|
|
|
||
|
|
@@ -1372,9 +1379,9 @@
|
||
|
|
|
||
|
|
(define_insn "*movti_aarch64"
|
||
|
|
[(set (match_operand:TI 0
|
||
|
|
- "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m")
|
||
|
|
+ "nonimmediate_operand" "= r,w,w,w, r,w,w,r,m,m,w,m")
|
||
|
|
(match_operand:TI 1
|
||
|
|
- "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))]
|
||
|
|
+ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,w,m,r,Z,m,w"))]
|
||
|
|
"(register_operand (operands[0], TImode)
|
||
|
|
|| aarch64_reg_or_zero (operands[1], TImode))"
|
||
|
|
"@
|
||
|
|
@@ -1384,16 +1391,17 @@
|
||
|
|
#
|
||
|
|
#
|
||
|
|
mov\\t%0.16b, %1.16b
|
||
|
|
+ mov\\t%Z0.d, %Z1.d
|
||
|
|
ldp\\t%0, %H0, %1
|
||
|
|
stp\\t%1, %H1, %0
|
||
|
|
stp\\txzr, xzr, %0
|
||
|
|
ldr\\t%q0, %1
|
||
|
|
str\\t%q1, %0"
|
||
|
|
- [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \
|
||
|
|
+ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q,*,\
|
||
|
|
load_16,store_16,store_16,\
|
||
|
|
load_16,store_16")
|
||
|
|
- (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4")
|
||
|
|
- (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")]
|
||
|
|
+ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4,4")
|
||
|
|
+ (set_attr "arch" "*,simd,*,*,*,simd,sve,*,*,*,fp,fp")]
|
||
|
|
)
|
||
|
|
|
||
|
|
;; Split a TImode register-register or register-immediate move into
|
||
|
|
@@ -1529,13 +1537,14 @@
|
||
|
|
|
||
|
|
(define_insn "*mov<mode>_aarch64"
|
||
|
|
[(set (match_operand:TFD 0
|
||
|
|
- "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
|
||
|
|
+ "nonimmediate_operand" "=w,w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
|
||
|
|
(match_operand:TFD 1
|
||
|
|
- "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
|
||
|
|
+ "general_operand" " w,w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
|
||
|
|
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||
|
|
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||
|
|
"@
|
||
|
|
mov\\t%0.16b, %1.16b
|
||
|
|
+ mov\\t%Z0.d, %Z1.d
|
||
|
|
#
|
||
|
|
#
|
||
|
|
#
|
||
|
|
@@ -1546,10 +1555,10 @@
|
||
|
|
ldp\\t%0, %H0, %1
|
||
|
|
stp\\t%1, %H1, %0
|
||
|
|
stp\\txzr, xzr, %0"
|
||
|
|
- [(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
|
||
|
|
+ [(set_attr "type" "logic_reg,*,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
|
||
|
|
f_loadd,f_stored,load_16,store_16,store_16")
|
||
|
|
- (set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
|
||
|
|
- (set_attr "arch" "simd,*,*,*,simd,*,*,*,*,*,*")]
|
||
|
|
+ (set_attr "length" "4,4,8,8,8,4,4,4,4,4,4,4")
|
||
|
|
+ (set_attr "arch" "simd,sve,*,*,*,simd,*,*,*,*,*,*")]
|
||
|
|
)
|
||
|
|
|
||
|
|
(define_split
|
||
|
|
@@ -1738,7 +1747,7 @@
|
||
|
|
(match_operand:TF 1 "aarch64_mem_pair_operand" "Ump"))
|
||
|
|
(set (match_operand:TF 2 "register_operand" "=w")
|
||
|
|
(match_operand:TF 3 "memory_operand" "m"))]
|
||
|
|
- "TARGET_SIMD
|
||
|
|
+ "TARGET_BASE_SIMD
|
||
|
|
&& rtx_equal_p (XEXP (operands[3], 0),
|
||
|
|
plus_constant (Pmode,
|
||
|
|
XEXP (operands[1], 0),
|
||
|
|
@@ -1788,11 +1797,11 @@
|
||
|
|
(match_operand:TF 1 "register_operand" "w"))
|
||
|
|
(set (match_operand:TF 2 "memory_operand" "=m")
|
||
|
|
(match_operand:TF 3 "register_operand" "w"))]
|
||
|
|
- "TARGET_SIMD &&
|
||
|
|
- rtx_equal_p (XEXP (operands[2], 0),
|
||
|
|
- plus_constant (Pmode,
|
||
|
|
- XEXP (operands[0], 0),
|
||
|
|
- GET_MODE_SIZE (TFmode)))"
|
||
|
|
+ "TARGET_BASE_SIMD
|
||
|
|
+ && rtx_equal_p (XEXP (operands[2], 0),
|
||
|
|
+ plus_constant (Pmode,
|
||
|
|
+ XEXP (operands[0], 0),
|
||
|
|
+ GET_MODE_SIZE (TFmode)))"
|
||
|
|
"stp\\t%q1, %q3, %z0"
|
||
|
|
[(set_attr "type" "neon_stp_q")
|
||
|
|
(set_attr "fp" "yes")]
|
||
|
|
@@ -1840,7 +1849,7 @@
|
||
|
|
(set (match_operand:TX 3 "register_operand" "=w")
|
||
|
|
(mem:TX (plus:P (match_dup 1)
|
||
|
|
(match_operand:P 5 "const_int_operand" "n"))))])]
|
||
|
|
- "TARGET_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
|
||
|
|
+ "TARGET_BASE_SIMD && INTVAL (operands[5]) == GET_MODE_SIZE (<TX:MODE>mode)"
|
||
|
|
"ldp\\t%q2, %q3, [%1], %4"
|
||
|
|
[(set_attr "type" "neon_ldp_q")]
|
||
|
|
)
|
||
|
|
@@ -1890,7 +1899,7 @@
|
||
|
|
(set (mem:TX (plus:P (match_dup 0)
|
||
|
|
(match_operand:P 5 "const_int_operand" "n")))
|
||
|
|
(match_operand:TX 3 "register_operand" "w"))])]
|
||
|
|
- "TARGET_SIMD
|
||
|
|
+ "TARGET_BASE_SIMD
|
||
|
|
&& INTVAL (operands[5])
|
||
|
|
== INTVAL (operands[4]) + GET_MODE_SIZE (<TX:MODE>mode)"
|
||
|
|
"stp\\t%q2, %q3, [%0, %4]!"
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_2.c b/gcc/testsuite/gcc.target/aarch64/movdf_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..0d459d317
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movdf_2.c
|
||
|
|
@@ -0,0 +1,51 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov d0, d1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+double
|
||
|
|
+fpr_to_fpr (double q0, double q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+double
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register double x0 asm ("x0");
|
||
|
|
+ asm volatile ("" : "=r" (x0));
|
||
|
|
+ return x0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+double
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (double q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register double x0 asm ("x0");
|
||
|
|
+ x0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (x0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_3.c b/gcc/testsuite/gcc.target/aarch64/movdi_3.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..31b2cbbae
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movdi_3.c
|
||
|
|
@@ -0,0 +1,59 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#include <stdint.h>
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov d0, d1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_fpr (void) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint64_t q0 asm ("q0");
|
||
|
|
+ register uint64_t q1 asm ("q1");
|
||
|
|
+ asm volatile ("" : "=w" (q1));
|
||
|
|
+ q0 = q1;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+gpr_to_fpr (uint64_t x0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint64_t q0 asm ("q0");
|
||
|
|
+ q0 = x0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint64_t q0 asm ("q0");
|
||
|
|
+ q0 = 0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+uint64_t
|
||
|
|
+fpr_to_gpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint64_t q0 asm ("q0");
|
||
|
|
+ asm volatile ("" : "=w" (q0));
|
||
|
|
+ return q0;
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_2.c b/gcc/testsuite/gcc.target/aarch64/movhf_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..3292b0de8
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movhf_2.c
|
||
|
|
@@ -0,0 +1,53 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nothing+simd"
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov s0, s1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+_Float16
|
||
|
|
+fpr_to_fpr (_Float16 q0, _Float16 q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov s0, w0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+_Float16
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register _Float16 w0 asm ("w0");
|
||
|
|
+ asm volatile ("" : "=r" (w0));
|
||
|
|
+ return w0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+_Float16
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** fmov w0, s0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (_Float16 q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register _Float16 w0 asm ("w0");
|
||
|
|
+ w0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (w0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_2.c b/gcc/testsuite/gcc.target/aarch64/movhi_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..dbbf3486f
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movhi_2.c
|
||
|
|
@@ -0,0 +1,61 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nothing+simd"
|
||
|
|
+
|
||
|
|
+#include <stdint.h>
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov s0, s1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_fpr (void) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint16_t q0 asm ("q0");
|
||
|
|
+ register uint16_t q1 asm ("q1");
|
||
|
|
+ asm volatile ("" : "=w" (q1));
|
||
|
|
+ q0 = q1;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov s0, w0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+gpr_to_fpr (uint16_t w0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint16_t q0 asm ("q0");
|
||
|
|
+ q0 = w0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint16_t q0 asm ("q0");
|
||
|
|
+ q0 = 0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** umov w0, v0.h\[0\]
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+uint16_t
|
||
|
|
+fpr_to_gpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint16_t q0 asm ("q0");
|
||
|
|
+ asm volatile ("" : "=w" (q0));
|
||
|
|
+ return q0;
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_2.c b/gcc/testsuite/gcc.target/aarch64/movqi_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..aec087e4e
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movqi_2.c
|
||
|
|
@@ -0,0 +1,59 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#include <stdint.h>
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov s0, s1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_fpr (void) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint8_t q0 asm ("q0");
|
||
|
|
+ register uint8_t q1 asm ("q1");
|
||
|
|
+ asm volatile ("" : "=w" (q1));
|
||
|
|
+ q0 = q1;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov s0, w0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+gpr_to_fpr (uint8_t w0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint8_t q0 asm ("q0");
|
||
|
|
+ q0 = w0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint8_t q0 asm ("q0");
|
||
|
|
+ q0 = 0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** umov w0, v0.b\[0\]
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+uint8_t
|
||
|
|
+fpr_to_gpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint8_t q0 asm ("q0");
|
||
|
|
+ asm volatile ("" : "=w" (q0));
|
||
|
|
+ return q0;
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_2.c b/gcc/testsuite/gcc.target/aarch64/movsf_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..7fed4b22f
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movsf_2.c
|
||
|
|
@@ -0,0 +1,51 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov s0, s1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+float
|
||
|
|
+fpr_to_fpr (float q0, float q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov s0, w0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+float
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register float w0 asm ("w0");
|
||
|
|
+ asm volatile ("" : "=r" (w0));
|
||
|
|
+ return w0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+float
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** fmov w0, s0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (float q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register float w0 asm ("w0");
|
||
|
|
+ w0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (w0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_2.c b/gcc/testsuite/gcc.target/aarch64/movsi_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..c14d2468a
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movsi_2.c
|
||
|
|
@@ -0,0 +1,59 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#include <stdint.h>
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov s0, s1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_fpr (void) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint32_t q0 asm ("q0");
|
||
|
|
+ register uint32_t q1 asm ("q1");
|
||
|
|
+ asm volatile ("" : "=w" (q1));
|
||
|
|
+ q0 = q1;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov s0, w0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+gpr_to_fpr (uint32_t w0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint32_t q0 asm ("q0");
|
||
|
|
+ q0 = w0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint32_t q0 asm ("q0");
|
||
|
|
+ q0 = 0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** fmov w0, s0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+uint32_t
|
||
|
|
+fpr_to_gpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register uint32_t q0 asm ("q0");
|
||
|
|
+ asm volatile ("" : "=w" (q0));
|
||
|
|
+ return q0;
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_3.c b/gcc/testsuite/gcc.target/aarch64/movtf_3.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..dd164a418
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movtf_3.c
|
||
|
|
@@ -0,0 +1,81 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-require-effective-target large_long_double } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nosve"
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** sub sp, sp, #16
|
||
|
|
+** str q1, \[sp\]
|
||
|
|
+** ldr q0, \[sp\]
|
||
|
|
+** add sp, sp, #?16
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+long double
|
||
|
|
+fpr_to_fpr (long double q0, long double q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_little_endian }
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** fmov v0.d\[1\], x1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_big_endian }
|
||
|
|
+** fmov d0, x1
|
||
|
|
+** fmov v0.d\[1\], x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+long double
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register long double x0 asm ("x0");
|
||
|
|
+ asm volatile ("" : "=r" (x0));
|
||
|
|
+ return x0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+long double
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_little_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_big_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (long double q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register long double x0 asm ("x0");
|
||
|
|
+ x0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (x0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_4.c b/gcc/testsuite/gcc.target/aarch64/movtf_4.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..faf9703e2
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movtf_4.c
|
||
|
|
@@ -0,0 +1,78 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-require-effective-target large_long_double } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+sve"
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** mov z0.d, z1.d
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+long double
|
||
|
|
+fpr_to_fpr (long double q0, long double q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_little_endian }
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** fmov v0.d\[1\], x1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_big_endian }
|
||
|
|
+** fmov d0, x1
|
||
|
|
+** fmov v0.d\[1\], x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+long double
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register long double x0 asm ("x0");
|
||
|
|
+ asm volatile ("" : "=r" (x0));
|
||
|
|
+ return x0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov s0, wzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+long double
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return 0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_little_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_big_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (long double q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register long double x0 asm ("x0");
|
||
|
|
+ x0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (x0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movti_3.c b/gcc/testsuite/gcc.target/aarch64/movti_3.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..243109181
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movti_3.c
|
||
|
|
@@ -0,0 +1,86 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nosve"
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** sub sp, sp, #16
|
||
|
|
+** str q1, \[sp\]
|
||
|
|
+** ldr q0, \[sp\]
|
||
|
|
+** add sp, sp, #?16
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_fpr (void) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ register __int128_t q1 asm ("q1");
|
||
|
|
+ asm volatile ("" : "=w" (q1));
|
||
|
|
+ q0 = q1;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_little_endian }
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** fmov v0.d\[1\], x1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_big_endian }
|
||
|
|
+** fmov d0, x1
|
||
|
|
+** fmov v0.d\[1\], x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ q0 = x0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ q0 = 0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_little_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_big_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+__int128_t
|
||
|
|
+fpr_to_gpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ asm volatile ("" : "=w" (q0));
|
||
|
|
+ return q0;
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movti_4.c b/gcc/testsuite/gcc.target/aarch64/movti_4.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..a70feccb0
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movti_4.c
|
||
|
|
@@ -0,0 +1,83 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+sve"
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** mov z0\.d, z1\.d
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_fpr (void) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ register __int128_t q1 asm ("q1");
|
||
|
|
+ asm volatile ("" : "=w" (q1));
|
||
|
|
+ q0 = q1;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_little_endian }
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** fmov v0.d\[1\], x1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_big_endian }
|
||
|
|
+** fmov d0, x1
|
||
|
|
+** fmov v0.d\[1\], x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+gpr_to_fpr (__int128_t x0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ q0 = x0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ q0 = 0;
|
||
|
|
+ asm volatile ("" :: "w" (q0));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_little_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** fmov x0, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_big_endian }
|
||
|
|
+** (
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** fmov x1, d0
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+__int128_t
|
||
|
|
+fpr_to_gpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register __int128_t q0 asm ("q0");
|
||
|
|
+ asm volatile ("" : "=w" (q0));
|
||
|
|
+ return q0;
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..7bec888b7
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_4.c
|
||
|
|
@@ -0,0 +1,82 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nosve"
|
||
|
|
+
|
||
|
|
+typedef unsigned char v16qi __attribute__((vector_size(16)));
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** sub sp, sp, #16
|
||
|
|
+** str q1, \[sp\]
|
||
|
|
+** ldr q0, \[sp\]
|
||
|
|
+** add sp, sp, #?16
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v16qi
|
||
|
|
+fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_little_endian }
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** fmov v0.d\[1\], x1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_big_endian }
|
||
|
|
+** fmov d0, x1
|
||
|
|
+** fmov v0.d\[1\], x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v16qi
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register v16qi x0 asm ("x0");
|
||
|
|
+ asm volatile ("" : "=r" (x0));
|
||
|
|
+ return x0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v16qi
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return (v16qi) {};
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_little_endian }
|
||
|
|
+** (
|
||
|
|
+** umov x0, v0.d\[0\]
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** umov x0, v0.d\[0\]
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_big_endian }
|
||
|
|
+** (
|
||
|
|
+** umov x1, v0.d\[0\]
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** umov x1, v0.d\[0\]
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register v16qi x0 asm ("x0");
|
||
|
|
+ x0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (x0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..2d36342b3
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_5.c
|
||
|
|
@@ -0,0 +1,79 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+sve"
|
||
|
|
+
|
||
|
|
+typedef unsigned char v16qi __attribute__((vector_size(16)));
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** mov z0.d, z1.d
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v16qi
|
||
|
|
+fpr_to_fpr (v16qi q0, v16qi q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_little_endian }
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** fmov v0.d\[1\], x1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr: { target aarch64_big_endian }
|
||
|
|
+** fmov d0, x1
|
||
|
|
+** fmov v0.d\[1\], x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v16qi
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register v16qi x0 asm ("x0");
|
||
|
|
+ asm volatile ("" : "=r" (x0));
|
||
|
|
+ return x0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v16qi
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return (v16qi) {};
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_little_endian }
|
||
|
|
+** (
|
||
|
|
+** umov x0, v0.d\[0\]
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x1, v0.d\[1\]
|
||
|
|
+** umov x0, v0.d\[0\]
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr: { target aarch64_big_endian }
|
||
|
|
+** (
|
||
|
|
+** umov x1, v0.d\[0\]
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** |
|
||
|
|
+** fmov x0, v0.d\[1\]
|
||
|
|
+** umov x1, v0.d\[0\]
|
||
|
|
+** )
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (v16qi q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register v16qi x0 asm ("x0");
|
||
|
|
+ x0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (x0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..12ae25a3a
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_4.c
|
||
|
|
@@ -0,0 +1,55 @@
|
||
|
|
+/* { dg-do assemble } */
|
||
|
|
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nosve"
|
||
|
|
+
|
||
|
|
+typedef unsigned char v8qi __attribute__((vector_size(8)));
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_fpr:
|
||
|
|
+** fmov d0, d1
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v8qi
|
||
|
|
+fpr_to_fpr (v8qi q0, v8qi q1) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return q1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** gpr_to_fpr:
|
||
|
|
+** fmov d0, x0
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v8qi
|
||
|
|
+gpr_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register v8qi x0 asm ("x0");
|
||
|
|
+ asm volatile ("" : "=r" (x0));
|
||
|
|
+ return x0;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** zero_to_fpr:
|
||
|
|
+** fmov d0, xzr
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+v8qi
|
||
|
|
+zero_to_fpr () [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return (v8qi) {};
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** fpr_to_gpr:
|
||
|
|
+** umov x0, v0\.d\[0\]
|
||
|
|
+** ret
|
||
|
|
+*/
|
||
|
|
+void
|
||
|
|
+fpr_to_gpr (v8qi q0) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ register v8qi x0 asm ("x0");
|
||
|
|
+ x0 = q0;
|
||
|
|
+ asm volatile ("" :: "r" (x0));
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..5b5346cf4
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_1.c
|
||
|
|
@@ -0,0 +1,13 @@
|
||
|
|
+// { dg-options "" }
|
||
|
|
+
|
||
|
|
+#include <arm_neon.h>
|
||
|
|
+
|
||
|
|
+#pragma GCC target "+nosme"
|
||
|
|
+
|
||
|
|
+// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 }
|
||
|
|
+
|
||
|
|
+int32x4_t
|
||
|
|
+foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return vhaddq_s32 (x, y);
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..2092c4471
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_2.c
|
||
|
|
@@ -0,0 +1,11 @@
|
||
|
|
+// { dg-options "" }
|
||
|
|
+
|
||
|
|
+#include <arm_neon.h>
|
||
|
|
+
|
||
|
|
+// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 }
|
||
|
|
+
|
||
|
|
+int32x4_t
|
||
|
|
+foo (int32x4_t x, int32x4_t y) [[arm::streaming_compatible]]
|
||
|
|
+{
|
||
|
|
+ return vhaddq_s32 (x, y);
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..36794e5b0
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/arm_neon_3.c
|
||
|
|
@@ -0,0 +1,11 @@
|
||
|
|
+// { dg-options "" }
|
||
|
|
+
|
||
|
|
+#include <arm_neon.h>
|
||
|
|
+
|
||
|
|
+// { dg-error {inlining failed.*'vhaddq_s32'} "" { target *-*-* } 0 }
|
||
|
|
+
|
||
|
|
+int32x4_t
|
||
|
|
+foo (int32x4_t x, int32x4_t y) [[arm::streaming]]
|
||
|
|
+{
|
||
|
|
+ return vhaddq_s32 (x, y);
|
||
|
|
+}
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|