From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:18 +0000
Subject: [PATCH 063/157] [Backport][SME] aarch64: Use SVE's RDVL instruction

Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0

We didn't previously use SVE's RDVL instruction, since the CNT*
forms are preferred and provide most of the range.  However,
there are some cases that RDVL can handle and CNT* can't,
and using RDVL-like instructions becomes important for SME.
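
As an illustration (condensed from the cntb_17 test in the testsuite
changes below): for

  #include <arm_sve.h>
  uint64_t cntb_17 (void) { return svcntb () * 17; }

the multiplier 17 is out of range for a single CNTB, whose MUL
operand is limited to [1, 16], but fits RDVL's [-32, 31] multiplier,
so the expected code is now the single instruction

	rdvl	x0, #17

rather than the old two-instruction sequence

	cntb	x0, all, mul #16
	incb	x0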

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p)
	(aarch64_output_sve_rdvl): Declare.
	* config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New
	function, split out from...
	(aarch64_sve_cnt_immediate_p): ...here.
	(aarch64_sve_rdvl_factor_p): New function.
	(aarch64_sve_rdvl_immediate_p): Likewise.
	(aarch64_output_sve_rdvl): Likewise.
	(aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL
	for some cases.
	(aarch64_expand_mov_immediate): Handle RDVL immediates.
	(aarch64_mov_operand_p): Likewise.
	* config/aarch64/constraints.md (Usr): New constraint.
	* config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Add an RDVL
	alternative.
	(*movsi_aarch64, *movdi_aarch64): Likewise.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output.
	* gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise.
	* gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used
	to calculate the -17 and 17 factors.
	* gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor.
---
 gcc/config/aarch64/aarch64-protos.h           |   2 +
 gcc/config/aarch64/aarch64.cc                 | 191 ++++++++++++------
 gcc/config/aarch64/aarch64.md                 |   3 +
 gcc/config/aarch64/constraints.md             |   6 +
 .../gcc.target/aarch64/sve/acle/asm/cntb.c    |  71 +++++--
 .../gcc.target/aarch64/sve/acle/asm/cntd.c    |  12 +-
 .../gcc.target/aarch64/sve/acle/asm/cnth.c    |  20 +-
 .../gcc.target/aarch64/sve/acle/asm/cntw.c    |  16 +-
 .../gcc.target/aarch64/sve/acle/asm/prfb.c    |   6 +-
 .../gcc.target/aarch64/sve/acle/asm/prfd.c    |   4 +-
 .../gcc.target/aarch64/sve/acle/asm/prfh.c    |   4 +-
 .../gcc.target/aarch64/sve/acle/asm/prfw.c    |   4 +-
 .../gcc.target/aarch64/sve/loop_add_4.c       |   6 +-
 .../aarch64/sve/pcs/stack_clash_1.c           |   3 +-
 14 files changed, 225 insertions(+), 123 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3ff1a0163..14a568140 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode);
 HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int);
 bool aarch64_sve_cnt_immediate_p (rtx);
 bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
+bool aarch64_sve_rdvl_immediate_p (rtx);
 bool aarch64_sve_addvl_addpl_immediate_p (rtx);
 bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
 int aarch64_add_offset_temporaries (rtx);
@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *);
 char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
 char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *);
 char *aarch64_output_sve_scalar_inc_dec (rtx);
+char *aarch64_output_sve_rdvl (rtx);
 char *aarch64_output_sve_addvl_addpl (rtx);
 char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index acb659f53..4194dfc70 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq)
   return -1;
 }
 
+/* Return true if a single CNT[BHWD] instruction can multiply FACTOR
+   by the number of 128-bit quadwords in an SVE vector.  */
+
+static bool
+aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor)
+{
+  /* The coefficient must be [1, 16] * {2, 4, 8, 16}.  */
+  return (IN_RANGE (factor, 2, 16 * 16)
+	  && (factor & 1) == 0
+	  && factor <= 16 * (factor & -factor));
+}
+
 /* Return true if we can move VALUE into a register using a single
    CNT[BHWD] instruction.  */
 
@@ -5527,11 +5539,7 @@ static bool
 aarch64_sve_cnt_immediate_p (poly_int64 value)
 {
   HOST_WIDE_INT factor = value.coeffs[0];
-  /* The coefficient must be [1, 16] * {2, 4, 8, 16}.  */
-  return (value.coeffs[1] == factor
-	  && IN_RANGE (factor, 2, 16 * 16)
-	  && (factor & 1) == 0
-	  && factor <= 16 * (factor & -factor));
+  return value.coeffs[1] == factor && aarch64_sve_cnt_factor_p (factor);
 }
 
 /* Likewise for rtx X.  */
@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset)
 					     -offset_value.coeffs[1], 0);
 }
 
+/* Return true if a single RDVL instruction can multiply FACTOR by the
+   number of 128-bit quadwords in an SVE vector.  */
+
+static bool
+aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
+{
+  return (multiple_p (factor, 16)
+	  && IN_RANGE (factor, -32 * 16, 31 * 16));
+}
+
+/* Return true if we can move VALUE into a register using a single
+   RDVL instruction.  */
+
+static bool
+aarch64_sve_rdvl_immediate_p (poly_int64 value)
+{
+  HOST_WIDE_INT factor = value.coeffs[0];
+  return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
+}
+
+/* Likewise for rtx X.  */
+
+bool
+aarch64_sve_rdvl_immediate_p (rtx x)
+{
+  poly_int64 value;
+  return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value);
+}
+
+/* Return the asm string for moving RDVL immediate OFFSET into register
+   operand 0.  */
+
+char *
+aarch64_output_sve_rdvl (rtx offset)
+{
+  static char buffer[sizeof ("rdvl\t%x0, #-") + 3 * sizeof (int)];
+  poly_int64 offset_value = rtx_to_poly_int64 (offset);
+  gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value));
+
+  int factor = offset_value.coeffs[1];
+  snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16);
+  return buffer;
+}
+
 /* Return true if we can add VALUE to a register using a single ADDVL
    or ADDPL instruction.  */
 
@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset)
     count += 1;
   else if (factor != 0)
     {
-      factor = abs (factor);
-      if (factor > 16 * (factor & -factor))
-	/* Need one register for the CNT result and one for the multiplication
-	   factor.  If necessary, the second temporary can be reused for the
-	   constant part of the offset.  */
+      factor /= (HOST_WIDE_INT) least_bit_hwi (factor);
+      if (!IN_RANGE (factor, -32, 31))
+	/* Need one register for the CNT or RDVL result and one for the
+	   multiplication factor.  If necessary, the second temporary
+	   can be reused for the constant part of the offset.  */
 	return 2;
-      /* Need one register for the CNT result (which might then
+      /* Need one register for the CNT or RDVL result (which might then
 	 be shifted).  */
       count += 1;
     }
@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
   /* Otherwise use a CNT-based sequence.  */
   else if (factor != 0)
     {
-      /* Use a subtraction if we have a negative factor.  */
-      rtx_code code = PLUS;
-      if (factor < 0)
-	{
-	  factor = -factor;
-	  code = MINUS;
-	}
+      /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT,
+	 with negative shifts indicating a shift right.  */
+      HOST_WIDE_INT low_bit = least_bit_hwi (factor);
+      HOST_WIDE_INT rel_factor = factor / low_bit;
+      int shift = exact_log2 (low_bit) - 4;
+      gcc_assert (shift >= -4 && (rel_factor & 1) != 0);
+
+      /* Set CODE, VAL and SHIFT so that [+-] VAL * 2**SHIFT is
+	 equal to CNTB * FACTOR / 16, with CODE being the [+-].
 
-      /* Calculate CNTD * FACTOR / 2.  First try to fold the division
-	 into the multiplication.  */
+	 We can avoid a multiplication if REL_FACTOR is in the range
+	 of RDVL, although there are then various optimizations that
+	 we can try on top.  */
+      rtx_code code = PLUS;
       rtx val;
-      int shift = 0;
-      if (factor & 1)
-	/* Use a right shift by 1.  */
-	shift = -1;
-      else
-	factor /= 2;
-      HOST_WIDE_INT low_bit = factor & -factor;
-      if (factor <= 16 * low_bit)
+      if (IN_RANGE (rel_factor, -32, 31))
 	{
-	  if (factor > 16 * 8)
+	  /* Try to use an unshifted CNT[BHWD] or RDVL.  */
+	  if (aarch64_sve_cnt_factor_p (factor)
+	      || aarch64_sve_rdvl_factor_p (factor))
+	    {
+	      val = gen_int_mode (poly_int64 (factor, factor), mode);
+	      shift = 0;
+	    }
+	  /* Try to subtract an unshifted CNT[BHWD].  */
+	  else if (aarch64_sve_cnt_factor_p (-factor))
 	    {
-	      /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
-		 the value with the minimum multiplier and shift it into
-		 position.  */
-	      int extra_shift = exact_log2 (low_bit);
-	      shift += extra_shift;
-	      factor >>= extra_shift;
+	      code = MINUS;
+	      val = gen_int_mode (poly_int64 (-factor, -factor), mode);
+	      shift = 0;
 	    }
-	  val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
+	  /* If subtraction is free, prefer to load a positive constant.
+	     In the best case this will fit a shifted CNTB.  */
+	  else if (src != const0_rtx && rel_factor < 0)
+	    {
+	      code = MINUS;
+	      val = gen_int_mode (-rel_factor * BYTES_PER_SVE_VECTOR, mode);
+	    }
+	  /* Otherwise use a shifted RDVL or CNT[BHWD].  */
+	  else
+	    val = gen_int_mode (rel_factor * BYTES_PER_SVE_VECTOR, mode);
 	}
       else
 	{
-	  /* Base the factor on LOW_BIT if we can calculate LOW_BIT
-	     directly, since that should increase the chances of being
-	     able to use a shift and add sequence.  If LOW_BIT itself
-	     is out of range, just use CNTD.  */
-	  if (low_bit <= 16 * 8)
-	    factor /= low_bit;
+	  /* If we can calculate CNTB << SHIFT directly, prefer to do that,
+	     since it should increase the chances of being able to use
+	     a shift and add sequence for the multiplication.
+	     If CNTB << SHIFT is out of range, stick with the current
+	     shift factor.  */
+	  if (IN_RANGE (low_bit, 2, 16 * 16))
+	    {
+	      val = gen_int_mode (poly_int64 (low_bit, low_bit), mode);
+	      shift = 0;
+	    }
 	  else
-	    low_bit = 1;
+	    val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode);
 
-	  val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
 	  val = aarch64_force_temporary (mode, temp1, val);
 
+	  /* Prefer to multiply by a positive factor and subtract rather
+	     than multiply by a negative factor and add, since positive
+	     values are usually easier to move.  */
+	  if (rel_factor < 0 && src != const0_rtx)
+	    {
+	      rel_factor = -rel_factor;
+	      code = MINUS;
+	    }
+
 	  if (can_create_pseudo_p ())
 	    {
-	      rtx coeff1 = gen_int_mode (factor, mode);
+	      rtx coeff1 = gen_int_mode (rel_factor, mode);
 	      val = expand_mult (mode, val, coeff1, NULL_RTX, true, true);
 	    }
 	  else
 	    {
-	      /* Go back to using a negative multiplication factor if we have
-		 no register from which to subtract.  */
-	      if (code == MINUS && src == const0_rtx)
-		{
-		  factor = -factor;
-		  code = PLUS;
-		}
-	      rtx coeff1 = gen_int_mode (factor, mode);
+	      rtx coeff1 = gen_int_mode (rel_factor, mode);
 	      coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
 	      val = gen_rtx_MULT (mode, val, coeff1);
 	    }
 	}
 
+      /* Multiply by 2 ** SHIFT.  */
       if (shift > 0)
 	{
-	  /* Multiply by 1 << SHIFT.  */
 	  val = aarch64_force_temporary (mode, temp1, val);
 	  val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
 	}
-      else if (shift == -1)
+      else if (shift < 0)
 	{
-	  /* Divide by 2.  */
 	  val = aarch64_force_temporary (mode, temp1, val);
-	  val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
+	  val = gen_rtx_ASHIFTRT (mode, val, GEN_INT (-shift));
 	}
 
-      /* Calculate SRC +/- CNTD * FACTOR / 2.  */
+      /* Add the result to SRC or subtract the result from SRC.  */
       if (src != const0_rtx)
 	{
 	  val = aarch64_force_temporary (mode, temp1, val);
@@ -7045,7 +7112,9 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 	      aarch64_report_sve_required ();
 	      return;
 	    }
-	  if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
+	  if (base == const0_rtx
+	      && (aarch64_sve_cnt_immediate_p (offset)
+		  || aarch64_sve_rdvl_immediate_p (offset)))
 	    emit_insn (gen_rtx_SET (dest, imm));
 	  else
 	    {
@@ -21751,7 +21820,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
   if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
     return true;
 
-  if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x))
+  if (TARGET_SVE
+      && (aarch64_sve_cnt_immediate_p (x)
+	  || aarch64_sve_rdvl_immediate_p (x)))
     return true;
 
   return aarch64_classify_symbolic_expression (x)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5d02da42f..c0977a3da 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1207,6 +1207,7 @@
      [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
      /* The "mov_imm" type for CNT is just a placeholder.  */
      [r, Usv  ; mov_imm   , sve  ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+     [r, Usr  ; mov_imm   , sve  ] << aarch64_output_sve_rdvl (operands[1]);
      [r, m    ; load_4    , *    ] ldr<size>\t%w0, %1
      [w, m    ; load_4    , *    ] ldr\t%<size>0, %1
      [m, r Z  ; store_4   , *    ] str<size>\\t%w1, %0
@@ -1265,6 +1266,7 @@
      [r  , n  ; mov_imm  , *   ,16] #
      /* The "mov_imm" type for CNT is just a placeholder.  */
      [r  , Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+     [r  , Usr; mov_imm  , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
      [r  , m  ; load_4   , *   , 4] ldr\t%w0, %1
      [w  , m  ; load_4   , fp  , 4] ldr\t%s0, %1
      [m  , r Z; store_4  , *   , 4] str\t%w1, %0
@@ -1299,6 +1301,7 @@
      [r, n  ; mov_imm  , *   ,16] #
      /* The "mov_imm" type for CNT is just a placeholder.  */
      [r, Usv; mov_imm  , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+     [r, Usr; mov_imm  , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
      [r, m  ; load_8   , *   , 4] ldr\t%x0, %1
      [w, m  ; load_8   , fp  , 4] ldr\t%d0, %1
      [m, r Z; store_8  , *   , 4] str\t%x1, %0
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 750a42fb1..212a73416 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -214,6 +214,12 @@
   (and (match_code "const_int")
        (match_test "aarch64_high_bits_all_ones_p (ival)")))
 
+(define_constraint "Usr"
+  "@internal
+   A constraint that matches a value produced by RDVL."
+  (and (match_code "const_poly_int")
+       (match_test "aarch64_sve_rdvl_immediate_p (op)")))
+
 (define_constraint "Usv"
   "@internal
  A constraint that matches a VG-based constant that can be loaded by
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
index 8b8fe8e4f..a22d8a28d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
@@ -51,19 +51,24 @@ PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; }
 */
 PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; }
 
-/* Other sequences would be OK.  */
 /*
 ** cntb_17:
-**	cntb	x0, all, mul #16
-**	incb	x0
+**	rdvl	x0, #17
 **	ret
 */
 PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; }
 
+/*
+** cntb_31:
+**	rdvl	x0, #31
+**	ret
+*/
+PROTO (cntb_31, uint64_t, ()) { return svcntb () * 31; }
+
 /*
 ** cntb_32:
-**	cntd	(x[0-9]+)
-**	lsl	x0, \1, 8
+**	cntb	(x[0-9]+)
+**	lsl	x0, \1, 5
 **	ret
 */
 PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; }
@@ -80,16 +85,16 @@ PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; }
 
 /*
 ** cntb_64:
-**	cntd	(x[0-9]+)
-**	lsl	x0, \1, 9
+**	cntb	(x[0-9]+)
+**	lsl	x0, \1, 6
 **	ret
 */
 PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; }
 
 /*
 ** cntb_128:
-**	cntd	(x[0-9]+)
-**	lsl	x0, \1, 10
+**	cntb	(x[0-9]+)
+**	lsl	x0, \1, 7
 **	ret
 */
 PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; }
@@ -106,46 +111,70 @@ PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; }
 
 /*
 ** cntb_m1:
-**	cntb	(x[0-9]+)
-**	neg	x0, \1
+**	rdvl	x0, #-1
 **	ret
 */
 PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); }
 
 /*
 ** cntb_m13:
-**	cntb	(x[0-9]+), all, mul #13
-**	neg	x0, \1
+**	rdvl	x0, #-13
 **	ret
 */
 PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; }
 
 /*
 ** cntb_m15:
-**	cntb	(x[0-9]+), all, mul #15
-**	neg	x0, \1
+**	rdvl	x0, #-15
 **	ret
 */
 PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; }
 
 /*
 ** cntb_m16:
-**	cntb	(x[0-9]+), all, mul #16
-**	neg	x0, \1
+**	rdvl	x0, #-16
 **	ret
 */
 PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; }
 
-/* Other sequences would be OK.  */
 /*
 ** cntb_m17:
-**	cntb	x0, all, mul #16
-**	incb	x0
-**	neg	x0, x0
+**	rdvl	x0, #-17
 **	ret
 */
 PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; }
 
+/*
+** cntb_m32:
+**	rdvl	x0, #-32
+**	ret
+*/
+PROTO (cntb_m32, uint64_t, ()) { return -svcntb () * 32; }
+
+/*
+** cntb_m33:
+**	rdvl	x0, #-32
+**	decb	x0
+**	ret
+*/
+PROTO (cntb_m33, uint64_t, ()) { return -svcntb () * 33; }
+
+/*
+** cntb_m34:
+**	rdvl	(x[0-9]+), #-17
+**	lsl	x0, \1, #?1
+**	ret
+*/
+PROTO (cntb_m34, uint64_t, ()) { return -svcntb () * 34; }
+
+/*
+** cntb_m64:
+**	rdvl	(x[0-9]+), #-1
+**	lsl	x0, \1, #?6
+**	ret
+*/
+PROTO (cntb_m64, uint64_t, ()) { return -svcntb () * 64; }
+
 /*
 ** incb_1:
 **	incb	x0
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
index 0d0ed4849..090a643b4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
@@ -54,8 +54,8 @@ PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; }
 /* Other sequences would be OK.  */
 /*
 ** cntd_17:
-**	cntb	x0, all, mul #2
-**	incd	x0
+**	rdvl	(x[0-9]+), #17
+**	asr	x0, \1, 3
 **	ret
 */
 PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; }
@@ -107,8 +107,7 @@ PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; }
 
 /*
 ** cntd_m16:
-**	cntb	(x[0-9]+), all, mul #2
-**	neg	x0, \1
+**	rdvl	x0, #-2
 **	ret
 */
 PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
@@ -116,9 +115,8 @@ PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
 /* Other sequences would be OK.  */
 /*
 ** cntd_m17:
-**	cntb	x0, all, mul #2
-**	incd	x0
-**	neg	x0, x0
+**	rdvl	(x[0-9]+), #-17
+**	asr	x0, \1, 3
 **	ret
 */
 PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
index c29930f15..1a4e7dc0e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
@@ -54,8 +54,8 @@ PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; }
 /* Other sequences would be OK.  */
 /*
 ** cnth_17:
-**	cntb	x0, all, mul #8
-**	inch	x0
+**	rdvl	(x[0-9]+), #17
+**	asr	x0, \1, 1
 **	ret
 */
 PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; }
@@ -69,16 +69,16 @@ PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; }
 
 /*
 ** cnth_64:
-**	cntd	(x[0-9]+)
-**	lsl	x0, \1, 8
+**	cntb	(x[0-9]+)
+**	lsl	x0, \1, 5
 **	ret
 */
 PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; }
 
 /*
 ** cnth_128:
-**	cntd	(x[0-9]+)
-**	lsl	x0, \1, 9
+**	cntb	(x[0-9]+)
+**	lsl	x0, \1, 6
 **	ret
 */
 PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; }
@@ -109,8 +109,7 @@ PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; }
 
 /*
 ** cnth_m16:
-**	cntb	(x[0-9]+), all, mul #8
-**	neg	x0, \1
+**	rdvl	x0, #-8
 **	ret
 */
 PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
@@ -118,9 +117,8 @@ PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
 /* Other sequences would be OK.  */
 /*
 ** cnth_m17:
-**	cntb	x0, all, mul #8
-**	inch	x0
-**	neg	x0, x0
+**	rdvl	(x[0-9]+), #-17
+**	asr	x0, \1, 1
 **	ret
 */
 PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
index e26cc67a4..9d1697690 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
@@ -54,8 +54,8 @@ PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; }
 /* Other sequences would be OK.  */
 /*
 ** cntw_17:
-**	cntb	x0, all, mul #4
-**	incw	x0
+**	rdvl	(x[0-9]+), #17
+**	asr	x0, \1, 2
 **	ret
 */
 PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; }
@@ -76,8 +76,8 @@ PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; }
 
 /*
 ** cntw_128:
-**	cntd	(x[0-9]+)
-**	lsl	x0, \1, 8
+**	cntb	(x[0-9]+)
+**	lsl	x0, \1, 5
 **	ret
 */
 PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; }
@@ -108,8 +108,7 @@ PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; }
 
 /*
 ** cntw_m16:
-**	cntb	(x[0-9]+), all, mul #4
-**	neg	x0, \1
+**	rdvl	(x[0-9]+), #-4
 **	ret
 */
 PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
@@ -117,9 +116,8 @@ PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
 /* Other sequences would be OK.  */
 /*
 ** cntw_m17:
-**	cntb	x0, all, mul #4
-**	incw	x0
-**	neg	x0, x0
+**	rdvl	(x[0-9]+), #-17
+**	asr	x0, \1, 2
 **	ret
 */
 PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
index c90730a03..94cd3a066 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfb_vnum_31, uint16_t,
 
 /*
 ** prfb_vnum_32:
-**	cntd	(x[0-9]+)
-**	lsl	(x[0-9]+), \1, #?8
+**	cntb	(x[0-9]+)
+**	lsl	(x[0-9]+), \1, #?5
 **	add	(x[0-9]+), (\2, x0|x0, \2)
 **	prfb	pldl1keep, p0, \[\3\]
 **	ret
@@ -240,7 +240,7 @@ TEST_PREFETCH (prfb_vnum_m32, uint16_t,
 /*
 ** prfb_vnum_m33:
 **	...
-**	prfb	pldl1keep, p0, \[x[0-9]+\]
+**	prfb	pldl1keep, p0, \[x[0-9]+(, x[0-9]+)?\]
 **	ret
 */
 TEST_PREFETCH (prfb_vnum_m33, uint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
index 869ef3d3e..b7a116cf0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfd_vnum_31, uint16_t,
 
 /*
 ** prfd_vnum_32:
-**	cntd	(x[0-9]+)
-**	lsl	(x[0-9]+), \1, #?8
+**	cntb	(x[0-9]+)
+**	lsl	(x[0-9]+), \1, #?5
 **	add	(x[0-9]+), (\2, x0|x0, \2)
 **	prfd	pldl1keep, p0, \[\3\]
 **	ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
index 45a735eae..9d3df6bd3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfh_vnum_31, uint16_t,
 
 /*
 ** prfh_vnum_32:
-**	cntd	(x[0-9]+)
-**	lsl	(x[0-9]+), \1, #?8
+**	cntb	(x[0-9]+)
+**	lsl	(x[0-9]+), \1, #?5
 **	add	(x[0-9]+), (\2, x0|x0, \2)
 **	prfh	pldl1keep, p0, \[\3\]
 **	ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
index 444187f45..6962abab6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfw_vnum_31, uint16_t,
 
 /*
 ** prfw_vnum_32:
-**	cntd	(x[0-9]+)
-**	lsl	(x[0-9]+), \1, #?8
+**	cntb	(x[0-9]+)
+**	lsl	(x[0-9]+), \1, #?5
 **	add	(x[0-9]+), (\2, x0|x0, \2)
 **	prfw	pldl1keep, p0, \[\3\]
 **	ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
index 9ead9c21b..7f02497e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
@@ -68,8 +68,7 @@ TEST_ALL (LOOP)
 /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
 /* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
-/* 2 for the calculations of -17 and 17.  */
-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */
 
 /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */
@@ -86,8 +85,7 @@ TEST_ALL (LOOP)
 /* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
 /* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
 /* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
-/* 2 for the calculations of -17 and 17.  */
-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */
 
 /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */
 /* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
index 110947a6c..5de34fc61 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
@@ -6,8 +6,7 @@
 
 /*
 ** test_1:
-**	cntd	x12, all, mul #9
-**	lsl	x12, x12, #?4
+**	rdvl	x12, #18
 **	mov	x11, sp
 **	...
 **	sub	sp, sp, x12
-- 
2.33.0