710 lines
25 KiB
Diff
710 lines
25 KiB
Diff
From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001
|
|
From: Richard Sandiford <richard.sandiford@arm.com>
|
|
Date: Tue, 17 Oct 2023 23:46:33 +0100
|
|
Subject: [PATCH 104/157] [Backport][SME] aarch64: Use vecs to store register
|
|
save order
|
|
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0
|
|
|
|
aarch64_save/restore_callee_saves looped over registers in register
|
|
number order. This in turn meant that we could only use LDP and STP
|
|
for registers that were consecutive both number-wise and
|
|
offset-wise (after unsaved registers are excluded).
|
|
|
|
This patch instead builds lists of the registers that we've decided to
|
|
save, in offset order. We can then form LDP/STP pairs regardless of
|
|
register number order, which in turn means that we can put the LR save
|
|
slot first without losing LDP/STP opportunities.
|
|
|
|
gcc/
|
|
* config/aarch64/aarch64.h (aarch64_frame): Add vectors that
|
|
store the lists of saved GPRs, FPRs and predicate registers.
|
|
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize
|
|
the lists of saved registers. Use them to choose push candidates.
|
|
Invalidate pop candidates if we're not going to do a pop.
|
|
(aarch64_next_callee_save): Delete.
|
|
(aarch64_save_callee_saves): Take a list of registers,
|
|
rather than a range. Make !skip_wb select only write-back
|
|
candidates.
|
|
(aarch64_expand_prologue): Update calls accordingly.
|
|
(aarch64_restore_callee_saves): Take a list of registers,
|
|
rather than a range. Always skip pop candidates. Also skip
|
|
LR if shadow call stacks are enabled.
|
|
(aarch64_expand_epilogue): Update calls accordingly.
|
|
|
|
gcc/testsuite/
|
|
* gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores
|
|
to happen in offset order.
|
|
* gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise.
|
|
* gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise.
|
|
* gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise.
|
|
* gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise.
|
|
* gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise.
|
|
---
|
|
gcc/config/aarch64/aarch64.cc | 203 +++++++++---------
|
|
gcc/config/aarch64/aarch64.h | 9 +-
|
|
.../aarch64/sve/pcs/stack_clash_2.c | 6 +-
|
|
.../aarch64/sve/pcs/stack_clash_2_1024.c | 6 +-
|
|
.../aarch64/sve/pcs/stack_clash_2_128.c | 6 +-
|
|
.../aarch64/sve/pcs/stack_clash_2_2048.c | 6 +-
|
|
.../aarch64/sve/pcs/stack_clash_2_256.c | 6 +-
|
|
.../aarch64/sve/pcs/stack_clash_2_512.c | 6 +-
|
|
8 files changed, 128 insertions(+), 120 deletions(-)
|
|
|
|
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
|
index 8d4dd2891..e10c9d763 100644
|
|
--- a/gcc/config/aarch64/aarch64.cc
|
|
+++ b/gcc/config/aarch64/aarch64.cc
|
|
@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p ()
|
|
static void
|
|
aarch64_layout_frame (void)
|
|
{
|
|
- int regno, last_fp_reg = INVALID_REGNUM;
|
|
+ unsigned regno, last_fp_reg = INVALID_REGNUM;
|
|
machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
|
|
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
|
|
bool frame_related_fp_reg_p = false;
|
|
aarch64_frame &frame = cfun->machine->frame;
|
|
poly_int64 top_of_locals = -1;
|
|
|
|
+ vec_safe_truncate (frame.saved_gprs, 0);
|
|
+ vec_safe_truncate (frame.saved_fprs, 0);
|
|
+ vec_safe_truncate (frame.saved_prs, 0);
|
|
+
|
|
frame.emit_frame_chain = aarch64_needs_frame_chain ();
|
|
|
|
/* Adjust the outgoing arguments size if required. Keep it in sync with what
|
|
@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void)
|
|
for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
|
|
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
|
{
|
|
+ vec_safe_push (frame.saved_prs, regno);
|
|
if (frame.sve_save_and_probe == INVALID_REGNUM)
|
|
frame.sve_save_and_probe = regno;
|
|
frame.reg_offset[regno] = offset;
|
|
@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void)
|
|
If we don't have any vector registers to save, and we know how
|
|
big the predicate save area is, we can just round it up to the
|
|
next 16-byte boundary. */
|
|
- if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ())
|
|
+ if (last_fp_reg == INVALID_REGNUM && offset.is_constant ())
|
|
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
|
else
|
|
{
|
|
@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void)
|
|
}
|
|
|
|
/* If we need to save any SVE vector registers, add them next. */
|
|
- if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
|
|
+ if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
|
|
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
|
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
|
{
|
|
+ vec_safe_push (frame.saved_fprs, regno);
|
|
if (frame.sve_save_and_probe == INVALID_REGNUM)
|
|
frame.sve_save_and_probe = regno;
|
|
frame.reg_offset[regno] = offset;
|
|
@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void)
|
|
|
|
auto allocate_gpr_slot = [&](unsigned int regno)
|
|
{
|
|
- if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
|
|
- frame.hard_fp_save_and_probe = regno;
|
|
+ vec_safe_push (frame.saved_gprs, regno);
|
|
frame.reg_offset[regno] = offset;
|
|
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
|
- frame.wb_push_candidate1 = regno;
|
|
- else if (frame.wb_push_candidate2 == INVALID_REGNUM)
|
|
- frame.wb_push_candidate2 = regno;
|
|
offset += UNITS_PER_WORD;
|
|
};
|
|
|
|
@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void)
|
|
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
|
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
|
{
|
|
- if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
|
|
- frame.hard_fp_save_and_probe = regno;
|
|
+ vec_safe_push (frame.saved_fprs, regno);
|
|
/* If there is an alignment gap between integer and fp callee-saves,
|
|
allocate the last fp register to it if possible. */
|
|
if (regno == last_fp_reg
|
|
@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void)
|
|
}
|
|
|
|
frame.reg_offset[regno] = offset;
|
|
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
|
- frame.wb_push_candidate1 = regno;
|
|
- else if (frame.wb_push_candidate2 == INVALID_REGNUM
|
|
- && frame.wb_push_candidate1 >= V0_REGNUM)
|
|
- frame.wb_push_candidate2 = regno;
|
|
offset += vector_save_size;
|
|
}
|
|
|
|
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
|
-
|
|
auto saved_regs_size = offset - frame.bytes_below_saved_regs;
|
|
- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
|
|
- || (frame.hard_fp_save_and_probe != INVALID_REGNUM
|
|
- && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
|
|
- frame.bytes_below_hard_fp)));
|
|
+
|
|
+ array_slice<unsigned int> push_regs = (!vec_safe_is_empty (frame.saved_gprs)
|
|
+ ? frame.saved_gprs
|
|
+ : frame.saved_fprs);
|
|
+ if (!push_regs.empty ()
|
|
+ && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))
|
|
+ {
|
|
+ frame.hard_fp_save_and_probe = push_regs[0];
|
|
+ frame.wb_push_candidate1 = push_regs[0];
|
|
+ if (push_regs.size () > 1)
|
|
+ frame.wb_push_candidate2 = push_regs[1];
|
|
+ }
|
|
+ else
|
|
+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size));
|
|
|
|
/* With stack-clash, a register must be saved in non-leaf functions.
|
|
The saving of the bottommost register counts as an implicit probe,
|
|
@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void)
|
|
+ frame.sve_callee_adjust
|
|
+ frame.final_adjust, frame.frame_size));
|
|
|
|
- if (!frame.emit_frame_chain && frame.callee_adjust == 0)
|
|
+ if (frame.callee_adjust == 0)
|
|
{
|
|
- /* We've decided not to associate any register saves with the initial
|
|
- stack allocation. */
|
|
- frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM;
|
|
- frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM;
|
|
+ /* We've decided not to do a "real" push and pop. However,
|
|
+ setting up the frame chain is treated as being essentially
|
|
+ a multi-instruction push. */
|
|
+ frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM;
|
|
+ if (!frame.emit_frame_chain)
|
|
+ frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM;
|
|
}
|
|
|
|
frame.laid_out = true;
|
|
@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno)
|
|
return known_ge (cfun->machine->frame.reg_offset[regno], 0);
|
|
}
|
|
|
|
-/* Return the next register up from REGNO up to LIMIT for the callee
|
|
- to save. */
|
|
-
|
|
-static unsigned
|
|
-aarch64_next_callee_save (unsigned regno, unsigned limit)
|
|
-{
|
|
- while (regno <= limit && !aarch64_register_saved_on_entry (regno))
|
|
- regno ++;
|
|
- return regno;
|
|
-}
|
|
-
|
|
/* Push the register number REGNO of mode MODE to the stack with write-back
|
|
adjusting the stack by ADJUSTMENT. */
|
|
|
|
@@ -9424,41 +9419,46 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
|
|
add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
|
|
}
|
|
|
|
-/* Emit code to save the callee-saved registers from register number START
|
|
- to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
|
|
- bytes above the bottom of the static frame. Skip any write-back
|
|
- candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
|
|
- frame pointer has been set up. */
|
|
+/* Emit code to save the callee-saved registers in REGS. Skip any
|
|
+ write-back candidates if SKIP_WB is true, otherwise consider only
|
|
+ write-back candidates.
|
|
+
|
|
+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom
|
|
+ of the static frame. HARD_FP_VALID_P is true if the hard frame pointer
|
|
+ has been set up. */
|
|
|
|
static void
|
|
aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
|
- unsigned start, unsigned limit, bool skip_wb,
|
|
+ array_slice<unsigned int> regs, bool skip_wb,
|
|
bool hard_fp_valid_p)
|
|
{
|
|
aarch64_frame &frame = cfun->machine->frame;
|
|
rtx_insn *insn;
|
|
- unsigned regno;
|
|
- unsigned regno2;
|
|
rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
|
|
|
|
- for (regno = aarch64_next_callee_save (start, limit);
|
|
- regno <= limit;
|
|
- regno = aarch64_next_callee_save (regno + 1, limit))
|
|
+ auto skip_save_p = [&](unsigned int regno)
|
|
+ {
|
|
+ if (cfun->machine->reg_is_wrapped_separately[regno])
|
|
+ return true;
|
|
+
|
|
+ if (skip_wb == (regno == frame.wb_push_candidate1
|
|
+ || regno == frame.wb_push_candidate2))
|
|
+ return true;
|
|
+
|
|
+ return false;
|
|
+ };
|
|
+
|
|
+ for (unsigned int i = 0; i < regs.size (); ++i)
|
|
{
|
|
- rtx reg, mem;
|
|
+ unsigned int regno = regs[i];
|
|
poly_int64 offset;
|
|
bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
|
|
|
- if (skip_wb
|
|
- && (regno == frame.wb_push_candidate1
|
|
- || regno == frame.wb_push_candidate2))
|
|
- continue;
|
|
-
|
|
- if (cfun->machine->reg_is_wrapped_separately[regno])
|
|
+ if (skip_save_p (regno))
|
|
continue;
|
|
|
|
machine_mode mode = aarch64_reg_save_mode (regno);
|
|
- reg = gen_rtx_REG (mode, regno);
|
|
+ rtx reg = gen_rtx_REG (mode, regno);
|
|
offset = frame.reg_offset[regno] - bytes_below_sp;
|
|
rtx base_rtx = stack_pointer_rtx;
|
|
poly_int64 sp_offset = offset;
|
|
@@ -9485,12 +9485,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
|
}
|
|
offset -= fp_offset;
|
|
}
|
|
- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
|
+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
|
bool need_cfa_note_p = (base_rtx != stack_pointer_rtx);
|
|
|
|
+ unsigned int regno2;
|
|
if (!aarch64_sve_mode_p (mode)
|
|
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
|
|
- && !cfun->machine->reg_is_wrapped_separately[regno2]
|
|
+ && i + 1 < regs.size ()
|
|
+ && (regno2 = regs[i + 1], !skip_save_p (regno2))
|
|
&& known_eq (GET_MODE_SIZE (mode),
|
|
frame.reg_offset[regno2] - frame.reg_offset[regno]))
|
|
{
|
|
@@ -9516,6 +9517,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
|
}
|
|
|
|
regno = regno2;
|
|
+ ++i;
|
|
}
|
|
else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
|
{
|
|
@@ -9533,49 +9535,57 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
|
}
|
|
}
|
|
|
|
-/* Emit code to restore the callee registers from register number START
|
|
- up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
|
|
- bytes above the bottom of the static frame. Skip any write-back
|
|
- candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
|
|
- notes into CFI_OPS. */
|
|
+/* Emit code to restore the callee registers in REGS, ignoring pop candidates
|
|
+ and any other registers that are handled separately. Write the appropriate
|
|
+ REG_CFA_RESTORE notes into CFI_OPS.
|
|
+
|
|
+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom
|
|
+ of the static frame. */
|
|
|
|
static void
|
|
-aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
|
|
- unsigned limit, bool skip_wb, rtx *cfi_ops)
|
|
+aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
|
|
+ array_slice<unsigned int> regs, rtx *cfi_ops)
|
|
{
|
|
aarch64_frame &frame = cfun->machine->frame;
|
|
- unsigned regno;
|
|
- unsigned regno2;
|
|
poly_int64 offset;
|
|
rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
|
|
|
|
- for (regno = aarch64_next_callee_save (start, limit);
|
|
- regno <= limit;
|
|
- regno = aarch64_next_callee_save (regno + 1, limit))
|
|
+ auto skip_restore_p = [&](unsigned int regno)
|
|
{
|
|
- bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
|
if (cfun->machine->reg_is_wrapped_separately[regno])
|
|
- continue;
|
|
+ return true;
|
|
+
|
|
+ if (regno == frame.wb_pop_candidate1
|
|
+ || regno == frame.wb_pop_candidate2)
|
|
+ return true;
|
|
|
|
- rtx reg, mem;
|
|
+ /* The shadow call stack code restores LR separately. */
|
|
+ if (frame.is_scs_enabled && regno == LR_REGNUM)
|
|
+ return true;
|
|
|
|
- if (skip_wb
|
|
- && (regno == frame.wb_pop_candidate1
|
|
- || regno == frame.wb_pop_candidate2))
|
|
+ return false;
|
|
+ };
|
|
+
|
|
+ for (unsigned int i = 0; i < regs.size (); ++i)
|
|
+ {
|
|
+ unsigned int regno = regs[i];
|
|
+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
|
+ if (skip_restore_p (regno))
|
|
continue;
|
|
|
|
machine_mode mode = aarch64_reg_save_mode (regno);
|
|
- reg = gen_rtx_REG (mode, regno);
|
|
+ rtx reg = gen_rtx_REG (mode, regno);
|
|
offset = frame.reg_offset[regno] - bytes_below_sp;
|
|
rtx base_rtx = stack_pointer_rtx;
|
|
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
|
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
|
|
offset, ptrue);
|
|
- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
|
+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
|
|
|
+ unsigned int regno2;
|
|
if (!aarch64_sve_mode_p (mode)
|
|
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
|
|
- && !cfun->machine->reg_is_wrapped_separately[regno2]
|
|
+ && i + 1 < regs.size ()
|
|
+ && (regno2 = regs[i + 1], !skip_restore_p (regno2))
|
|
&& known_eq (GET_MODE_SIZE (mode),
|
|
frame.reg_offset[regno2] - frame.reg_offset[regno]))
|
|
{
|
|
@@ -9588,6 +9598,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
|
|
|
|
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
|
|
regno = regno2;
|
|
+ ++i;
|
|
}
|
|
else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
|
emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem));
|
|
@@ -10409,13 +10420,10 @@ aarch64_expand_prologue (void)
|
|
- frame.bytes_above_hard_fp);
|
|
gcc_assert (known_ge (chain_offset, 0));
|
|
|
|
+ gcc_assert (reg1 == R29_REGNUM && reg2 == R30_REGNUM);
|
|
if (callee_adjust == 0)
|
|
- {
|
|
- reg1 = R29_REGNUM;
|
|
- reg2 = R30_REGNUM;
|
|
- aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
|
|
- false, false);
|
|
- }
|
|
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs,
|
|
+ false, false);
|
|
else
|
|
gcc_assert (known_eq (chain_offset, 0));
|
|
aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
|
|
@@ -10453,8 +10461,7 @@ aarch64_expand_prologue (void)
|
|
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
|
|
}
|
|
|
|
- aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
|
|
- callee_adjust != 0 || emit_frame_chain,
|
|
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true,
|
|
emit_frame_chain);
|
|
if (maybe_ne (sve_callee_adjust, 0))
|
|
{
|
|
@@ -10465,10 +10472,9 @@ aarch64_expand_prologue (void)
|
|
!frame_pointer_needed, false);
|
|
bytes_below_sp -= sve_callee_adjust;
|
|
}
|
|
- aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
|
|
- false, emit_frame_chain);
|
|
- aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
|
|
- callee_adjust != 0 || emit_frame_chain,
|
|
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_prs, true,
|
|
+ emit_frame_chain);
|
|
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_fprs, true,
|
|
emit_frame_chain);
|
|
|
|
/* We may need to probe the final adjustment if it is larger than the guard
|
|
@@ -10514,8 +10520,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
|
poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
|
|
unsigned reg1 = frame.wb_pop_candidate1;
|
|
unsigned reg2 = frame.wb_pop_candidate2;
|
|
- unsigned int last_gpr = (frame.is_scs_enabled
|
|
- ? R29_REGNUM : R30_REGNUM);
|
|
rtx cfi_ops = NULL;
|
|
rtx_insn *insn;
|
|
/* A stack clash protection prologue may not have left EP0_REGNUM or
|
|
@@ -10579,10 +10583,8 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
|
|
|
/* Restore the vector registers before the predicate registers,
|
|
so that we can use P4 as a temporary for big-endian SVE frames. */
|
|
- aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
|
|
- callee_adjust != 0, &cfi_ops);
|
|
- aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
|
|
- false, &cfi_ops);
|
|
+ aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops);
|
|
+ aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops);
|
|
if (maybe_ne (sve_callee_adjust, 0))
|
|
aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
|
|
|
|
@@ -10590,8 +10592,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
|
restore x30, we don't need to restore x30 again in the traditional
|
|
way. */
|
|
aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
|
|
- R0_REGNUM, last_gpr,
|
|
- callee_adjust != 0, &cfi_ops);
|
|
+ frame.saved_gprs, &cfi_ops);
|
|
|
|
if (need_barrier_p)
|
|
aarch64_emit_stack_tie (stack_pointer_rtx);
|
|
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
|
index 292ef2eec..1591cde8b 100644
|
|
--- a/gcc/config/aarch64/aarch64.h
|
|
+++ b/gcc/config/aarch64/aarch64.h
|
|
@@ -787,7 +787,7 @@ extern enum aarch64_processor aarch64_tune;
|
|
|
|
#define DEFAULT_PCC_STRUCT_RETURN 0
|
|
|
|
-#ifdef HAVE_POLY_INT_H
|
|
+#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
|
|
struct GTY (()) aarch64_frame
|
|
{
|
|
/* The offset from the bottom of the static frame (the bottom of the
|
|
@@ -795,6 +795,13 @@ struct GTY (()) aarch64_frame
|
|
needed. */
|
|
poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
|
|
|
|
+ /* The list of GPRs, FPRs and predicate registers that have nonnegative
|
|
+ entries in reg_offset. The registers are listed in order of
|
|
+ increasing offset (rather than increasing register number). */
|
|
+ vec<unsigned, va_gc_atomic> *saved_gprs;
|
|
+ vec<unsigned, va_gc_atomic> *saved_fprs;
|
|
+ vec<unsigned, va_gc_atomic> *saved_prs;
|
|
+
|
|
/* The number of extra stack bytes taken up by register varargs.
|
|
This area is allocated by the callee at the very top of the
|
|
frame. This value is rounded up to a multiple of
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
|
|
index 4622a1eed..bbb45d266 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
|
|
@@ -215,9 +215,9 @@ test_7 (void)
|
|
** add sp, sp, #?16
|
|
** ldr p4, \[sp\]
|
|
** addvl sp, sp, #1
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -283,9 +283,9 @@ test_9 (int n)
|
|
** addvl sp, x29, #-1
|
|
** ldr p4, \[sp\]
|
|
** addvl sp, sp, #1
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -319,9 +319,9 @@ test_10 (int n)
|
|
** addvl sp, x29, #-1
|
|
** ldr p4, \[sp\]
|
|
** addvl sp, sp, #1
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** add sp, sp, #?3008
|
|
** add sp, sp, #?126976
|
|
** ret
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
|
|
index e31200fc2..9437c7a85 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
|
|
@@ -176,9 +176,9 @@ test_7 (void)
|
|
** add sp, sp, #?16
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?128
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -234,9 +234,9 @@ test_9 (int n)
|
|
** sub sp, x29, #128
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?128
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -268,9 +268,9 @@ test_10 (int n)
|
|
** sub sp, x29, #128
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?128
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** add sp, sp, #?3008
|
|
** add sp, sp, #?126976
|
|
** ret
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
|
|
index 41193b411..b4e1627fa 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
|
|
@@ -176,9 +176,9 @@ test_7 (void)
|
|
** add sp, sp, #?16
|
|
** ldr p4, \[sp\]
|
|
** add sp, sp, #?16
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -234,9 +234,9 @@ test_9 (int n)
|
|
** sub sp, x29, #16
|
|
** ldr p4, \[sp\]
|
|
** add sp, sp, #?16
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -267,9 +267,9 @@ test_10 (int n)
|
|
** sub sp, x29, #16
|
|
** ldr p4, \[sp\]
|
|
** add sp, sp, #?16
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** add sp, sp, #?3008
|
|
** add sp, sp, #?126976
|
|
** ret
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
|
|
index f63751678..921209379 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
|
|
@@ -176,9 +176,9 @@ test_7 (void)
|
|
** add sp, sp, #?16
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?256
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -234,9 +234,9 @@ test_9 (int n)
|
|
** sub sp, x29, #256
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?256
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -268,9 +268,9 @@ test_10 (int n)
|
|
** sub sp, x29, #256
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?256
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** add sp, sp, #?3008
|
|
** add sp, sp, #?126976
|
|
** ret
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
|
|
index 6bcbb5772..bd8bef0f0 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
|
|
@@ -176,9 +176,9 @@ test_7 (void)
|
|
** add sp, sp, #?16
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?32
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -234,9 +234,9 @@ test_9 (int n)
|
|
** sub sp, x29, #32
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?32
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -267,9 +267,9 @@ test_10 (int n)
|
|
** sub sp, x29, #32
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?32
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** add sp, sp, #?3008
|
|
** add sp, sp, #?126976
|
|
** ret
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
|
|
index dc7df8e6b..2c76ccecd 100644
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
|
|
@@ -176,9 +176,9 @@ test_7 (void)
|
|
** add sp, sp, #?16
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?64
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -234,9 +234,9 @@ test_9 (int n)
|
|
** sub sp, x29, #64
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?64
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** mov x12, #?4144
|
|
** add sp, sp, x12
|
|
** ret
|
|
@@ -268,9 +268,9 @@ test_10 (int n)
|
|
** sub sp, x29, #64
|
|
** ldr z16, \[sp\]
|
|
** add sp, sp, #?64
|
|
+** ldp x29, x30, \[sp\]
|
|
** ldp x24, x25, \[sp, 16\]
|
|
** ldr x26, \[sp, 32\]
|
|
-** ldp x29, x30, \[sp\]
|
|
** add sp, sp, #?3008
|
|
** add sp, sp, #?126976
|
|
** ret
|
|
--
|
|
2.33.0
|
|
|