4325 lines
126 KiB
Diff
4325 lines
126 KiB
Diff
|
|
From 1efd433c779f66440facc8ba5cd23bdbdd6672ba Mon Sep 17 00:00:00 2001
|
|||
|
|
From: Richard Sandiford <richard.sandiford@arm.com>
|
|||
|
|
Date: Tue, 5 Dec 2023 10:11:26 +0000
|
|||
|
|
Subject: [PATCH 107/157] [Backport][SME] aarch64: Add support for SME ZA
|
|||
|
|
attributes
|
|||
|
|
|
|||
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3af9ceb631b741095d8eabd055ff7c23d4a69e6f
|
|||
|
|
|
|||
|
|
SME has an array called ZA that can be enabled and disabled separately
|
|||
|
|
from streaming mode. A status bit called PSTATE.ZA indicates whether
|
|||
|
|
ZA is currently enabled or not.
|
|||
|
|
|
|||
|
|
In C and C++, the state of PSTATE.ZA is controlled using function
|
|||
|
|
attributes. There are four attributes that can be attached to
|
|||
|
|
function types to indicate that the function shares ZA with its
|
|||
|
|
caller. These are:
|
|||
|
|
|
|||
|
|
- arm::in("za")
|
|||
|
|
- arm::out("za")
|
|||
|
|
- arm::inout("za")
|
|||
|
|
- arm::preserves("za")
|
|||
|
|
|
|||
|
|
If a function's type has one of these shared-ZA attributes,
|
|||
|
|
PSTATE.ZA is specified to be 1 on entry to the function and on return
|
|||
|
|
from the function. Otherwise, the caller and callee have separate
|
|||
|
|
ZA contexts; they do not use ZA to share data.
|
|||
|
|
|
|||
|
|
Although normal non-shared-ZA functions have a separate ZA context
|
|||
|
|
from their callers, nested uses of ZA are expected to be rare.
|
|||
|
|
The ABI therefore defines a cooperative lazy saving scheme that
|
|||
|
|
allows saves and restores of ZA to be kept to a minimum.
|
|||
|
|
(Callers still have the option of doing a full save and restore
|
|||
|
|
if they prefer.)
|
|||
|
|
|
|||
|
|
Functions that want to use ZA internally have an arm::new("za")
|
|||
|
|
attribute, which tells the compiler to enable PSTATE.ZA for
|
|||
|
|
the duration of the function body. It also tells the compiler
|
|||
|
|
to commit any lazy save initiated by a caller.
|
|||
|
|
|
|||
|
|
The patch uses various abstract hard registers to track dataflow
|
|||
|
|
relating to ZA. See the comments in the patch for details.
|
|||
|
|
|
|||
|
|
The lazy save scheme is intended to be transparent to most normal
|
|||
|
|
functions, so that they don't need to be recompiled for SME.
|
|||
|
|
This is reflected in the way that most normal functions ignore
|
|||
|
|
the new hard registers added in the patch.
|
|||
|
|
|
|||
|
|
As with arm::streaming and arm::streaming_compatible, the attributes are
|
|||
|
|
also available as __arm_<attr>. This has two advantages: it triggers an
|
|||
|
|
error on compilers that don't understand the attributes, and it eases
|
|||
|
|
use on C, where [[...]] attributes were only added in C23.
|
|||
|
|
|
|||
|
|
gcc/
|
|||
|
|
* config/aarch64/aarch64-isa-modes.def (ZA_ON): New ISA mode.
|
|||
|
|
* config/aarch64/aarch64-protos.h (aarch64_rdsvl_immediate_p)
|
|||
|
|
(aarch64_output_rdsvl, aarch64_optimize_mode_switching)
|
|||
|
|
(aarch64_restore_za): Declare.
|
|||
|
|
* config/aarch64/constraints.md (UsR): New constraint.
|
|||
|
|
* config/aarch64/aarch64.md (LOWERING_REGNUM, TPIDR_BLOCK_REGNUM)
|
|||
|
|
(SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM, ZA_FREE_REGNUM)
|
|||
|
|
(ZA_SAVED_REGNUM, ZA_REGNUM, FIRST_FAKE_REGNUM): New constants.
|
|||
|
|
(LAST_FAKE_REGNUM): Likewise.
|
|||
|
|
(UNSPEC_SAVE_NZCV, UNSPEC_RESTORE_NZCV, UNSPEC_SME_VQ): New unspecs.
|
|||
|
|
(arches): Add sme.
|
|||
|
|
(arch_enabled): Handle it.
|
|||
|
|
(*cb<optab><mode>1): Rename to...
|
|||
|
|
(aarch64_cb<optab><mode>1): ...this.
|
|||
|
|
(*movsi_aarch64): Add an alternative for RDSVL.
|
|||
|
|
(*movdi_aarch64): Likewise.
|
|||
|
|
(aarch64_save_nzcv, aarch64_restore_nzcv): New insns.
|
|||
|
|
* config/aarch64/aarch64-sme.md (UNSPEC_SMSTOP_ZA)
|
|||
|
|
(UNSPEC_INITIAL_ZERO_ZA, UNSPEC_TPIDR2_SAVE, UNSPEC_TPIDR2_RESTORE)
|
|||
|
|
(UNSPEC_READ_TPIDR2, UNSPEC_WRITE_TPIDR2, UNSPEC_SETUP_LOCAL_TPIDR2)
|
|||
|
|
(UNSPEC_RESTORE_ZA, UNSPEC_START_PRIVATE_ZA_CALL): New unspecs.
|
|||
|
|
(UNSPEC_END_PRIVATE_ZA_CALL, UNSPEC_COMMIT_LAZY_SAVE): Likewise.
|
|||
|
|
(UNSPECV_ASM_UPDATE_ZA): New unspecv.
|
|||
|
|
(aarch64_tpidr2_save, aarch64_smstart_za, aarch64_smstop_za)
|
|||
|
|
(aarch64_initial_zero_za, aarch64_setup_local_tpidr2)
|
|||
|
|
(aarch64_clear_tpidr2, aarch64_write_tpidr2, aarch64_read_tpidr2)
|
|||
|
|
(aarch64_tpidr2_restore, aarch64_restore_za, aarch64_asm_update_za)
|
|||
|
|
(aarch64_start_private_za_call, aarch64_end_private_za_call)
|
|||
|
|
(aarch64_commit_lazy_save): New patterns.
|
|||
|
|
* config/aarch64/aarch64.h (AARCH64_ISA_ZA_ON, TARGET_ZA): New macros.
|
|||
|
|
(FIXED_REGISTERS, REGISTER_NAMES): Add the new fake ZA registers.
|
|||
|
|
(CALL_USED_REGISTERS): Replace with...
|
|||
|
|
(CALL_REALLY_USED_REGISTERS): ...this and add the fake ZA registers.
|
|||
|
|
(FIRST_PSEUDO_REGISTER): Bump to include the fake ZA registers.
|
|||
|
|
(FAKE_REGS): New register class.
|
|||
|
|
(REG_CLASS_NAMES): Update accordingly.
|
|||
|
|
(REG_CLASS_CONTENTS): Likewise.
|
|||
|
|
(machine_function::tpidr2_block): New member variable.
|
|||
|
|
(machine_function::tpidr2_block_ptr): Likewise.
|
|||
|
|
(machine_function::za_save_buffer): Likewise.
|
|||
|
|
(machine_function::next_asm_update_za_id): Likewise.
|
|||
|
|
(CUMULATIVE_ARGS::shared_za_flags): Likewise.
|
|||
|
|
(aarch64_mode_entity, aarch64_local_sme_state): New enums.
|
|||
|
|
(aarch64_tristate_mode): Likewise.
|
|||
|
|
(OPTIMIZE_MODE_SWITCHING, NUM_MODES_FOR_MODE_SWITCHING): Define.
|
|||
|
|
* config/aarch64/aarch64.cc (AARCH64_STATE_SHARED, AARCH64_STATE_IN)
|
|||
|
|
(AARCH64_STATE_OUT): New constants.
|
|||
|
|
(aarch64_attribute_shared_state_flags): New function.
|
|||
|
|
(aarch64_lookup_shared_state_flags, aarch64_fndecl_has_new_state)
|
|||
|
|
(aarch64_check_state_string, cmp_string_csts): Likewise.
|
|||
|
|
(aarch64_merge_string_arguments, aarch64_check_arm_new_against_type)
|
|||
|
|
(handle_arm_new, handle_arm_shared): Likewise.
|
|||
|
|
(handle_arm_new_za_attribute): New function.
|
|||
|
|
(aarch64_arm_attribute_table): Add new, preserves, in, out, and inout.
|
|||
|
|
(aarch64_hard_regno_nregs): Handle FAKE_REGS.
|
|||
|
|
(aarch64_hard_regno_mode_ok): Likewise.
|
|||
|
|
(aarch64_fntype_shared_flags, aarch64_fntype_pstate_za): New functions.
|
|||
|
|
(aarch64_fntype_isa_mode): Include aarch64_fntype_pstate_za.
|
|||
|
|
(aarch64_fndecl_has_state, aarch64_fndecl_pstate_za): New functions.
|
|||
|
|
(aarch64_fndecl_isa_mode): Include aarch64_fndecl_pstate_za.
|
|||
|
|
(aarch64_cfun_incoming_pstate_za, aarch64_cfun_shared_flags)
|
|||
|
|
(aarch64_cfun_has_new_state, aarch64_cfun_has_state): New functions.
|
|||
|
|
(aarch64_sme_vq_immediate, aarch64_sme_vq_unspec_p): Likewise.
|
|||
|
|
(aarch64_rdsvl_immediate_p, aarch64_output_rdsvl): Likewise.
|
|||
|
|
(aarch64_expand_mov_immediate): Handle RDSVL immediates.
|
|||
|
|
(aarch64_function_arg): Add the ZA sharing flags as a third limb
|
|||
|
|
of the PARALLEL.
|
|||
|
|
(aarch64_init_cumulative_args): Record the ZA sharing flags.
|
|||
|
|
(aarch64_extra_live_on_entry): New function. Handle the new
|
|||
|
|
ZA-related fake registers.
|
|||
|
|
(aarch64_epilogue_uses): Handle the new ZA-related fake registers.
|
|||
|
|
(aarch64_cannot_force_const_mem): Handle UNSPEC_SME_VQ constants.
|
|||
|
|
(aarch64_get_tpidr2_block, aarch64_get_tpidr2_ptr): New functions.
|
|||
|
|
(aarch64_init_tpidr2_block, aarch64_restore_za): Likewise.
|
|||
|
|
(aarch64_layout_frame): Check whether the current function creates
|
|||
|
|
new ZA state. Record that it clobbers LR if so.
|
|||
|
|
(aarch64_expand_prologue): Handle functions that create new ZA state.
|
|||
|
|
(aarch64_expand_epilogue): Likewise.
|
|||
|
|
(aarch64_create_tpidr2_block): New function.
|
|||
|
|
(aarch64_restore_za): Likewise.
|
|||
|
|
(aarch64_start_call_args): Disallow calls to shared-ZA functions
|
|||
|
|
from functions that have no ZA state. Emit a marker instruction
|
|||
|
|
before calls to private-ZA functions from functions that have
|
|||
|
|
SME state.
|
|||
|
|
(aarch64_expand_call): Add return registers for state that is
|
|||
|
|
managed via attributes. Record the use and clobber information
|
|||
|
|
for the ZA registers.
|
|||
|
|
(aarch64_end_call_args): New function.
|
|||
|
|
(aarch64_regno_regclass): Handle FAKE_REGS.
|
|||
|
|
(aarch64_class_max_nregs): Likewise.
|
|||
|
|
(aarch64_override_options_internal): Require TARGET_SME for
|
|||
|
|
functions that have ZA state.
|
|||
|
|
(aarch64_conditional_register_usage): Handle FAKE_REGS.
|
|||
|
|
(aarch64_mov_operand_p): Handle RDSVL immediates.
|
|||
|
|
(aarch64_comp_type_attributes): Check that the ZA sharing flags
|
|||
|
|
are equal.
|
|||
|
|
(aarch64_merge_decl_attributes): New function.
|
|||
|
|
(aarch64_optimize_mode_switching, aarch64_mode_emit_za_save_buffer)
|
|||
|
|
(aarch64_mode_emit_local_sme_state, aarch64_mode_emit): Likewise.
|
|||
|
|
(aarch64_insn_references_sme_state_p): Likewise.
|
|||
|
|
(aarch64_mode_needed_local_sme_state): Likewise.
|
|||
|
|
(aarch64_mode_needed_za_save_buffer, aarch64_mode_needed): Likewise.
|
|||
|
|
(aarch64_mode_after_local_sme_state, aarch64_mode_after): Likewise.
|
|||
|
|
(aarch64_local_sme_confluence, aarch64_mode_confluence): Likewise.
|
|||
|
|
(aarch64_one_shot_backprop, aarch64_local_sme_backprop): Likewise.
|
|||
|
|
(aarch64_mode_backprop, aarch64_mode_entry): Likewise.
|
|||
|
|
(aarch64_mode_exit, aarch64_mode_eh_handler): Likewise.
|
|||
|
|
(aarch64_mode_priority, aarch64_md_asm_adjust): Likewise.
|
|||
|
|
(TARGET_END_CALL_ARGS, TARGET_MERGE_DECL_ATTRIBUTES): Define.
|
|||
|
|
(TARGET_MODE_EMIT, TARGET_MODE_NEEDED, TARGET_MODE_AFTER): Likewise.
|
|||
|
|
(TARGET_MODE_CONFLUENCE, TARGET_MODE_BACKPROP): Likewise.
|
|||
|
|
(TARGET_MODE_ENTRY, TARGET_MODE_EXIT): Likewise.
|
|||
|
|
(TARGET_MODE_EH_HANDLER, TARGET_MODE_PRIORITY): Likewise.
|
|||
|
|
(TARGET_EXTRA_LIVE_ON_ENTRY): Likewise.
|
|||
|
|
(TARGET_MD_ASM_ADJUST): Use aarch64_md_asm_adjust.
|
|||
|
|
* config/aarch64/aarch64-c.cc (aarch64_define_unconditional_macros):
|
|||
|
|
Define __arm_new, __arm_preserves, __arm_in, __arm_out, and __arm_inout.
|
|||
|
|
|
|||
|
|
gcc/testsuite/
|
|||
|
|
* gcc.target/aarch64/sme/za_state_1.c: New test.
|
|||
|
|
* gcc.target/aarch64/sme/za_state_2.c: Likewise.
|
|||
|
|
* gcc.target/aarch64/sme/za_state_3.c: Likewise.
|
|||
|
|
* gcc.target/aarch64/sme/za_state_4.c: Likewise.
|
|||
|
|
* gcc.target/aarch64/sme/za_state_5.c: Likewise.
|
|||
|
|
* gcc.target/aarch64/sme/za_state_6.c: Likewise.
|
|||
|
|
* g++.target/aarch64/sme/exceptions_1.C: Likewise.
|
|||
|
|
* gcc.target/aarch64/sme/keyword_macros_1.c: Add ZA macros.
|
|||
|
|
* g++.target/aarch64/sme/keyword_macros_1.C: Likewise.
|
|||
|
|
---
|
|||
|
|
gcc/config/aarch64/aarch64-c.cc | 32 +
|
|||
|
|
gcc/config/aarch64/aarch64-isa-modes.def | 5 +
|
|||
|
|
gcc/config/aarch64/aarch64-protos.h | 5 +
|
|||
|
|
gcc/config/aarch64/aarch64-sme.md | 287 ++++
|
|||
|
|
gcc/config/aarch64/aarch64.cc | 1371 ++++++++++++++++-
|
|||
|
|
gcc/config/aarch64/aarch64.h | 98 +-
|
|||
|
|
gcc/config/aarch64/aarch64.md | 81 +-
|
|||
|
|
gcc/config/aarch64/constraints.md | 6 +
|
|||
|
|
.../g++.target/aarch64/sme/exceptions_1.C | 189 +++
|
|||
|
|
.../g++.target/aarch64/sme/keyword_macros_1.C | 5 +
|
|||
|
|
.../gcc.target/aarch64/sme/keyword_macros_1.c | 5 +
|
|||
|
|
.../gcc.target/aarch64/sme/za_state_1.c | 154 ++
|
|||
|
|
.../gcc.target/aarch64/sme/za_state_2.c | 73 +
|
|||
|
|
.../gcc.target/aarch64/sme/za_state_3.c | 31 +
|
|||
|
|
.../gcc.target/aarch64/sme/za_state_4.c | 585 +++++++
|
|||
|
|
.../gcc.target/aarch64/sme/za_state_5.c | 595 +++++++
|
|||
|
|
.../gcc.target/aarch64/sme/za_state_6.c | 23 +
|
|||
|
|
17 files changed, 3523 insertions(+), 22 deletions(-)
|
|||
|
|
create mode 100644 gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
|
|||
|
|
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
|
|||
|
|
index 397745fbd..76c20848f 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64-c.cc
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64-c.cc
|
|||
|
|
@@ -73,6 +73,8 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
|
|||
|
|
|
|||
|
|
builtin_define ("__GCC_ASM_FLAG_OUTPUTS__");
|
|||
|
|
|
|||
|
|
+ builtin_define ("__ARM_STATE_ZA");
|
|||
|
|
+
|
|||
|
|
/* Define keyword attributes like __arm_streaming as macros that expand
|
|||
|
|
to the associated [[...]] attribute. Use __extension__ in the attribute
|
|||
|
|
for C, since the [[...]] syntax was only added in C23. */
|
|||
|
|
@@ -86,6 +88,36 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
|
|||
|
|
DEFINE_ARM_KEYWORD_MACRO ("streaming_compatible");
|
|||
|
|
|
|||
|
|
#undef DEFINE_ARM_KEYWORD_MACRO
|
|||
|
|
+
|
|||
|
|
+ /* Same for the keyword attributes that take arguments. The snag here
|
|||
|
|
+ is that some old modes warn about or reject variadic arguments. */
|
|||
|
|
+ auto *cpp_opts = cpp_get_options (parse_in);
|
|||
|
|
+ if (!cpp_opts->traditional)
|
|||
|
|
+ {
|
|||
|
|
+ auto old_warn_variadic_macros = cpp_opts->warn_variadic_macros;
|
|||
|
|
+ auto old_cpp_warn_c90_c99_compat = cpp_opts->cpp_warn_c90_c99_compat;
|
|||
|
|
+
|
|||
|
|
+ cpp_opts->warn_variadic_macros = false;
|
|||
|
|
+ cpp_opts->cpp_warn_c90_c99_compat = 0;
|
|||
|
|
+
|
|||
|
|
+#define DEFINE_ARM_KEYWORD_MACRO_ARGS(NAME) \
|
|||
|
|
+ builtin_define_with_value ("__arm_" NAME "(...)", \
|
|||
|
|
+ lang_GNU_CXX () \
|
|||
|
|
+ ? "[[arm::" NAME "(__VA_ARGS__)]]" \
|
|||
|
|
+ : "[[__extension__ arm::" NAME \
|
|||
|
|
+ "(__VA_ARGS__)]]", 0);
|
|||
|
|
+
|
|||
|
|
+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("new");
|
|||
|
|
+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("preserves");
|
|||
|
|
+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("in");
|
|||
|
|
+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("out");
|
|||
|
|
+ DEFINE_ARM_KEYWORD_MACRO_ARGS ("inout");
|
|||
|
|
+
|
|||
|
|
+#undef DEFINE_ARM_KEYWORD_MACRO_ARGS
|
|||
|
|
+
|
|||
|
|
+ cpp_opts->warn_variadic_macros = old_warn_variadic_macros;
|
|||
|
|
+ cpp_opts->cpp_warn_c90_c99_compat = old_cpp_warn_c90_c99_compat;
|
|||
|
|
+ }
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Undefine/redefine macros that depend on the current backend state and may
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64-isa-modes.def b/gcc/config/aarch64/aarch64-isa-modes.def
|
|||
|
|
index 5915c98a8..c0ada35bd 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64-isa-modes.def
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64-isa-modes.def
|
|||
|
|
@@ -32,4 +32,9 @@
|
|||
|
|
DEF_AARCH64_ISA_MODE(SM_ON)
|
|||
|
|
DEF_AARCH64_ISA_MODE(SM_OFF)
|
|||
|
|
|
|||
|
|
+/* Indicates that PSTATE.ZA is known to be 1. The converse is that
|
|||
|
|
+ PSTATE.ZA might be 0 or 1, depending on whether there is an uncommitted
|
|||
|
|
+ lazy save. */
|
|||
|
|
+DEF_AARCH64_ISA_MODE(ZA_ON)
|
|||
|
|
+
|
|||
|
|
#undef DEF_AARCH64_ISA_MODE
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
|||
|
|
index 737f47026..0883ddd1a 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64-protos.h
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64-protos.h
|
|||
|
|
@@ -808,6 +808,8 @@ bool aarch64_sve_addvl_addpl_immediate_p (rtx);
|
|||
|
|
bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
|
|||
|
|
int aarch64_add_offset_temporaries (rtx);
|
|||
|
|
void aarch64_split_add_offset (scalar_int_mode, rtx, rtx, rtx, rtx, rtx);
|
|||
|
|
+bool aarch64_rdsvl_immediate_p (const_rtx);
|
|||
|
|
+char *aarch64_output_rdsvl (const_rtx);
|
|||
|
|
bool aarch64_mov_operand_p (rtx, machine_mode);
|
|||
|
|
rtx aarch64_reverse_mask (machine_mode, unsigned int);
|
|||
|
|
bool aarch64_offset_7bit_signed_scaled_p (machine_mode, poly_int64);
|
|||
|
|
@@ -1083,4 +1085,7 @@ extern bool aarch64_harden_sls_blr_p (void);
|
|||
|
|
|
|||
|
|
extern void aarch64_output_patchable_area (unsigned int, bool);
|
|||
|
|
|
|||
|
|
+bool aarch64_optimize_mode_switching (aarch64_mode_entity);
|
|||
|
|
+void aarch64_restore_za (rtx);
|
|||
|
|
+
|
|||
|
|
#endif /* GCC_AARCH64_PROTOS_H */
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
|
|||
|
|
index 52427b4f1..d4973098e 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64-sme.md
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64-sme.md
|
|||
|
|
@@ -23,6 +23,7 @@
|
|||
|
|
;; == State management
|
|||
|
|
;; ---- Test current state
|
|||
|
|
;; ---- PSTATE.SM management
|
|||
|
|
+;; ---- PSTATE.ZA management
|
|||
|
|
|
|||
|
|
;; =========================================================================
|
|||
|
|
;; == State management
|
|||
|
|
@@ -169,3 +170,289 @@
|
|||
|
|
""
|
|||
|
|
"smstop\tsm"
|
|||
|
|
)
|
|||
|
|
+
|
|||
|
|
+;; -------------------------------------------------------------------------
|
|||
|
|
+;; ---- PSTATE.ZA management
|
|||
|
|
+;; -------------------------------------------------------------------------
|
|||
|
|
+;; Includes:
|
|||
|
|
+;; - SMSTART ZA
|
|||
|
|
+;; - SMSTOP ZA
|
|||
|
|
+;; plus calls to support routines.
|
|||
|
|
+;; -------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+(define_c_enum "unspec" [
|
|||
|
|
+ UNSPEC_SMSTOP_ZA
|
|||
|
|
+ UNSPEC_INITIAL_ZERO_ZA
|
|||
|
|
+ UNSPEC_TPIDR2_SAVE
|
|||
|
|
+ UNSPEC_TPIDR2_RESTORE
|
|||
|
|
+ UNSPEC_READ_TPIDR2
|
|||
|
|
+ UNSPEC_WRITE_TPIDR2
|
|||
|
|
+ UNSPEC_SETUP_LOCAL_TPIDR2
|
|||
|
|
+ UNSPEC_RESTORE_ZA
|
|||
|
|
+ UNSPEC_START_PRIVATE_ZA_CALL
|
|||
|
|
+ UNSPEC_END_PRIVATE_ZA_CALL
|
|||
|
|
+ UNSPEC_COMMIT_LAZY_SAVE
|
|||
|
|
+])
|
|||
|
|
+
|
|||
|
|
+(define_c_enum "unspecv" [
|
|||
|
|
+ UNSPECV_ASM_UPDATE_ZA
|
|||
|
|
+])
|
|||
|
|
+
|
|||
|
|
+;; Use the ABI-defined routine to commit an uncommitted lazy save.
|
|||
|
|
+;; This relies on the current PSTATE.ZA, so depends on SME_STATE_REGNUM.
|
|||
|
|
+;; The fake TPIDR2_SETUP_REGNUM register initially holds the incoming
|
|||
|
|
+;; value of the architected TPIDR2_EL0.
|
|||
|
|
+(define_insn "aarch64_tpidr2_save"
|
|||
|
|
+ [(set (reg:DI ZA_FREE_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (reg:DI TPIDR2_SETUP_REGNUM)] UNSPEC_TPIDR2_SAVE))
|
|||
|
|
+ (clobber (reg:DI R14_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R15_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R16_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R17_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R18_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R30_REGNUM))
|
|||
|
|
+ (clobber (reg:CC CC_REGNUM))]
|
|||
|
|
+ ""
|
|||
|
|
+ "bl\t__arm_tpidr2_save"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Set PSTATE.ZA to 1. If ZA was previously dormant or active,
|
|||
|
|
+;; it remains in the same state afterwards, with the same contents.
|
|||
|
|
+;; Otherwise, it goes from off to on with zeroed contents.
|
|||
|
|
+;;
|
|||
|
|
+;; Later writes of TPIDR2_EL0 to a nonzero value must not be moved
|
|||
|
|
+;; up past this instruction, since that could create an invalid
|
|||
|
|
+;; combination of having an active lazy save while ZA is off.
|
|||
|
|
+;; Create an anti-dependence by reading the current contents
|
|||
|
|
+;; of TPIDR2_SETUP_REGNUM.
|
|||
|
|
+;;
|
|||
|
|
+;; Making this depend on ZA_FREE_REGNUM ensures that contents belonging
|
|||
|
|
+;; to the caller have already been saved. That isn't necessary for this
|
|||
|
|
+;; instruction itself, since PSTATE.ZA is already 1 if it contains data.
|
|||
|
|
+;; But doing this here means that other uses of ZA can just depend on
|
|||
|
|
+;; SME_STATE_REGNUM, rather than both SME_STATE_REGNUM and ZA_FREE_REGNUM.
|
|||
|
|
+(define_insn "aarch64_smstart_za"
|
|||
|
|
+ [(set (reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (const_int 1))
|
|||
|
|
+ (use (reg:DI TPIDR2_SETUP_REGNUM))
|
|||
|
|
+ (use (reg:DI ZA_FREE_REGNUM))]
|
|||
|
|
+ ""
|
|||
|
|
+ "smstart\tza"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Disable ZA and discard its current contents.
|
|||
|
|
+;;
|
|||
|
|
+;; The ABI says that the ZA save buffer must be null whenever PSTATE.ZA
|
|||
|
|
+;; is zero, so earlier writes to TPIDR2_EL0 must not be moved down past
|
|||
|
|
+;; this instruction. Depend on TPIDR2_SETUP_REGNUM to ensure this.
|
|||
|
|
+;;
|
|||
|
|
+;; We can only turn ZA off once we know that it is free (i.e. doesn't
|
|||
|
|
+;; contain data belonging to the caller). Depend on ZA_FREE_REGNUM
|
|||
|
|
+;; to ensure this.
|
|||
|
|
+;;
|
|||
|
|
+;; We only turn ZA off when the current function's ZA state is dead,
|
|||
|
|
+;; or perhaps if we're sure that the contents are saved. Either way,
|
|||
|
|
+;; we know whether ZA is saved or not.
|
|||
|
|
+(define_insn "aarch64_smstop_za"
|
|||
|
|
+ [(set (reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (const_int 0))
|
|||
|
|
+ (set (reg:DI ZA_SAVED_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
|
|||
|
|
+ (reg:DI ZA_FREE_REGNUM)] UNSPEC_SMSTOP_ZA))]
|
|||
|
|
+ ""
|
|||
|
|
+ "smstop\tza"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Zero ZA after committing a lazy save. The sequencing is enforced
|
|||
|
|
+;; by reading ZA_FREE_REGNUM.
|
|||
|
|
+(define_insn "aarch64_initial_zero_za"
|
|||
|
|
+ [(set (reg:DI ZA_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))]
|
|||
|
|
+ ""
|
|||
|
|
+ "zero\t{ za }"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Initialize the abstract TPIDR2_BLOCK_REGNUM from the contents of
|
|||
|
|
+;; the current function's TPIDR2 block. Other instructions can then
|
|||
|
|
+;; depend on TPIDR2_BLOCK_REGNUM rather than on the memory block.
|
|||
|
|
+(define_insn "aarch64_setup_local_tpidr2"
|
|||
|
|
+ [(set (reg:DI TPIDR2_BLOCK_REGNUM)
|
|||
|
|
+ (unspec:DI [(match_operand:V16QI 0 "memory_operand" "m")]
|
|||
|
|
+ UNSPEC_SETUP_LOCAL_TPIDR2))]
|
|||
|
|
+ ""
|
|||
|
|
+ ""
|
|||
|
|
+ [(set_attr "type" "no_insn")]
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Clear TPIDR2_EL0, cancelling any uncommitted lazy save.
|
|||
|
|
+(define_insn "aarch64_clear_tpidr2"
|
|||
|
|
+ [(set (reg:DI TPIDR2_SETUP_REGNUM)
|
|||
|
|
+ (const_int 0))]
|
|||
|
|
+ ""
|
|||
|
|
+ "msr\ttpidr2_el0, xzr"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Point TPIDR2_EL0 to the current function's TPIDR2 block, whose address
|
|||
|
|
+;; is given by operand 0. TPIDR2_BLOCK_REGNUM represents the contents of the
|
|||
|
|
+;; pointed-to block.
|
|||
|
|
+(define_insn "aarch64_write_tpidr2"
|
|||
|
|
+ [(set (reg:DI TPIDR2_SETUP_REGNUM)
|
|||
|
|
+ (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
|
|||
|
|
+ (reg:DI TPIDR2_BLOCK_REGNUM)] UNSPEC_WRITE_TPIDR2))]
|
|||
|
|
+ ""
|
|||
|
|
+ "msr\ttpidr2_el0, %0"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Check whether ZA has been saved. The system depends on the value that
|
|||
|
|
+;; we wrote to TPIDR2_EL0 previously, so it depends on TPIDR2_SETUP_REGNUM.
|
|||
|
|
+(define_insn "aarch64_read_tpidr2"
|
|||
|
|
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
|||
|
|
+ (unspec:DI [(reg:DI TPIDR2_SETUP_REGNUM)
|
|||
|
|
+ (reg:DI ZA_SAVED_REGNUM)] UNSPEC_READ_TPIDR2))]
|
|||
|
|
+ ""
|
|||
|
|
+ "mrs\t%0, tpidr2_el0"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Use the ABI-defined routine to restore lazy-saved ZA contents
|
|||
|
|
+;; from the TPIDR2 block pointed to by X0. ZA must already be active.
|
|||
|
|
+(define_insn "aarch64_tpidr2_restore"
|
|||
|
|
+ [(set (reg:DI ZA_SAVED_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI R0_REGNUM)] UNSPEC_TPIDR2_RESTORE))
|
|||
|
|
+ (set (reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI SME_STATE_REGNUM)] UNSPEC_TPIDR2_RESTORE))
|
|||
|
|
+ (clobber (reg:DI R14_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R15_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R16_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R17_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R18_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R30_REGNUM))
|
|||
|
|
+ (clobber (reg:CC CC_REGNUM))]
|
|||
|
|
+ ""
|
|||
|
|
+ "bl\t__arm_tpidr2_restore"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; Check whether a lazy save set up by aarch64_save_za was committed
|
|||
|
|
+;; and restore the saved contents if so.
|
|||
|
|
+;;
|
|||
|
|
+;; Operand 0 is the address of the current function's TPIDR2 block.
|
|||
|
|
+(define_insn_and_split "aarch64_restore_za"
|
|||
|
|
+ [(set (reg:DI ZA_SAVED_REGNUM)
|
|||
|
|
+ (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
|
|||
|
|
+ (reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (reg:DI TPIDR2_SETUP_REGNUM)
|
|||
|
|
+ (reg:DI ZA_SAVED_REGNUM)] UNSPEC_RESTORE_ZA))
|
|||
|
|
+ (clobber (reg:DI R0_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R14_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R15_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R16_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R17_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R18_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R30_REGNUM))
|
|||
|
|
+ (clobber (reg:CC CC_REGNUM))]
|
|||
|
|
+ ""
|
|||
|
|
+ "#"
|
|||
|
|
+ "&& epilogue_completed"
|
|||
|
|
+ [(const_int 0)]
|
|||
|
|
+ {
|
|||
|
|
+ auto label = gen_label_rtx ();
|
|||
|
|
+ auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM);
|
|||
|
|
+ emit_insn (gen_aarch64_read_tpidr2 (tpidr2));
|
|||
|
|
+ auto jump = emit_likely_jump_insn (gen_aarch64_cbnedi1 (tpidr2, label));
|
|||
|
|
+ JUMP_LABEL (jump) = label;
|
|||
|
|
+
|
|||
|
|
+ aarch64_restore_za (operands[0]);
|
|||
|
|
+ emit_label (label);
|
|||
|
|
+ DONE;
|
|||
|
|
+ }
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; This instruction is emitted after asms that alter ZA, in order to model
|
|||
|
|
+;; the effect on dataflow. The asm itself can't have ZA as an input or
|
|||
|
|
+;; an output, since there is no associated data type. Instead it retains
|
|||
|
|
+;; the original "za" clobber, which on its own would indicate that ZA
|
|||
|
|
+;; is dead.
|
|||
|
|
+;;
|
|||
|
|
+;; The operand is a unique identifier.
|
|||
|
|
+(define_insn "aarch64_asm_update_za"
|
|||
|
|
+ [(set (reg:VNx16QI ZA_REGNUM)
|
|||
|
|
+ (unspec_volatile:VNx16QI
|
|||
|
|
+ [(reg:VNx16QI ZA_REGNUM)
|
|||
|
|
+ (reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (match_operand 0 "const_int_operand")]
|
|||
|
|
+ UNSPECV_ASM_UPDATE_ZA))]
|
|||
|
|
+ ""
|
|||
|
|
+ ""
|
|||
|
|
+ [(set_attr "type" "no_insn")]
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; This pseudo-instruction is emitted as part of a call to a private-ZA
|
|||
|
|
+;; function from a function with ZA state. It marks a natural place to set
|
|||
|
|
+;; up a lazy save, if that turns out to be necessary. The save itself
|
|||
|
|
+;; is managed by the mode-switching pass.
|
|||
|
|
+(define_insn "aarch64_start_private_za_call"
|
|||
|
|
+ [(set (reg:DI LOWERING_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_START_PRIVATE_ZA_CALL))]
|
|||
|
|
+ ""
|
|||
|
|
+ ""
|
|||
|
|
+ [(set_attr "type" "no_insn")]
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; This pseudo-instruction is emitted as part of a call to a private-ZA
|
|||
|
|
+;; function from a function with ZA state. It marks a natural place to restore
|
|||
|
|
+;; the current function's ZA contents from the lazy save buffer, if that
|
|||
|
|
+;; turns out to be necessary. The restore itself is managed by the
|
|||
|
|
+;; mode-switching pass.
|
|||
|
|
+(define_insn "aarch64_end_private_za_call"
|
|||
|
|
+ [(set (reg:DI LOWERING_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI LOWERING_REGNUM)] UNSPEC_END_PRIVATE_ZA_CALL))]
|
|||
|
|
+ ""
|
|||
|
|
+ ""
|
|||
|
|
+ [(set_attr "type" "no_insn")]
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+;; This pseudo-instruction is emitted before a private-ZA function uses
|
|||
|
|
+;; PSTATE.ZA state for the first time. The instruction checks whether
|
|||
|
|
+;; ZA currently contains data belonging to a caller and commits the
|
|||
|
|
+;; lazy save if so.
|
|||
|
|
+;;
|
|||
|
|
+;; Operand 0 is the incoming value of TPIDR2_EL0. Operand 1 is nonzero
|
|||
|
|
+;; if ZA is live, and should therefore be zeroed after committing a save.
|
|||
|
|
+;;
|
|||
|
|
+;; The instruction is generated by the mode-switching pass. It is a
|
|||
|
|
+;; define_insn_and_split rather than a define_expand because of the
|
|||
|
|
+;; internal control flow.
|
|||
|
|
+(define_insn_and_split "aarch64_commit_lazy_save"
|
|||
|
|
+ [(set (reg:DI ZA_FREE_REGNUM)
|
|||
|
|
+ (unspec:DI [(match_operand 0 "pmode_register_operand" "r")
|
|||
|
|
+ (match_operand 1 "const_int_operand")
|
|||
|
|
+ (reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (reg:DI TPIDR2_SETUP_REGNUM)
|
|||
|
|
+ (reg:VNx16QI ZA_REGNUM)] UNSPEC_COMMIT_LAZY_SAVE))
|
|||
|
|
+ (set (reg:DI ZA_REGNUM)
|
|||
|
|
+ (unspec:DI [(reg:DI SME_STATE_REGNUM)
|
|||
|
|
+ (reg:DI ZA_FREE_REGNUM)] UNSPEC_INITIAL_ZERO_ZA))
|
|||
|
|
+ (clobber (reg:DI R14_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R15_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R16_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R17_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R18_REGNUM))
|
|||
|
|
+ (clobber (reg:DI R30_REGNUM))
|
|||
|
|
+ (clobber (reg:CC CC_REGNUM))]
|
|||
|
|
+ ""
|
|||
|
|
+ "#"
|
|||
|
|
+ "true"
|
|||
|
|
+ [(const_int 0)]
|
|||
|
|
+ {
|
|||
|
|
+ auto label = gen_label_rtx ();
|
|||
|
|
+ auto jump = emit_jump_insn (gen_aarch64_cbeqdi1 (operands[0], label));
|
|||
|
|
+ JUMP_LABEL (jump) = label;
|
|||
|
|
+ emit_insn (gen_aarch64_tpidr2_save ());
|
|||
|
|
+ emit_insn (gen_aarch64_clear_tpidr2 ());
|
|||
|
|
+ if (INTVAL (operands[1]) != 0)
|
|||
|
|
+ emit_insn (gen_aarch64_initial_zero_za ());
|
|||
|
|
+ emit_label (label);
|
|||
|
|
+ DONE;
|
|||
|
|
+ }
|
|||
|
|
+)
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
|||
|
|
index 82f8e574e..a6e996c5b 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64.cc
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64.cc
|
|||
|
|
@@ -91,6 +91,26 @@
|
|||
|
|
/* Defined for convenience. */
|
|||
|
|
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
|
|||
|
|
|
|||
|
|
+/* Flags that describe how a function shares certain architectural state
|
|||
|
|
+ with its callers.
|
|||
|
|
+
|
|||
|
|
+ - AARCH64_STATE_SHARED indicates that the function does share the state
|
|||
|
|
+ with callers.
|
|||
|
|
+
|
|||
|
|
+ - AARCH64_STATE_IN indicates that the function reads (or might read) the
|
|||
|
|
+ incoming state. The converse is that the function ignores the incoming
|
|||
|
|
+ state.
|
|||
|
|
+
|
|||
|
|
+ - AARCH64_STATE_OUT indicates that the function returns new state.
|
|||
|
|
+ The converse is that the state on return is the same as it was on entry.
|
|||
|
|
+
|
|||
|
|
+ A function that partially modifies the state treats it as both IN
|
|||
|
|
+ and OUT (because the value on return depends to some extent on the
|
|||
|
|
+ value on input). */
|
|||
|
|
+constexpr auto AARCH64_STATE_SHARED = 1U << 0;
|
|||
|
|
+constexpr auto AARCH64_STATE_IN = 1U << 1;
|
|||
|
|
+constexpr auto AARCH64_STATE_OUT = 1U << 2;
|
|||
|
|
+
|
|||
|
|
/* Information about a legitimate vector immediate operand. */
|
|||
|
|
struct simd_immediate_info
|
|||
|
|
{
|
|||
|
|
@@ -2959,6 +2979,151 @@ static const struct processor all_cores[] =
|
|||
|
|
/* The current tuning set. */
|
|||
|
|
struct tune_params aarch64_tune_params = generic_tunings;
|
|||
|
|
|
|||
|
|
+/* If NAME is the name of an arm:: attribute that describes shared state,
|
|||
|
|
+ return its associated AARCH64_STATE_* flags, otherwise return 0. */
|
|||
|
|
+static unsigned int
|
|||
|
|
+aarch64_attribute_shared_state_flags (const char *name)
|
|||
|
|
+{
|
|||
|
|
+ if (strcmp (name, "in") == 0)
|
|||
|
|
+ return AARCH64_STATE_SHARED | AARCH64_STATE_IN;
|
|||
|
|
+ if (strcmp (name, "inout") == 0)
|
|||
|
|
+ return AARCH64_STATE_SHARED | AARCH64_STATE_IN | AARCH64_STATE_OUT;
|
|||
|
|
+ if (strcmp (name, "out") == 0)
|
|||
|
|
+ return AARCH64_STATE_SHARED | AARCH64_STATE_OUT;
|
|||
|
|
+ if (strcmp (name, "preserves") == 0)
|
|||
|
|
+ return AARCH64_STATE_SHARED;
|
|||
|
|
+ return 0;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* See whether attribute list ATTRS has any sharing information
|
|||
|
|
+ for state STATE_NAME. Return the associated state flags if so,
|
|||
|
|
+ otherwise return 0. */
|
|||
|
|
+static unsigned int
|
|||
|
|
+aarch64_lookup_shared_state_flags (tree attrs, const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ for (tree attr = attrs; attr; attr = TREE_CHAIN (attr))
|
|||
|
|
+ {
|
|||
|
|
+ if (!cxx11_attribute_p (attr))
|
|||
|
|
+ continue;
|
|||
|
|
+
|
|||
|
|
+ auto ns = IDENTIFIER_POINTER (TREE_PURPOSE (TREE_PURPOSE (attr)));
|
|||
|
|
+ if (strcmp (ns, "arm") != 0)
|
|||
|
|
+ continue;
|
|||
|
|
+
|
|||
|
|
+ auto attr_name = IDENTIFIER_POINTER (TREE_VALUE (TREE_PURPOSE (attr)));
|
|||
|
|
+ auto flags = aarch64_attribute_shared_state_flags (attr_name);
|
|||
|
|
+ if (!flags)
|
|||
|
|
+ continue;
|
|||
|
|
+
|
|||
|
|
+ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg))
|
|||
|
|
+ {
|
|||
|
|
+ tree value = TREE_VALUE (arg);
|
|||
|
|
+ if (TREE_CODE (value) == STRING_CST
|
|||
|
|
+ && strcmp (TREE_STRING_POINTER (value), state_name) == 0)
|
|||
|
|
+ return flags;
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+ return 0;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if DECL creates a new scope for state STATE_NAME. */
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_fndecl_has_new_state (const_tree decl, const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ if (tree attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl)))
|
|||
|
|
+ for (tree arg = TREE_VALUE (attr); arg; arg = TREE_CHAIN (arg))
|
|||
|
|
+ {
|
|||
|
|
+ tree value = TREE_VALUE (arg);
|
|||
|
|
+ if (TREE_CODE (value) == STRING_CST
|
|||
|
|
+ && strcmp (TREE_STRING_POINTER (value), state_name) == 0)
|
|||
|
|
+ return true;
|
|||
|
|
+ }
|
|||
|
|
+ return false;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if attribute argument VALUE is a recognized state string,
|
|||
|
|
+ otherwise report an error. NAME is the name of the attribute to which
|
|||
|
|
+ VALUE is being passed. */
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_check_state_string (tree name, tree value)
|
|||
|
|
+{
|
|||
|
|
+ if (TREE_CODE (value) != STRING_CST)
|
|||
|
|
+ {
|
|||
|
|
+ error ("the arguments to %qE must be constant strings", name);
|
|||
|
|
+ return false;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ const char *state_name = TREE_STRING_POINTER (value);
|
|||
|
|
+ if (strcmp (state_name, "za") != 0)
|
|||
|
|
+ {
|
|||
|
|
+ error ("unrecognized state string %qs", state_name);
|
|||
|
|
+ return false;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ return true;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* qsort callback to compare two STRING_CSTs. */
|
|||
|
|
+static int
|
|||
|
|
+cmp_string_csts (const void *a, const void *b)
|
|||
|
|
+{
|
|||
|
|
+ return strcmp (TREE_STRING_POINTER (*(const_tree const *) a),
|
|||
|
|
+ TREE_STRING_POINTER (*(const_tree const *) b));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Canonicalize a list of state strings. ARGS contains the arguments to
|
|||
|
|
+ a new attribute while OLD_ATTR, if nonnull, contains a previous attribute
|
|||
|
|
+ of the same type. If CAN_MERGE_IN_PLACE, it is safe to adjust OLD_ATTR's
|
|||
|
|
+ arguments and drop the new attribute. Otherwise, the new attribute must
|
|||
|
|
+ be kept and ARGS must include the information in OLD_ATTR.
|
|||
|
|
+
|
|||
|
|
+ In both cases, the new arguments must be a sorted list of state strings
|
|||
|
|
+ with duplicates removed.
|
|||
|
|
+
|
|||
|
|
+ Return true if the new attribute should be kept, false if it should be
|
|||
|
|
+ dropped. */
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_merge_string_arguments (tree args, tree old_attr,
|
|||
|
|
+ bool can_merge_in_place)
|
|||
|
|
+{
|
|||
|
|
+ /* Get a sorted list of all state strings (including duplicates). */
|
|||
|
|
+ auto add_args = [](vec<tree> &strings, const_tree args)
|
|||
|
|
+ {
|
|||
|
|
+ for (const_tree arg = args; arg; arg = TREE_CHAIN (arg))
|
|||
|
|
+ if (TREE_CODE (TREE_VALUE (arg)) == STRING_CST)
|
|||
|
|
+ strings.safe_push (TREE_VALUE (arg));
|
|||
|
|
+ };
|
|||
|
|
+ auto_vec<tree, 16> strings;
|
|||
|
|
+ add_args (strings, args);
|
|||
|
|
+ if (old_attr)
|
|||
|
|
+ add_args (strings, TREE_VALUE (old_attr));
|
|||
|
|
+ strings.qsort (cmp_string_csts);
|
|||
|
|
+
|
|||
|
|
+ /* The list can be empty if there was no previous attribute and if all
|
|||
|
|
+ the new arguments are erroneous. Drop the attribute in that case. */
|
|||
|
|
+ if (strings.is_empty ())
|
|||
|
|
+ return false;
|
|||
|
|
+
|
|||
|
|
+ /* Destructively modify one of the argument lists, removing duplicates
|
|||
|
|
+ on the fly. */
|
|||
|
|
+ bool use_old_attr = old_attr && can_merge_in_place;
|
|||
|
|
+ tree *end = use_old_attr ? &TREE_VALUE (old_attr) : &args;
|
|||
|
|
+ tree prev = NULL_TREE;
|
|||
|
|
+ for (tree arg : strings)
|
|||
|
|
+ {
|
|||
|
|
+ if (prev && simple_cst_equal (arg, prev))
|
|||
|
|
+ continue;
|
|||
|
|
+ prev = arg;
|
|||
|
|
+ if (!*end)
|
|||
|
|
+ *end = tree_cons (NULL_TREE, arg, NULL_TREE);
|
|||
|
|
+ else
|
|||
|
|
+ TREE_VALUE (*end) = arg;
|
|||
|
|
+ end = &TREE_CHAIN (*end);
|
|||
|
|
+ }
|
|||
|
|
+ *end = NULL_TREE;
|
|||
|
|
+ return !use_old_attr;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Check whether an 'aarch64_vector_pcs' attribute is valid. */
|
|||
|
|
|
|||
|
|
static tree
|
|||
|
|
@@ -2987,6 +3152,101 @@ handle_aarch64_vector_pcs_attribute (tree *node, tree name, tree,
|
|||
|
|
gcc_unreachable ();
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Return true if arm::new(ARGS) is compatible with the type of decl DECL,
|
|||
|
|
+ otherwise report an error. */
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_check_arm_new_against_type (tree args, tree decl)
|
|||
|
|
+{
|
|||
|
|
+ tree type_attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
|
|||
|
|
+ for (tree arg = args; arg; arg = TREE_CHAIN (arg))
|
|||
|
|
+ {
|
|||
|
|
+ tree value = TREE_VALUE (arg);
|
|||
|
|
+ if (TREE_CODE (value) == STRING_CST)
|
|||
|
|
+ {
|
|||
|
|
+ const char *state_name = TREE_STRING_POINTER (value);
|
|||
|
|
+ if (aarch64_lookup_shared_state_flags (type_attrs, state_name))
|
|||
|
|
+ {
|
|||
|
|
+ error_at (DECL_SOURCE_LOCATION (decl),
|
|||
|
|
+ "cannot create a new %qs scope since %qs is shared"
|
|||
|
|
+ " with callers", state_name, state_name);
|
|||
|
|
+ return false;
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+ return true;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Callback for arm::new attributes. */
|
|||
|
|
+static tree
|
|||
|
|
+handle_arm_new (tree *node, tree name, tree args, int, bool *no_add_attrs)
|
|||
|
|
+{
|
|||
|
|
+ tree decl = *node;
|
|||
|
|
+ if (TREE_CODE (decl) != FUNCTION_DECL)
|
|||
|
|
+ {
|
|||
|
|
+ error ("%qE attribute applies only to function definitions", name);
|
|||
|
|
+ *no_add_attrs = true;
|
|||
|
|
+ return NULL_TREE;
|
|||
|
|
+ }
|
|||
|
|
+ if (TREE_TYPE (decl) == error_mark_node)
|
|||
|
|
+ {
|
|||
|
|
+ *no_add_attrs = true;
|
|||
|
|
+ return NULL_TREE;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ for (tree arg = args; arg; arg = TREE_CHAIN (arg))
|
|||
|
|
+ aarch64_check_state_string (name, TREE_VALUE (arg));
|
|||
|
|
+
|
|||
|
|
+ if (!aarch64_check_arm_new_against_type (args, decl))
|
|||
|
|
+ {
|
|||
|
|
+ *no_add_attrs = true;
|
|||
|
|
+ return NULL_TREE;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ /* If there is an old attribute, we should try to update it in-place,
|
|||
|
|
+ so that there is only one (definitive) arm::new attribute on the decl. */
|
|||
|
|
+ tree old_attr = lookup_attribute ("arm", "new", DECL_ATTRIBUTES (decl));
|
|||
|
|
+ if (!aarch64_merge_string_arguments (args, old_attr, true))
|
|||
|
|
+ *no_add_attrs = true;
|
|||
|
|
+
|
|||
|
|
+ return NULL_TREE;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Callback for arm::{in,out,inout,preserves} attributes. */
|
|||
|
|
+static tree
|
|||
|
|
+handle_arm_shared (tree *node, tree name, tree args,
|
|||
|
|
+ int, bool *no_add_attrs)
|
|||
|
|
+{
|
|||
|
|
+ tree type = *node;
|
|||
|
|
+ tree old_attrs = TYPE_ATTRIBUTES (type);
|
|||
|
|
+ auto flags = aarch64_attribute_shared_state_flags (IDENTIFIER_POINTER (name));
|
|||
|
|
+ for (tree arg = args; arg; arg = TREE_CHAIN (arg))
|
|||
|
|
+ {
|
|||
|
|
+ tree value = TREE_VALUE (arg);
|
|||
|
|
+ if (aarch64_check_state_string (name, value))
|
|||
|
|
+ {
|
|||
|
|
+ const char *state_name = TREE_STRING_POINTER (value);
|
|||
|
|
+ auto old_flags = aarch64_lookup_shared_state_flags (old_attrs,
|
|||
|
|
+ state_name);
|
|||
|
|
+ if (old_flags && old_flags != flags)
|
|||
|
|
+ {
|
|||
|
|
+ error ("inconsistent attributes for state %qs", state_name);
|
|||
|
|
+ *no_add_attrs = true;
|
|||
|
|
+ return NULL_TREE;
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ /* We can't update an old attribute in-place, since types are shared.
|
|||
|
|
+ Instead make sure that this new attribute contains all the
|
|||
|
|
+ information, so that the old attribute becomes redundant. */
|
|||
|
|
+ tree old_attr = lookup_attribute ("arm", IDENTIFIER_POINTER (name),
|
|||
|
|
+ old_attrs);
|
|||
|
|
+ if (!aarch64_merge_string_arguments (args, old_attr, false))
|
|||
|
|
+ *no_add_attrs = true;
|
|||
|
|
+
|
|||
|
|
+ return NULL_TREE;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Mutually-exclusive function type attributes for controlling PSTATE.SM. */
|
|||
|
|
static const struct attribute_spec::exclusions attr_streaming_exclusions[] =
|
|||
|
|
{
|
|||
|
|
@@ -3023,6 +3283,16 @@ static const attribute_spec aarch64_arm_attributes[] =
|
|||
|
|
NULL, attr_streaming_exclusions },
|
|||
|
|
{ "streaming_compatible", 0, 0, false, true, true, true,
|
|||
|
|
NULL, attr_streaming_exclusions },
|
|||
|
|
+ { "new", 1, -1, true, false, false, false,
|
|||
|
|
+ handle_arm_new, NULL },
|
|||
|
|
+ { "preserves", 1, -1, false, true, true, true,
|
|||
|
|
+ handle_arm_shared, NULL },
|
|||
|
|
+ { "in", 1, -1, false, true, true, true,
|
|||
|
|
+ handle_arm_shared, NULL },
|
|||
|
|
+ { "out", 1, -1, false, true, true, true,
|
|||
|
|
+ handle_arm_shared, NULL },
|
|||
|
|
+ { "inout", 1, -1, false, true, true, true,
|
|||
|
|
+ handle_arm_shared, NULL }
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
static const scoped_attribute_specs aarch64_arm_attribute_table =
|
|||
|
|
@@ -4202,6 +4472,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
|
|||
|
|
case PR_HI_REGS:
|
|||
|
|
case FFR_REGS:
|
|||
|
|
case PR_AND_FFR_REGS:
|
|||
|
|
+ case FAKE_REGS:
|
|||
|
|
return 1;
|
|||
|
|
default:
|
|||
|
|
return CEIL (lowest_size, UNITS_PER_WORD);
|
|||
|
|
@@ -4232,6 +4503,10 @@ aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
|
|||
|
|
if (pr_or_ffr_regnum_p (regno))
|
|||
|
|
return false;
|
|||
|
|
|
|||
|
|
+ /* These registers are abstract; their modes don't matter. */
|
|||
|
|
+ if (FAKE_REGNUM_P (regno))
|
|||
|
|
+ return true;
|
|||
|
|
+
|
|||
|
|
if (regno == SP_REGNUM)
|
|||
|
|
/* The purpose of comparing with ptr_mode is to support the
|
|||
|
|
global register variable associated with the stack pointer
|
|||
|
|
@@ -4352,12 +4627,34 @@ aarch64_fntype_pstate_sm (const_tree fntype)
|
|||
|
|
return AARCH64_FL_SM_OFF;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Return state flags that describe whether and how functions of type
|
|||
|
|
+ FNTYPE share state STATE_NAME with their callers. */
|
|||
|
|
+
|
|||
|
|
+static unsigned int
|
|||
|
|
+aarch64_fntype_shared_flags (const_tree fntype, const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ return aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (fntype),
|
|||
|
|
+ state_name);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return the state of PSTATE.ZA on entry to functions of type FNTYPE. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_feature_flags
|
|||
|
|
+aarch64_fntype_pstate_za (const_tree fntype)
|
|||
|
|
+{
|
|||
|
|
+ if (aarch64_fntype_shared_flags (fntype, "za"))
|
|||
|
|
+ return AARCH64_FL_ZA_ON;
|
|||
|
|
+
|
|||
|
|
+ return 0;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Return the ISA mode on entry to functions of type FNTYPE. */
|
|||
|
|
|
|||
|
|
static aarch64_feature_flags
|
|||
|
|
aarch64_fntype_isa_mode (const_tree fntype)
|
|||
|
|
{
|
|||
|
|
- return aarch64_fntype_pstate_sm (fntype);
|
|||
|
|
+ return (aarch64_fntype_pstate_sm (fntype)
|
|||
|
|
+ | aarch64_fntype_pstate_za (fntype));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Return the state of PSTATE.SM when compiling the body of
|
|||
|
|
@@ -4370,13 +4667,37 @@ aarch64_fndecl_pstate_sm (const_tree fndecl)
|
|||
|
|
return aarch64_fntype_pstate_sm (TREE_TYPE (fndecl));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Return true if function FNDECL has state STATE_NAME, either by creating
|
|||
|
|
+ new state itself or by sharing state with callers. */
|
|||
|
|
+
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_fndecl_has_state (tree fndecl, const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ return (aarch64_fndecl_has_new_state (fndecl, state_name)
|
|||
|
|
+ || aarch64_fntype_shared_flags (TREE_TYPE (fndecl),
|
|||
|
|
+ state_name) != 0);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return the state of PSTATE.ZA when compiling the body of function FNDECL.
|
|||
|
|
+ This might be different from the state of PSTATE.ZA on entry. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_feature_flags
|
|||
|
|
+aarch64_fndecl_pstate_za (const_tree fndecl)
|
|||
|
|
+{
|
|||
|
|
+ if (aarch64_fndecl_has_new_state (fndecl, "za"))
|
|||
|
|
+ return AARCH64_FL_ZA_ON;
|
|||
|
|
+
|
|||
|
|
+ return aarch64_fntype_pstate_za (TREE_TYPE (fndecl));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Return the ISA mode that should be used to compile the body of
|
|||
|
|
function FNDECL. */
|
|||
|
|
|
|||
|
|
static aarch64_feature_flags
|
|||
|
|
aarch64_fndecl_isa_mode (const_tree fndecl)
|
|||
|
|
{
|
|||
|
|
- return aarch64_fndecl_pstate_sm (fndecl);
|
|||
|
|
+ return (aarch64_fndecl_pstate_sm (fndecl)
|
|||
|
|
+ | aarch64_fndecl_pstate_za (fndecl));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Return the state of PSTATE.SM on entry to the current function.
|
|||
|
|
@@ -4389,6 +4710,44 @@ aarch64_cfun_incoming_pstate_sm ()
|
|||
|
|
return aarch64_fntype_pstate_sm (TREE_TYPE (cfun->decl));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Return the state of PSTATE.ZA on entry to the current function.
|
|||
|
|
+ This might be different from the state of PSTATE.ZA in the function
|
|||
|
|
+ body. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_feature_flags
|
|||
|
|
+aarch64_cfun_incoming_pstate_za ()
|
|||
|
|
+{
|
|||
|
|
+ return aarch64_fntype_pstate_za (TREE_TYPE (cfun->decl));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return state flags that describe whether and how the current function shares
|
|||
|
|
+ state STATE_NAME with callers. */
|
|||
|
|
+
|
|||
|
|
+static unsigned int
|
|||
|
|
+aarch64_cfun_shared_flags (const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ return aarch64_fntype_shared_flags (TREE_TYPE (cfun->decl), state_name);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if the current function creates new state of type STATE_NAME
|
|||
|
|
+ (as opposed to sharing the state with its callers or ignoring the state
|
|||
|
|
+ altogether). */
|
|||
|
|
+
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_cfun_has_new_state (const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ return aarch64_fndecl_has_new_state (cfun->decl, state_name);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if the current function has state STATE_NAME, either by
|
|||
|
|
+ creating new state itself or by sharing state with callers. */
|
|||
|
|
+
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_cfun_has_state (const char *state_name)
|
|||
|
|
+{
|
|||
|
|
+ return aarch64_fndecl_has_state (cfun->decl, state_name);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Return true if a call from the current function to a function with
|
|||
|
|
ISA mode CALLEE_MODE would involve a change to PSTATE.SM around
|
|||
|
|
the BL instruction. */
|
|||
|
|
@@ -5952,6 +6311,74 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
|
|||
|
|
factor, nelts_per_vq);
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Return a constant of mode MODE that represents FACTOR multiplied by the
|
|||
|
|
+ number of 128-bit quadwords in an SME vector. ISA_MODE is the
|
|||
|
|
+ ISA mode in which the calculation is being performed. */
|
|||
|
|
+
|
|||
|
|
+static rtx
|
|||
|
|
+aarch64_sme_vq_immediate (machine_mode mode, HOST_WIDE_INT factor,
|
|||
|
|
+ aarch64_feature_flags isa_mode)
|
|||
|
|
+{
|
|||
|
|
+ gcc_assert (aarch64_sve_rdvl_factor_p (factor));
|
|||
|
|
+ if (isa_mode & AARCH64_FL_SM_ON)
|
|||
|
|
+ /* We're in streaming mode, so we can use normal poly-int values. */
|
|||
|
|
+ return gen_int_mode ({ factor, factor }, mode);
|
|||
|
|
+
|
|||
|
|
+ rtvec vec = gen_rtvec (1, gen_int_mode (factor, SImode));
|
|||
|
|
+ rtx unspec = gen_rtx_UNSPEC (mode, vec, UNSPEC_SME_VQ);
|
|||
|
|
+ return gen_rtx_CONST (mode, unspec);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if X is a constant that represents some number Y
|
|||
|
|
+ multiplied by the number of quadwords in an SME vector. Store this Y
|
|||
|
|
+ in *FACTOR if so. */
|
|||
|
|
+
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_sme_vq_unspec_p (const_rtx x, HOST_WIDE_INT *factor)
|
|||
|
|
+{
|
|||
|
|
+ if (!TARGET_SME || GET_CODE (x) != CONST)
|
|||
|
|
+ return false;
|
|||
|
|
+
|
|||
|
|
+ x = XEXP (x, 0);
|
|||
|
|
+ if (GET_CODE (x) != UNSPEC
|
|||
|
|
+ || XINT (x, 1) != UNSPEC_SME_VQ
|
|||
|
|
+ || XVECLEN (x, 0) != 1)
|
|||
|
|
+ return false;
|
|||
|
|
+
|
|||
|
|
+ x = XVECEXP (x, 0, 0);
|
|||
|
|
+ if (!CONST_INT_P (x))
|
|||
|
|
+ return false;
|
|||
|
|
+
|
|||
|
|
+ *factor = INTVAL (x);
|
|||
|
|
+ return true;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if X is a constant that represents some number Y
|
|||
|
|
+ multiplied by the number of quadwords in an SME vector, and if
|
|||
|
|
+ that Y is in the range of RDSVL. */
|
|||
|
|
+
|
|||
|
|
+bool
|
|||
|
|
+aarch64_rdsvl_immediate_p (const_rtx x)
|
|||
|
|
+{
|
|||
|
|
+ HOST_WIDE_INT factor;
|
|||
|
|
+ return (aarch64_sme_vq_unspec_p (x, &factor)
|
|||
|
|
+ && aarch64_sve_rdvl_factor_p (factor));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return the asm string for an RDSVL instruction that calculates X,
|
|||
|
|
+ which is a constant that satisfies aarch64_rdsvl_immediate_p. */
|
|||
|
|
+
|
|||
|
|
+char *
|
|||
|
|
+aarch64_output_rdsvl (const_rtx x)
|
|||
|
|
+{
|
|||
|
|
+ gcc_assert (aarch64_rdsvl_immediate_p (x));
|
|||
|
|
+ static char buffer[sizeof ("rdsvl\t%x0, #-") + 3 * sizeof (int)];
|
|||
|
|
+ x = XVECEXP (XEXP (x, 0), 0, 0);
|
|||
|
|
+ snprintf (buffer, sizeof (buffer), "rdsvl\t%%x0, #%d",
|
|||
|
|
+ (int) INTVAL (x) / 16);
|
|||
|
|
+ return buffer;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
|
|||
|
|
|
|||
|
|
static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
|
|||
|
|
@@ -7717,6 +8144,15 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
|
|||
|
|
return;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+ if (aarch64_rdsvl_immediate_p (base))
|
|||
|
|
+ {
|
|||
|
|
+ /* We could handle non-constant offsets if they are ever
|
|||
|
|
+ generated. */
|
|||
|
|
+ gcc_assert (const_offset == 0);
|
|||
|
|
+ emit_insn (gen_rtx_SET (dest, imm));
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
sty = aarch64_classify_symbol (base, const_offset);
|
|||
|
|
switch (sty)
|
|||
|
|
{
|
|||
|
|
@@ -8732,8 +9168,10 @@ aarch64_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
|
|||
|
|
rtx abi_cookie = aarch64_gen_callee_cookie (pcum->isa_mode,
|
|||
|
|
pcum->pcs_variant);
|
|||
|
|
rtx sme_mode_switch_args = aarch64_finish_sme_mode_switch_args (pcum);
|
|||
|
|
- return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, abi_cookie,
|
|||
|
|
- sme_mode_switch_args));
|
|||
|
|
+ rtx shared_za_flags = gen_int_mode (pcum->shared_za_flags, SImode);
|
|||
|
|
+ return gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, abi_cookie,
|
|||
|
|
+ sme_mode_switch_args,
|
|||
|
|
+ shared_za_flags));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
aarch64_layout_arg (pcum_v, arg);
|
|||
|
|
@@ -8744,7 +9182,7 @@ void
|
|||
|
|
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
|
|||
|
|
const_tree fntype,
|
|||
|
|
rtx libname ATTRIBUTE_UNUSED,
|
|||
|
|
- const_tree fndecl ATTRIBUTE_UNUSED,
|
|||
|
|
+ const_tree fndecl,
|
|||
|
|
unsigned n_named ATTRIBUTE_UNUSED,
|
|||
|
|
bool silent_p)
|
|||
|
|
{
|
|||
|
|
@@ -8769,6 +9207,8 @@ aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
|
|||
|
|
pcum->aapcs_stack_words = 0;
|
|||
|
|
pcum->aapcs_stack_size = 0;
|
|||
|
|
pcum->silent_p = silent_p;
|
|||
|
|
+ pcum->shared_za_flags
|
|||
|
|
+ = (fntype ? aarch64_fntype_shared_flags (fntype, "za") : 0U);
|
|||
|
|
pcum->num_sme_mode_switch_args = 0;
|
|||
|
|
|
|||
|
|
if (!silent_p
|
|||
|
|
@@ -10803,14 +11243,31 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Implement TARGET_EXTRA_LIVE_ON_ENTRY. */
|
|||
|
|
+
|
|||
|
|
+void
|
|||
|
|
+aarch64_extra_live_on_entry (bitmap regs)
|
|||
|
|
+{
|
|||
|
|
+ if (TARGET_ZA)
|
|||
|
|
+ {
|
|||
|
|
+ bitmap_set_bit (regs, LOWERING_REGNUM);
|
|||
|
|
+ bitmap_set_bit (regs, SME_STATE_REGNUM);
|
|||
|
|
+ bitmap_set_bit (regs, TPIDR2_SETUP_REGNUM);
|
|||
|
|
+ bitmap_set_bit (regs, ZA_FREE_REGNUM);
|
|||
|
|
+ bitmap_set_bit (regs, ZA_SAVED_REGNUM);
|
|||
|
|
+
|
|||
|
|
+ /* The only time ZA can't have live contents on entry is when
|
|||
|
|
+ the function explicitly treats it as a pure output. */
|
|||
|
|
+ auto za_flags = aarch64_cfun_shared_flags ("za");
|
|||
|
|
+ if (za_flags != (AARCH64_STATE_SHARED | AARCH64_STATE_OUT))
|
|||
|
|
+ bitmap_set_bit (regs, ZA_REGNUM);
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Return 1 if the register is used by the epilogue. We need to say the
|
|||
|
|
return register is used, but only after epilogue generation is complete.
|
|||
|
|
Note that in the case of sibcalls, the values "used by the epilogue" are
|
|||
|
|
- considered live at the start of the called function.
|
|||
|
|
-
|
|||
|
|
- For SIMD functions we need to return 1 for FP registers that are saved and
|
|||
|
|
- restored by a function but are not zero in call_used_regs. If we do not do
|
|||
|
|
- this optimizations may remove the restore of the register. */
|
|||
|
|
+ considered live at the start of the called function. */
|
|||
|
|
|
|||
|
|
int
|
|||
|
|
aarch64_epilogue_uses (int regno)
|
|||
|
|
@@ -10820,6 +11277,18 @@ aarch64_epilogue_uses (int regno)
|
|||
|
|
if (regno == LR_REGNUM)
|
|||
|
|
return 1;
|
|||
|
|
}
|
|||
|
|
+ if (regno == LOWERING_REGNUM && TARGET_ZA)
|
|||
|
|
+ return 1;
|
|||
|
|
+ if (regno == SME_STATE_REGNUM && TARGET_ZA)
|
|||
|
|
+ return 1;
|
|||
|
|
+ if (regno == TPIDR2_SETUP_REGNUM && TARGET_ZA)
|
|||
|
|
+ return 1;
|
|||
|
|
+ /* If the function shares SME state with its caller, ensure that that
|
|||
|
|
+ data is not in the lazy save buffer on exit. */
|
|||
|
|
+ if (regno == ZA_SAVED_REGNUM && aarch64_cfun_incoming_pstate_za () != 0)
|
|||
|
|
+ return 1;
|
|||
|
|
+ if (regno == ZA_REGNUM && aarch64_cfun_shared_flags ("za") != 0)
|
|||
|
|
+ return 1;
|
|||
|
|
return 0;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
@@ -11501,8 +11970,10 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
|
|||
|
|
|
|||
|
|
/* There's no way to calculate VL-based values using relocations. */
|
|||
|
|
subrtx_iterator::array_type array;
|
|||
|
|
+ HOST_WIDE_INT factor;
|
|||
|
|
FOR_EACH_SUBRTX (iter, array, x, ALL)
|
|||
|
|
- if (GET_CODE (*iter) == CONST_POLY_INT)
|
|||
|
|
+ if (GET_CODE (*iter) == CONST_POLY_INT
|
|||
|
|
+ || aarch64_sme_vq_unspec_p (x, &factor))
|
|||
|
|
return true;
|
|||
|
|
|
|||
|
|
poly_int64 offset;
|
|||
|
|
@@ -12364,6 +12835,72 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
|
|||
|
|
return true;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Return a fresh memory reference to the current function's TPIDR2 block,
|
|||
|
|
+ creating a block if necessary. */
|
|||
|
|
+
|
|||
|
|
+static rtx
|
|||
|
|
+aarch64_get_tpidr2_block ()
|
|||
|
|
+{
|
|||
|
|
+ if (!cfun->machine->tpidr2_block)
|
|||
|
|
+ /* The TPIDR2 block is 16 bytes in size and must be aligned to a 128-bit
|
|||
|
|
+ boundary. */
|
|||
|
|
+ cfun->machine->tpidr2_block = assign_stack_local (V16QImode, 16, 128);
|
|||
|
|
+ return copy_rtx (cfun->machine->tpidr2_block);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return a fresh register that points to the current function's
|
|||
|
|
+ TPIDR2 block, creating a block if necessary. */
|
|||
|
|
+
|
|||
|
|
+static rtx
|
|||
|
|
+aarch64_get_tpidr2_ptr ()
|
|||
|
|
+{
|
|||
|
|
+ rtx block = aarch64_get_tpidr2_block ();
|
|||
|
|
+ return force_reg (Pmode, XEXP (block, 0));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Emit instructions to allocate a ZA lazy save buffer and initialize the
|
|||
|
|
+ current function's TPIDR2 block. */
|
|||
|
|
+
|
|||
|
|
+static void
|
|||
|
|
+aarch64_init_tpidr2_block ()
|
|||
|
|
+{
|
|||
|
|
+ rtx block = aarch64_get_tpidr2_block ();
|
|||
|
|
+
|
|||
|
|
+ /* The ZA save buffer is SVL.B*SVL.B bytes in size. */
|
|||
|
|
+ rtx svl_bytes = aarch64_sme_vq_immediate (Pmode, 16, AARCH64_ISA_MODE);
|
|||
|
|
+ rtx svl_bytes_reg = force_reg (DImode, svl_bytes);
|
|||
|
|
+ rtx za_size = expand_simple_binop (Pmode, MULT, svl_bytes_reg,
|
|||
|
|
+ svl_bytes_reg, NULL, 0, OPTAB_LIB_WIDEN);
|
|||
|
|
+ rtx za_save_buffer = allocate_dynamic_stack_space (za_size, 128,
|
|||
|
|
+ BITS_PER_UNIT, -1, true);
|
|||
|
|
+ za_save_buffer = force_reg (Pmode, za_save_buffer);
|
|||
|
|
+ cfun->machine->za_save_buffer = za_save_buffer;
|
|||
|
|
+
|
|||
|
|
+ /* The first word of the block points to the save buffer and the second
|
|||
|
|
+ word is the number of ZA slices to save. */
|
|||
|
|
+ rtx block_0 = adjust_address (block, DImode, 0);
|
|||
|
|
+ rtx block_8 = adjust_address (block, DImode, 8);
|
|||
|
|
+ emit_insn (gen_store_pair_dw_didi (block_0, za_save_buffer,
|
|||
|
|
+ block_8, svl_bytes_reg));
|
|||
|
|
+
|
|||
|
|
+ if (!memory_operand (block, V16QImode))
|
|||
|
|
+ block = replace_equiv_address (block, force_reg (Pmode, XEXP (block, 0)));
|
|||
|
|
+ emit_insn (gen_aarch64_setup_local_tpidr2 (block));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Restore the contents of ZA from the lazy save buffer, given that
|
|||
|
|
+ register TPIDR2_BLOCK points to the current function's TPIDR2 block.
|
|||
|
|
+ PSTATE.ZA is known to be 0 and TPIDR2_EL0 is known to be null. */
|
|||
|
|
+
|
|||
|
|
+void
|
|||
|
|
+aarch64_restore_za (rtx tpidr2_block)
|
|||
|
|
+{
|
|||
|
|
+ emit_insn (gen_aarch64_smstart_za ());
|
|||
|
|
+ if (REGNO (tpidr2_block) != R0_REGNUM)
|
|||
|
|
+ emit_move_insn (gen_rtx_REG (Pmode, R0_REGNUM), tpidr2_block);
|
|||
|
|
+ emit_insn (gen_aarch64_tpidr2_restore ());
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Implement TARGET_START_CALL_ARGS. */
|
|||
|
|
|
|||
|
|
static void
|
|||
|
|
@@ -12379,6 +12916,20 @@ aarch64_start_call_args (cumulative_args_t ca_v)
|
|||
|
|
" option %<-march%>, or by using the %<target%>"
|
|||
|
|
" attribute or pragma", "sme");
|
|||
|
|
}
|
|||
|
|
+
|
|||
|
|
+ if ((ca->shared_za_flags & (AARCH64_STATE_IN | AARCH64_STATE_OUT))
|
|||
|
|
+ && !aarch64_cfun_has_state ("za"))
|
|||
|
|
+ error ("call to a function that shares %qs state from a function"
|
|||
|
|
+ " that has no %qs state", "za", "za");
|
|||
|
|
+ else if (!TARGET_ZA && (ca->isa_mode & AARCH64_FL_ZA_ON))
|
|||
|
|
+ error ("call to a function that shares SME state from a function"
|
|||
|
|
+ " that has no SME state");
|
|||
|
|
+
|
|||
|
|
+ /* If this is a call to a private ZA function, emit a marker to
|
|||
|
|
+ indicate where any necessary set-up code could be inserted.
|
|||
|
|
+ The code itself is inserted by the mode-switching pass. */
|
|||
|
|
+ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
|
|||
|
|
+ emit_insn (gen_aarch64_start_private_za_call ());
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* This function is used by the call expanders of the machine description.
|
|||
|
|
@@ -12391,6 +12942,8 @@ aarch64_start_call_args (cumulative_args_t ca_v)
|
|||
|
|
The second element is a PARALLEL that lists all the argument
|
|||
|
|
registers that need to be saved and restored around a change
|
|||
|
|
in PSTATE.SM, or const0_rtx if no such switch is needed.
|
|||
|
|
+ The third element is a const_int that contains the sharing flags
|
|||
|
|
+ for ZA.
|
|||
|
|
SIBCALL indicates whether this function call is normal call or sibling call.
|
|||
|
|
It will generate different pattern accordingly. */
|
|||
|
|
|
|||
|
|
@@ -12403,10 +12956,12 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
|
|||
|
|
|
|||
|
|
rtx callee_abi = cookie;
|
|||
|
|
rtx sme_mode_switch_args = const0_rtx;
|
|||
|
|
+ unsigned int shared_za_flags = 0;
|
|||
|
|
if (GET_CODE (cookie) == PARALLEL)
|
|||
|
|
{
|
|||
|
|
callee_abi = XVECEXP (cookie, 0, 0);
|
|||
|
|
sme_mode_switch_args = XVECEXP (cookie, 0, 1);
|
|||
|
|
+ shared_za_flags = INTVAL (XVECEXP (cookie, 0, 2));
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
gcc_assert (CONST_INT_P (callee_abi));
|
|||
|
|
@@ -12426,6 +12981,41 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
|
|||
|
|
: !REG_P (callee))
|
|||
|
|
XEXP (mem, 0) = force_reg (mode, callee);
|
|||
|
|
|
|||
|
|
+ /* Accumulate the return values, including state that is shared via
|
|||
|
|
+ attributes. */
|
|||
|
|
+ auto_vec<rtx, 8> return_values;
|
|||
|
|
+ if (result)
|
|||
|
|
+ {
|
|||
|
|
+ if (GET_CODE (result) == PARALLEL)
|
|||
|
|
+ for (int i = 0; i < XVECLEN (result, 0); ++i)
|
|||
|
|
+ return_values.safe_push (XVECEXP (result, 0, i));
|
|||
|
|
+ else
|
|||
|
|
+ return_values.safe_push (result);
|
|||
|
|
+ }
|
|||
|
|
+ unsigned int orig_num_return_values = return_values.length ();
|
|||
|
|
+ if (shared_za_flags & AARCH64_STATE_OUT)
|
|||
|
|
+ return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_REGNUM));
|
|||
|
|
+ /* When calling private-ZA functions from functions with ZA state,
|
|||
|
|
+ we want to know whether the call committed a lazy save. */
|
|||
|
|
+ if (TARGET_ZA && !shared_za_flags)
|
|||
|
|
+ return_values.safe_push (gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM));
|
|||
|
|
+
|
|||
|
|
+ /* Create the new return value, if necessary. */
|
|||
|
|
+ if (orig_num_return_values != return_values.length ())
|
|||
|
|
+ {
|
|||
|
|
+ if (return_values.length () == 1)
|
|||
|
|
+ result = return_values[0];
|
|||
|
|
+ else
|
|||
|
|
+ {
|
|||
|
|
+ for (rtx &x : return_values)
|
|||
|
|
+ if (GET_CODE (x) != EXPR_LIST)
|
|||
|
|
+ x = gen_rtx_EXPR_LIST (VOIDmode, x, const0_rtx);
|
|||
|
|
+ rtvec v = gen_rtvec_v (return_values.length (),
|
|||
|
|
+ return_values.address ());
|
|||
|
|
+ result = gen_rtx_PARALLEL (VOIDmode, v);
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
call = gen_rtx_CALL (VOIDmode, mem, const0_rtx);
|
|||
|
|
|
|||
|
|
if (result != NULL_RTX)
|
|||
|
|
@@ -12492,6 +13082,50 @@ aarch64_expand_call (rtx result, rtx mem, rtx cookie, bool sibcall)
|
|||
|
|
|
|||
|
|
cfun->machine->call_switches_pstate_sm = true;
|
|||
|
|
}
|
|||
|
|
+
|
|||
|
|
+ /* Add any ZA-related information.
|
|||
|
|
+ ZA_REGNUM represents the current function's ZA state, rather than
|
|||
|
|
+ the contents of the ZA register itself. We ensure that the function's
|
|||
|
|
+ ZA state is preserved by private-ZA call sequences, so the call itself
|
|||
|
|
+ does not use or clobber ZA_REGNUM. */
|
|||
|
|
+ if (TARGET_ZA)
|
|||
|
|
+ {
|
|||
|
|
+ /* The callee requires ZA to be active if the callee is shared-ZA,
|
|||
|
|
+ otherwise it requires ZA to be dormant or off. The state of ZA is
|
|||
|
|
+ captured by a combination of SME_STATE_REGNUM, TPIDR2_SETUP_REGNUM,
|
|||
|
|
+ and ZA_SAVED_REGNUM. */
|
|||
|
|
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
|
|||
|
|
+ gen_rtx_REG (DImode, SME_STATE_REGNUM));
|
|||
|
|
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
|
|||
|
|
+ gen_rtx_REG (DImode, TPIDR2_SETUP_REGNUM));
|
|||
|
|
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
|
|||
|
|
+ gen_rtx_REG (VNx16BImode, ZA_SAVED_REGNUM));
|
|||
|
|
+
|
|||
|
|
+ /* Keep the aarch64_start/end_private_za_call markers live. */
|
|||
|
|
+ if (!(callee_isa_mode & AARCH64_FL_ZA_ON))
|
|||
|
|
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
|
|||
|
|
+ gen_rtx_REG (VNx16BImode, LOWERING_REGNUM));
|
|||
|
|
+
|
|||
|
|
+ /* If the callee is a shared-ZA function, record whether it uses the
|
|||
|
|
+ current value of ZA. */
|
|||
|
|
+ if (shared_za_flags & AARCH64_STATE_IN)
|
|||
|
|
+ use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
|
|||
|
|
+ gen_rtx_REG (VNx16BImode, ZA_REGNUM));
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_END_CALL_ARGS. */
|
|||
|
|
+
|
|||
|
|
+static void
|
|||
|
|
+aarch64_end_call_args (cumulative_args_t ca_v)
|
|||
|
|
+{
|
|||
|
|
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
|
|||
|
|
+
|
|||
|
|
+ /* If this is a call to a private ZA function, emit a marker to
|
|||
|
|
+ indicate where any necessary restoration code could be inserted.
|
|||
|
|
+ The code itself is inserted by the mode-switching pass. */
|
|||
|
|
+ if (TARGET_ZA && !(ca->isa_mode & AARCH64_FL_ZA_ON))
|
|||
|
|
+ emit_insn (gen_aarch64_end_private_za_call ());
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Emit call insn with PAT and do aarch64-specific handling. */
|
|||
|
|
@@ -13602,6 +14236,9 @@ aarch64_regno_regclass (unsigned regno)
|
|||
|
|
if (regno == FFR_REGNUM || regno == FFRT_REGNUM)
|
|||
|
|
return FFR_REGS;
|
|||
|
|
|
|||
|
|
+ if (FAKE_REGNUM_P (regno))
|
|||
|
|
+ return FAKE_REGS;
|
|||
|
|
+
|
|||
|
|
return NO_REGS;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
@@ -13957,12 +14594,14 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
|
|||
|
|
return (vec_flags & VEC_ADVSIMD
|
|||
|
|
? CEIL (lowest_size, UNITS_PER_VREG)
|
|||
|
|
: CEIL (lowest_size, UNITS_PER_WORD));
|
|||
|
|
+
|
|||
|
|
case STACK_REG:
|
|||
|
|
case PR_REGS:
|
|||
|
|
case PR_LO_REGS:
|
|||
|
|
case PR_HI_REGS:
|
|||
|
|
case FFR_REGS:
|
|||
|
|
case PR_AND_FFR_REGS:
|
|||
|
|
+ case FAKE_REGS:
|
|||
|
|
return 1;
|
|||
|
|
|
|||
|
|
case NO_REGS:
|
|||
|
|
@@ -19002,10 +19641,14 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
|||
|
|
&& !fixed_regs[R18_REGNUM])
|
|||
|
|
error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
|
|||
|
|
|
|||
|
|
- if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
|
|||
|
|
+ if ((opts->x_aarch64_isa_flags & (AARCH64_FL_SM_ON | AARCH64_FL_ZA_ON))
|
|||
|
|
&& !(opts->x_aarch64_isa_flags & AARCH64_FL_SME))
|
|||
|
|
{
|
|||
|
|
- error ("streaming functions require the ISA extension %qs", "sme");
|
|||
|
|
+ if (opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
|
|||
|
|
+ error ("streaming functions require the ISA extension %qs", "sme");
|
|||
|
|
+ else
|
|||
|
|
+ error ("functions with SME state require the ISA extension %qs",
|
|||
|
|
+ "sme");
|
|||
|
|
inform (input_location, "you can enable %qs using the command-line"
|
|||
|
|
" option %<-march%>, or by using the %<target%>"
|
|||
|
|
" attribute or pragma", "sme");
|
|||
|
|
@@ -21341,6 +21984,8 @@ aarch64_conditional_register_usage (void)
|
|||
|
|
CLEAR_HARD_REG_BIT (operand_reg_set, VG_REGNUM);
|
|||
|
|
CLEAR_HARD_REG_BIT (operand_reg_set, FFR_REGNUM);
|
|||
|
|
CLEAR_HARD_REG_BIT (operand_reg_set, FFRT_REGNUM);
|
|||
|
|
+ for (int i = FIRST_FAKE_REGNUM; i <= LAST_FAKE_REGNUM; ++i)
|
|||
|
|
+ CLEAR_HARD_REG_BIT (operand_reg_set, i);
|
|||
|
|
|
|||
|
|
/* When tracking speculation, we need a couple of call-clobbered registers
|
|||
|
|
to track the speculation state. It would be nice to just use
|
|||
|
|
@@ -22795,6 +23440,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
|
|||
|
|
|| aarch64_sve_rdvl_immediate_p (x)))
|
|||
|
|
return true;
|
|||
|
|
|
|||
|
|
+ if (aarch64_rdsvl_immediate_p (x))
|
|||
|
|
+ return true;
|
|||
|
|
+
|
|||
|
|
return aarch64_classify_symbolic_expression (x)
|
|||
|
|
== SYMBOL_TINY_ABSOLUTE;
|
|||
|
|
}
|
|||
|
|
@@ -28266,9 +28914,45 @@ aarch64_comp_type_attributes (const_tree type1, const_tree type2)
|
|||
|
|
return 0;
|
|||
|
|
if (!check_attr ("arm", "streaming_compatible"))
|
|||
|
|
return 0;
|
|||
|
|
+ if (aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type1), "za")
|
|||
|
|
+ != aarch64_lookup_shared_state_flags (TYPE_ATTRIBUTES (type2), "za"))
|
|||
|
|
+ return 0;
|
|||
|
|
return 1;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Implement TARGET_MERGE_DECL_ATTRIBUTES. */
|
|||
|
|
+
|
|||
|
|
+static tree
|
|||
|
|
+aarch64_merge_decl_attributes (tree olddecl, tree newdecl)
|
|||
|
|
+{
|
|||
|
|
+ tree old_attrs = DECL_ATTRIBUTES (olddecl);
|
|||
|
|
+ tree old_new = lookup_attribute ("arm", "new", old_attrs);
|
|||
|
|
+
|
|||
|
|
+ tree new_attrs = DECL_ATTRIBUTES (newdecl);
|
|||
|
|
+ tree new_new = lookup_attribute ("arm", "new", new_attrs);
|
|||
|
|
+
|
|||
|
|
+ if (DECL_INITIAL (olddecl) && new_new)
|
|||
|
|
+ {
|
|||
|
|
+ error ("cannot apply attribute %qs to %q+D after the function"
|
|||
|
|
+ " has been defined", "new", newdecl);
|
|||
|
|
+ inform (DECL_SOURCE_LOCATION (olddecl), "%q+D defined here",
|
|||
|
|
+ newdecl);
|
|||
|
|
+ }
|
|||
|
|
+ else
|
|||
|
|
+ {
|
|||
|
|
+ if (old_new && new_new)
|
|||
|
|
+ {
|
|||
|
|
+ old_attrs = remove_attribute ("arm", "new", old_attrs);
|
|||
|
|
+ TREE_VALUE (new_new) = chainon (TREE_VALUE (new_new),
|
|||
|
|
+ TREE_VALUE (old_new));
|
|||
|
|
+ }
|
|||
|
|
+ if (new_new)
|
|||
|
|
+ aarch64_check_arm_new_against_type (TREE_VALUE (new_new), newdecl);
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ return merge_attributes (old_attrs, new_attrs);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* Implement TARGET_GET_MULTILIB_ABI_NAME */
|
|||
|
|
|
|||
|
|
static const char *
|
|||
|
|
@@ -28634,6 +29318,629 @@ aarch64_indirect_call_asm (rtx addr)
|
|||
|
|
return "";
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
+/* Implement OPTIMIZE_MODE_SWITCHING. */
|
|||
|
|
+
|
|||
|
|
+bool
|
|||
|
|
+aarch64_optimize_mode_switching (aarch64_mode_entity entity)
|
|||
|
|
+{
|
|||
|
|
+ bool have_sme_state = (aarch64_cfun_incoming_pstate_za () != 0
|
|||
|
|
+ || (aarch64_cfun_has_new_state ("za")
|
|||
|
|
+ && df_regs_ever_live_p (ZA_REGNUM)));
|
|||
|
|
+
|
|||
|
|
+ if (have_sme_state && nonlocal_goto_handler_labels)
|
|||
|
|
+ {
|
|||
|
|
+ static bool reported;
|
|||
|
|
+ if (!reported)
|
|||
|
|
+ {
|
|||
|
|
+ sorry ("non-local gotos in functions with SME state");
|
|||
|
|
+ reported = true;
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ switch (entity)
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return have_sme_state && !nonlocal_goto_handler_labels;
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_EMIT for HAVE_ZA_SAVE_BUFFER. */
|
|||
|
|
+
|
|||
|
|
+static void
|
|||
|
|
+aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode mode,
|
|||
|
|
+ aarch64_tristate_mode prev_mode)
|
|||
|
|
+{
|
|||
|
|
+ if (mode == aarch64_tristate_mode::YES)
|
|||
|
|
+ {
|
|||
|
|
+ gcc_assert (prev_mode == aarch64_tristate_mode::NO);
|
|||
|
|
+ aarch64_init_tpidr2_block ();
|
|||
|
|
+ }
|
|||
|
|
+ else
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_EMIT for LOCAL_SME_STATE. */
|
|||
|
|
+
|
|||
|
|
+static void
|
|||
|
|
+aarch64_mode_emit_local_sme_state (aarch64_local_sme_state mode,
|
|||
|
|
+ aarch64_local_sme_state prev_mode)
|
|||
|
|
+{
|
|||
|
|
+ /* Back-propagation should ensure that we're always starting from
|
|||
|
|
+ a known mode. */
|
|||
|
|
+ gcc_assert (prev_mode != aarch64_local_sme_state::ANY);
|
|||
|
|
+
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER)
|
|||
|
|
+ {
|
|||
|
|
+ /* Commit any uncommitted lazy save. This leaves ZA either active
|
|||
|
|
+ and zero (lazy save case) or off (normal case).
|
|||
|
|
+
|
|||
|
|
+ The sequence is:
|
|||
|
|
+
|
|||
|
|
+ mrs <temp>, tpidr2_el0
|
|||
|
|
+ cbz <temp>, no_save
|
|||
|
|
+ bl __arm_tpidr2_save
|
|||
|
|
+ msr tpidr2_el0, xzr
|
|||
|
|
+ zero { za } // Only if ZA is live
|
|||
|
|
+ no_save: */
|
|||
|
|
+ bool is_active = (mode == aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ || mode == aarch64_local_sme_state::ACTIVE_DEAD);
|
|||
|
|
+ auto tmp_reg = gen_reg_rtx (DImode);
|
|||
|
|
+ auto active_flag = gen_int_mode (is_active, DImode);
|
|||
|
|
+ emit_insn (gen_aarch64_read_tpidr2 (tmp_reg));
|
|||
|
|
+ emit_insn (gen_aarch64_commit_lazy_save (tmp_reg, active_flag));
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (mode == aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ || mode == aarch64_local_sme_state::ACTIVE_DEAD)
|
|||
|
|
+ {
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL)
|
|||
|
|
+ {
|
|||
|
|
+ /* Make ZA active after being inactive.
|
|||
|
|
+
|
|||
|
|
+ First handle the case in which the lazy save we set up was
|
|||
|
|
+ committed by a callee. If the function's source-level ZA state
|
|||
|
|
+ is live then we must conditionally restore it from the lazy
|
|||
|
|
+ save buffer. Otherwise we can just force PSTATE.ZA to 1. */
|
|||
|
|
+ if (mode == aarch64_local_sme_state::ACTIVE_LIVE)
|
|||
|
|
+ emit_insn (gen_aarch64_restore_za (aarch64_get_tpidr2_ptr ()));
|
|||
|
|
+ else
|
|||
|
|
+ emit_insn (gen_aarch64_smstart_za ());
|
|||
|
|
+
|
|||
|
|
+ /* Now handle the case in which the lazy save was not committed.
|
|||
|
|
+ In that case, ZA still contains the current function's ZA state,
|
|||
|
|
+ and we just need to cancel the lazy save. */
|
|||
|
|
+ emit_insn (gen_aarch64_clear_tpidr2 ());
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL)
|
|||
|
|
+ {
|
|||
|
|
+ /* Retrieve the current function's ZA state from the lazy save
|
|||
|
|
+ buffer. */
|
|||
|
|
+ aarch64_restore_za (aarch64_get_tpidr2_ptr ());
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::INACTIVE_CALLER
|
|||
|
|
+ || prev_mode == aarch64_local_sme_state::OFF)
|
|||
|
|
+ {
|
|||
|
|
+ /* INACTIVE_CALLER means that we are enabling ZA for the first
|
|||
|
|
+ time in this function. The code above means that ZA is either
|
|||
|
|
+ active and zero (if we committed a lazy save) or off. Handle
|
|||
|
|
+ the latter case by forcing ZA on.
|
|||
|
|
+
|
|||
|
|
+ OFF means that PSTATE.ZA is guaranteed to be 0. We just need
|
|||
|
|
+ to force it to 1.
|
|||
|
|
+
|
|||
|
|
+ Both cases leave ZA zeroed. */
|
|||
|
|
+ emit_insn (gen_aarch64_smstart_za ());
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::ACTIVE_DEAD
|
|||
|
|
+ || prev_mode == aarch64_local_sme_state::ACTIVE_LIVE)
|
|||
|
|
+ /* A simple change in liveness, such as in a CFG structure where
|
|||
|
|
+ ZA is only conditionally defined. No code is needed. */
|
|||
|
|
+ return;
|
|||
|
|
+
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (mode == aarch64_local_sme_state::INACTIVE_LOCAL)
|
|||
|
|
+ {
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ || prev_mode == aarch64_local_sme_state::ACTIVE_DEAD
|
|||
|
|
+ || prev_mode == aarch64_local_sme_state::INACTIVE_CALLER)
|
|||
|
|
+ {
|
|||
|
|
+ /* A transition from ACTIVE_LIVE to INACTIVE_LOCAL is the usual
|
|||
|
|
+ case of setting up a lazy save buffer before a call.
|
|||
|
|
+ A transition from INACTIVE_CALLER is similar, except that
|
|||
|
|
+ the contents of ZA are known to be zero.
|
|||
|
|
+
|
|||
|
|
+ A transition from ACTIVE_DEAD means that ZA is live at the
|
|||
|
|
+ point of the transition, but is dead on at least one incoming
|
|||
|
|
+ edge. (That is, ZA is only conditionally initialized.)
|
|||
|
|
+ For efficiency, we want to set up a lazy save even for
|
|||
|
|
+ dead contents, since forcing ZA off would make later code
|
|||
|
|
+ restore ZA from the lazy save buffer. */
|
|||
|
|
+ emit_insn (gen_aarch64_write_tpidr2 (aarch64_get_tpidr2_ptr ()));
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::SAVED_LOCAL
|
|||
|
|
+ || prev_mode == aarch64_local_sme_state::OFF)
|
|||
|
|
+ /* We're simply discarding the information about which inactive
|
|||
|
|
+ state applies. */
|
|||
|
|
+ return;
|
|||
|
|
+
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (mode == aarch64_local_sme_state::INACTIVE_CALLER
|
|||
|
|
+ || mode == aarch64_local_sme_state::OFF)
|
|||
|
|
+ {
|
|||
|
|
+ /* The transition to INACTIVE_CALLER is used before returning from
|
|||
|
|
+ new("za") functions. Any state in ZA belongs to the current
|
|||
|
|
+ function rather than a caller, but that state is no longer
|
|||
|
|
+ needed. Clear any pending lazy save and turn ZA off.
|
|||
|
|
+
|
|||
|
|
+ The transition to OFF is used before calling a private-ZA function.
|
|||
|
|
+ We committed any incoming lazy save above, so at this point any
|
|||
|
|
+ contents in ZA belong to the current function. */
|
|||
|
|
+ if (prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL)
|
|||
|
|
+ emit_insn (gen_aarch64_clear_tpidr2 ());
|
|||
|
|
+
|
|||
|
|
+ if (prev_mode != aarch64_local_sme_state::OFF
|
|||
|
|
+ && prev_mode != aarch64_local_sme_state::SAVED_LOCAL)
|
|||
|
|
+ emit_insn (gen_aarch64_smstop_za ());
|
|||
|
|
+
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ if (mode == aarch64_local_sme_state::SAVED_LOCAL)
|
|||
|
|
+ {
|
|||
|
|
+ /* This is a transition to an exception handler. */
|
|||
|
|
+ gcc_assert (prev_mode == aarch64_local_sme_state::OFF
|
|||
|
|
+ || prev_mode == aarch64_local_sme_state::INACTIVE_LOCAL);
|
|||
|
|
+ return;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_EMIT. */
|
|||
|
|
+
|
|||
|
|
+static void
|
|||
|
|
+aarch64_mode_emit (int entity, int mode, int prev_mode, HARD_REG_SET live)
|
|||
|
|
+{
|
|||
|
|
+ if (mode == prev_mode)
|
|||
|
|
+ return;
|
|||
|
|
+
|
|||
|
|
+ start_sequence ();
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ aarch64_mode_emit_za_save_buffer (aarch64_tristate_mode (mode),
|
|||
|
|
+ aarch64_tristate_mode (prev_mode));
|
|||
|
|
+ break;
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ aarch64_mode_emit_local_sme_state (aarch64_local_sme_state (mode),
|
|||
|
|
+ aarch64_local_sme_state (prev_mode));
|
|||
|
|
+ break;
|
|||
|
|
+ }
|
|||
|
|
+ rtx_insn *seq = get_insns ();
|
|||
|
|
+ end_sequence ();
|
|||
|
|
+
|
|||
|
|
+ /* Get the set of clobbered registers that are currently live. */
|
|||
|
|
+ HARD_REG_SET clobbers = {};
|
|||
|
|
+ for (rtx_insn *insn = seq; insn; insn = NEXT_INSN (insn))
|
|||
|
|
+ {
|
|||
|
|
+ vec_rtx_properties properties;
|
|||
|
|
+ properties.add_insn (insn, false);
|
|||
|
|
+ for (rtx_obj_reference ref : properties.refs ())
|
|||
|
|
+ if (ref.is_write () && HARD_REGISTER_NUM_P (ref.regno))
|
|||
|
|
+ SET_HARD_REG_BIT (clobbers, ref.regno);
|
|||
|
|
+ }
|
|||
|
|
+ clobbers &= live;
|
|||
|
|
+
|
|||
|
|
+ /* Emit instructions to save clobbered registers to pseudos. Queue
|
|||
|
|
+ instructions to restore the registers afterwards.
|
|||
|
|
+
|
|||
|
|
+ This should only be needed in rare situations. */
|
|||
|
|
+ auto_vec<rtx, 33> after;
|
|||
|
|
+ for (unsigned int regno = R0_REGNUM; regno < R30_REGNUM; ++regno)
|
|||
|
|
+ if (TEST_HARD_REG_BIT (clobbers, regno))
|
|||
|
|
+ {
|
|||
|
|
+ rtx hard_reg = gen_rtx_REG (DImode, regno);
|
|||
|
|
+ rtx pseudo_reg = gen_reg_rtx (DImode);
|
|||
|
|
+ emit_move_insn (pseudo_reg, hard_reg);
|
|||
|
|
+ after.quick_push (gen_move_insn (hard_reg, pseudo_reg));
|
|||
|
|
+ }
|
|||
|
|
+ if (TEST_HARD_REG_BIT (clobbers, CC_REGNUM))
|
|||
|
|
+ {
|
|||
|
|
+ rtx pseudo_reg = gen_reg_rtx (DImode);
|
|||
|
|
+ emit_insn (gen_aarch64_save_nzcv (pseudo_reg));
|
|||
|
|
+ after.quick_push (gen_aarch64_restore_nzcv (pseudo_reg));
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ /* Emit the transition instructions themselves. */
|
|||
|
|
+ emit_insn (seq);
|
|||
|
|
+
|
|||
|
|
+ /* Restore the clobbered registers. */
|
|||
|
|
+ for (auto *insn : after)
|
|||
|
|
+ emit_insn (insn);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Return true if INSN references the SME state represented by hard register
|
|||
|
|
+ REGNO. */
|
|||
|
|
+
|
|||
|
|
+static bool
|
|||
|
|
+aarch64_insn_references_sme_state_p (rtx_insn *insn, unsigned int regno)
|
|||
|
|
+{
|
|||
|
|
+ df_ref ref;
|
|||
|
|
+ FOR_EACH_INSN_DEF (ref, insn)
|
|||
|
|
+ if (!DF_REF_FLAGS_IS_SET (ref, DF_REF_MUST_CLOBBER)
|
|||
|
|
+ && DF_REF_REGNO (ref) == regno)
|
|||
|
|
+ return true;
|
|||
|
|
+ FOR_EACH_INSN_USE (ref, insn)
|
|||
|
|
+ if (DF_REF_REGNO (ref) == regno)
|
|||
|
|
+ return true;
|
|||
|
|
+ return false;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_NEEDED for LOCAL_SME_STATE. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_local_sme_state
|
|||
|
|
+aarch64_mode_needed_local_sme_state (rtx_insn *insn, HARD_REG_SET live)
|
|||
|
|
+{
|
|||
|
|
+ if (!CALL_P (insn)
|
|||
|
|
+ && find_reg_note (insn, REG_EH_REGION, NULL_RTX))
|
|||
|
|
+ {
|
|||
|
|
+ static bool reported;
|
|||
|
|
+ if (!reported)
|
|||
|
|
+ {
|
|||
|
|
+ sorry ("catching non-call exceptions in functions with SME state");
|
|||
|
|
+ reported = true;
|
|||
|
|
+ }
|
|||
|
|
+ /* Aim for graceful error recovery by picking the value that is
|
|||
|
|
+ least likely to generate an ICE. */
|
|||
|
|
+ return aarch64_local_sme_state::INACTIVE_LOCAL;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ /* A non-local goto is equivalent to a return. We disallow non-local
|
|||
|
|
+ receivers in functions with SME state, so we know that the target
|
|||
|
|
+ expects ZA to be dormant or off. */
|
|||
|
|
+ if (JUMP_P (insn)
|
|||
|
|
+ && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
|
|||
|
|
+ return aarch64_local_sme_state::INACTIVE_CALLER;
|
|||
|
|
+
|
|||
|
|
+ /* start_private_za_call and end_private_za_call bracket a sequence
|
|||
|
|
+ that calls a private-ZA function. Force ZA to be turned off if the
|
|||
|
|
+ function doesn't have any live ZA state, otherwise require ZA to be
|
|||
|
|
+ inactive. */
|
|||
|
|
+ auto icode = recog_memoized (insn);
|
|||
|
|
+ if (icode == CODE_FOR_aarch64_start_private_za_call
|
|||
|
|
+ || icode == CODE_FOR_aarch64_end_private_za_call)
|
|||
|
|
+ return (TEST_HARD_REG_BIT (live, ZA_REGNUM)
|
|||
|
|
+ ? aarch64_local_sme_state::INACTIVE_LOCAL
|
|||
|
|
+ : aarch64_local_sme_state::OFF);
|
|||
|
|
+
|
|||
|
|
+ /* Force ZA to contain the current function's ZA state if INSN wants
|
|||
|
|
+ to access it. */
|
|||
|
|
+ if (aarch64_insn_references_sme_state_p (insn, ZA_REGNUM))
|
|||
|
|
+ return (TEST_HARD_REG_BIT (live, ZA_REGNUM)
|
|||
|
|
+ ? aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ : aarch64_local_sme_state::ACTIVE_DEAD);
|
|||
|
|
+
|
|||
|
|
+ return aarch64_local_sme_state::ANY;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_NEEDED for HAVE_ZA_SAVE_BUFFER. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_tristate_mode
|
|||
|
|
+aarch64_mode_needed_za_save_buffer (rtx_insn *insn, HARD_REG_SET live)
|
|||
|
|
+{
|
|||
|
|
+ /* We need to set up a lazy save buffer no later than the first
|
|||
|
|
+ transition to INACTIVE_LOCAL (which involves setting up a lazy save). */
|
|||
|
|
+ if (aarch64_mode_needed_local_sme_state (insn, live)
|
|||
|
|
+ == aarch64_local_sme_state::INACTIVE_LOCAL)
|
|||
|
|
+ return aarch64_tristate_mode::YES;
|
|||
|
|
+
|
|||
|
|
+ /* Also make sure that the lazy save buffer is set up before the first
|
|||
|
|
+ insn that throws internally. The exception handler will sometimes
|
|||
|
|
+ load from it. */
|
|||
|
|
+ if (find_reg_note (insn, REG_EH_REGION, NULL_RTX))
|
|||
|
|
+ return aarch64_tristate_mode::YES;
|
|||
|
|
+
|
|||
|
|
+ return aarch64_tristate_mode::MAYBE;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_NEEDED. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET live)
|
|||
|
|
+{
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ return int (aarch64_mode_needed_za_save_buffer (insn, live));
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_mode_needed_local_sme_state (insn, live));
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_AFTER for LOCAL_SME_STATE. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_local_sme_state
|
|||
|
|
+aarch64_mode_after_local_sme_state (aarch64_local_sme_state mode,
|
|||
|
|
+ HARD_REG_SET live)
|
|||
|
|
+{
|
|||
|
|
+ /* Note places where ZA dies, so that we can try to avoid saving and
|
|||
|
|
+ restoring state that isn't needed. */
|
|||
|
|
+ if (mode == aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ && !TEST_HARD_REG_BIT (live, ZA_REGNUM))
|
|||
|
|
+ return aarch64_local_sme_state::ACTIVE_DEAD;
|
|||
|
|
+
|
|||
|
|
+ /* Note where ZA is born, e.g. when moving past an __arm_out("za")
|
|||
|
|
+ function. */
|
|||
|
|
+ if (mode == aarch64_local_sme_state::ACTIVE_DEAD
|
|||
|
|
+ && TEST_HARD_REG_BIT (live, ZA_REGNUM))
|
|||
|
|
+ return aarch64_local_sme_state::ACTIVE_LIVE;
|
|||
|
|
+
|
|||
|
|
+ return mode;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_AFTER. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_after (int entity, int mode, rtx_insn *, HARD_REG_SET live)
|
|||
|
|
+{
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ return mode;
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_mode_after_local_sme_state
|
|||
|
|
+ (aarch64_local_sme_state (mode), live));
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_CONFLUENCE for LOCAL_SME_STATE. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_local_sme_state
|
|||
|
|
+aarch64_local_sme_confluence (aarch64_local_sme_state mode1,
|
|||
|
|
+ aarch64_local_sme_state mode2)
|
|||
|
|
+{
|
|||
|
|
+ /* Perform a symmetrical check for two values. */
|
|||
|
|
+ auto is_pair = [&](aarch64_local_sme_state val1,
|
|||
|
|
+ aarch64_local_sme_state val2)
|
|||
|
|
+ {
|
|||
|
|
+ return ((mode1 == val1 && mode2 == val2)
|
|||
|
|
+ || (mode1 == val2 && mode2 == val1));
|
|||
|
|
+ };
|
|||
|
|
+
|
|||
|
|
+ /* INACTIVE_CALLER means ZA is off or it has dormant contents belonging
|
|||
|
|
+ to a caller. OFF is one of the options. */
|
|||
|
|
+ if (is_pair (aarch64_local_sme_state::INACTIVE_CALLER,
|
|||
|
|
+ aarch64_local_sme_state::OFF))
|
|||
|
|
+ return aarch64_local_sme_state::INACTIVE_CALLER;
|
|||
|
|
+
|
|||
|
|
+ /* Similarly for dormant contents belonging to the current function. */
|
|||
|
|
+ if (is_pair (aarch64_local_sme_state::INACTIVE_LOCAL,
|
|||
|
|
+ aarch64_local_sme_state::OFF))
|
|||
|
|
+ return aarch64_local_sme_state::INACTIVE_LOCAL;
|
|||
|
|
+
|
|||
|
|
+ /* Treat a conditionally-initialized value as a fully-initialized value. */
|
|||
|
|
+ if (is_pair (aarch64_local_sme_state::ACTIVE_LIVE,
|
|||
|
|
+ aarch64_local_sme_state::ACTIVE_DEAD))
|
|||
|
|
+ return aarch64_local_sme_state::ACTIVE_LIVE;
|
|||
|
|
+
|
|||
|
|
+ return aarch64_local_sme_state::ANY;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_CONFLUENCE. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_confluence (int entity, int mode1, int mode2)
|
|||
|
|
+{
|
|||
|
|
+ gcc_assert (mode1 != mode2);
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ return int (aarch64_tristate_mode::MAYBE);
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_local_sme_confluence
|
|||
|
|
+ (aarch64_local_sme_state (mode1),
|
|||
|
|
+ aarch64_local_sme_state (mode2)));
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_BACKPROP for an entity that either stays
|
|||
|
|
+ NO throughout, or makes one transition from NO to YES. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_tristate_mode
|
|||
|
|
+aarch64_one_shot_backprop (aarch64_tristate_mode mode1,
|
|||
|
|
+ aarch64_tristate_mode mode2)
|
|||
|
|
+{
|
|||
|
|
+ /* Keep bringing the transition forward until it starts from NO. */
|
|||
|
|
+ if (mode1 == aarch64_tristate_mode::MAYBE
|
|||
|
|
+ && mode2 == aarch64_tristate_mode::YES)
|
|||
|
|
+ return mode2;
|
|||
|
|
+
|
|||
|
|
+ return aarch64_tristate_mode::MAYBE;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_BACKPROP for LOCAL_SME_STATE. */
|
|||
|
|
+
|
|||
|
|
+static aarch64_local_sme_state
|
|||
|
|
+aarch64_local_sme_backprop (aarch64_local_sme_state mode1,
|
|||
|
|
+ aarch64_local_sme_state mode2)
|
|||
|
|
+{
|
|||
|
|
+ /* We always need to know what the current state is when transitioning
|
|||
|
|
+ to a new state. Force any location with indeterminate starting state
|
|||
|
|
+ to be active. */
|
|||
|
|
+ if (mode1 == aarch64_local_sme_state::ANY)
|
|||
|
|
+ switch (mode2)
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_local_sme_state::INACTIVE_CALLER:
|
|||
|
|
+ case aarch64_local_sme_state::OFF:
|
|||
|
|
+ case aarch64_local_sme_state::ACTIVE_DEAD:
|
|||
|
|
+ /* The current function's ZA state is not live. */
|
|||
|
|
+ return aarch64_local_sme_state::ACTIVE_DEAD;
|
|||
|
|
+
|
|||
|
|
+ case aarch64_local_sme_state::INACTIVE_LOCAL:
|
|||
|
|
+ case aarch64_local_sme_state::ACTIVE_LIVE:
|
|||
|
|
+ /* The current function's ZA state is live. */
|
|||
|
|
+ return aarch64_local_sme_state::ACTIVE_LIVE;
|
|||
|
|
+
|
|||
|
|
+ case aarch64_local_sme_state::SAVED_LOCAL:
|
|||
|
|
+ /* This is a transition to an exception handler. Since we don't
|
|||
|
|
+ support non-call exceptions for SME functions, the source of
|
|||
|
|
+ the transition must be known. We'll assert later if that's
|
|||
|
|
+ not the case. */
|
|||
|
|
+ return aarch64_local_sme_state::ANY;
|
|||
|
|
+
|
|||
|
|
+ case aarch64_local_sme_state::ANY:
|
|||
|
|
+ return aarch64_local_sme_state::ANY;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ return aarch64_local_sme_state::ANY;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_BACKPROP. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_backprop (int entity, int mode1, int mode2)
|
|||
|
|
+{
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ return int (aarch64_one_shot_backprop (aarch64_tristate_mode (mode1),
|
|||
|
|
+ aarch64_tristate_mode (mode2)));
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_local_sme_backprop
|
|||
|
|
+ (aarch64_local_sme_state (mode1),
|
|||
|
|
+ aarch64_local_sme_state (mode2)));
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_ENTRY. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_entry (int entity)
|
|||
|
|
+{
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ return int (aarch64_tristate_mode::NO);
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_cfun_shared_flags ("za") != 0
|
|||
|
|
+ ? aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ : aarch64_local_sme_state::INACTIVE_CALLER);
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_EXIT. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_exit (int entity)
|
|||
|
|
+{
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ return int (aarch64_tristate_mode::MAYBE);
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_cfun_shared_flags ("za") != 0
|
|||
|
|
+ ? aarch64_local_sme_state::ACTIVE_LIVE
|
|||
|
|
+ : aarch64_local_sme_state::INACTIVE_CALLER);
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_EH_HANDLER. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_eh_handler (int entity)
|
|||
|
|
+{
|
|||
|
|
+ switch (aarch64_mode_entity (entity))
|
|||
|
|
+ {
|
|||
|
|
+ case aarch64_mode_entity::HAVE_ZA_SAVE_BUFFER:
|
|||
|
|
+ /* Require a lazy save buffer to be allocated before the first
|
|||
|
|
+ insn that can throw. */
|
|||
|
|
+ return int (aarch64_tristate_mode::YES);
|
|||
|
|
+
|
|||
|
|
+ case aarch64_mode_entity::LOCAL_SME_STATE:
|
|||
|
|
+ return int (aarch64_local_sme_state::SAVED_LOCAL);
|
|||
|
|
+ }
|
|||
|
|
+ gcc_unreachable ();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MODE_PRIORITY. */
|
|||
|
|
+
|
|||
|
|
+static int
|
|||
|
|
+aarch64_mode_priority (int, int n)
|
|||
|
|
+{
|
|||
|
|
+ return n;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/* Implement TARGET_MD_ASM_ADJUST. */
|
|||
|
|
+
|
|||
|
|
+static rtx_insn *
|
|||
|
|
+aarch64_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
|
|||
|
|
+ vec<machine_mode> &input_modes,
|
|||
|
|
+ vec<const char *> &constraints,
|
|||
|
|
+ vec<rtx> &uses, vec<rtx> &clobbers,
|
|||
|
|
+ HARD_REG_SET &clobbered_regs, location_t loc)
|
|||
|
|
+{
|
|||
|
|
+ rtx_insn *seq = arm_md_asm_adjust (outputs, inputs, input_modes, constraints,
|
|||
|
|
+ uses, clobbers, clobbered_regs, loc);
|
|||
|
|
+
|
|||
|
|
+ /* "za" in the clobber list of a function with ZA state is defined to
|
|||
|
|
+ mean that the asm can read from and write to ZA. We can model the
|
|||
|
|
+ read using a USE, but unfortunately, it's not possible to model the
|
|||
|
|
+ write directly. Use a separate insn to model the effect.
|
|||
|
|
+
|
|||
|
|
+ We must ensure that ZA is active on entry, which is enforced by using
|
|||
|
|
+ SME_STATE_REGNUM. The asm must ensure that ZA is active on return. */
|
|||
|
|
+ if (TARGET_ZA)
|
|||
|
|
+ for (unsigned int i = clobbers.length (); i-- > 0; )
|
|||
|
|
+ {
|
|||
|
|
+ rtx x = clobbers[i];
|
|||
|
|
+ if (REG_P (x) && REGNO (x) == ZA_REGNUM)
|
|||
|
|
+ {
|
|||
|
|
+ auto id = cfun->machine->next_asm_update_za_id++;
|
|||
|
|
+
|
|||
|
|
+ start_sequence ();
|
|||
|
|
+ if (seq)
|
|||
|
|
+ emit_insn (seq);
|
|||
|
|
+ emit_insn (gen_aarch64_asm_update_za (gen_int_mode (id, SImode)));
|
|||
|
|
+ seq = get_insns ();
|
|||
|
|
+ end_sequence ();
|
|||
|
|
+
|
|||
|
|
+ uses.safe_push (gen_rtx_REG (VNx16QImode, ZA_REGNUM));
|
|||
|
|
+ uses.safe_push (gen_rtx_REG (DImode, SME_STATE_REGNUM));
|
|||
|
|
+
|
|||
|
|
+ clobbers.ordered_remove (i);
|
|||
|
|
+ CLEAR_HARD_REG_BIT (clobbered_regs, ZA_REGNUM);
|
|||
|
|
+ }
|
|||
|
|
+ }
|
|||
|
|
+ return seq;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
/* If CALL involves a change in PSTATE.SM, emit the instructions needed
|
|||
|
|
to switch to the new mode and the instructions needed to restore the
|
|||
|
|
original mode. Return true if something changed. */
|
|||
|
|
@@ -29108,6 +30415,9 @@ aarch64_get_v16qi_mode ()
|
|||
|
|
#undef TARGET_START_CALL_ARGS
|
|||
|
|
#define TARGET_START_CALL_ARGS aarch64_start_call_args
|
|||
|
|
|
|||
|
|
+#undef TARGET_END_CALL_ARGS
|
|||
|
|
+#define TARGET_END_CALL_ARGS aarch64_end_call_args
|
|||
|
|
+
|
|||
|
|
#undef TARGET_GIMPLE_FOLD_BUILTIN
|
|||
|
|
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
|
|||
|
|
|
|||
|
|
@@ -29473,6 +30783,9 @@ aarch64_libgcc_floating_mode_supported_p
|
|||
|
|
#undef TARGET_COMP_TYPE_ATTRIBUTES
|
|||
|
|
#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
|
|||
|
|
|
|||
|
|
+#undef TARGET_MERGE_DECL_ATTRIBUTES
|
|||
|
|
+#define TARGET_MERGE_DECL_ATTRIBUTES aarch64_merge_decl_attributes
|
|||
|
|
+
|
|||
|
|
#undef TARGET_GET_MULTILIB_ABI_NAME
|
|||
|
|
#define TARGET_GET_MULTILIB_ABI_NAME aarch64_get_multilib_abi_name
|
|||
|
|
|
|||
|
|
@@ -29493,8 +30806,35 @@ aarch64_libgcc_floating_mode_supported_p
|
|||
|
|
#undef TARGET_STRICT_ARGUMENT_NAMING
|
|||
|
|
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
|
|||
|
|
|
|||
|
|
+#undef TARGET_MODE_EMIT
|
|||
|
|
+#define TARGET_MODE_EMIT aarch64_mode_emit
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_NEEDED
|
|||
|
|
+#define TARGET_MODE_NEEDED aarch64_mode_needed
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_AFTER
|
|||
|
|
+#define TARGET_MODE_AFTER aarch64_mode_after
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_CONFLUENCE
|
|||
|
|
+#define TARGET_MODE_CONFLUENCE aarch64_mode_confluence
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_BACKPROP
|
|||
|
|
+#define TARGET_MODE_BACKPROP aarch64_mode_backprop
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_ENTRY
|
|||
|
|
+#define TARGET_MODE_ENTRY aarch64_mode_entry
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_EXIT
|
|||
|
|
+#define TARGET_MODE_EXIT aarch64_mode_exit
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_EH_HANDLER
|
|||
|
|
+#define TARGET_MODE_EH_HANDLER aarch64_mode_eh_handler
|
|||
|
|
+
|
|||
|
|
+#undef TARGET_MODE_PRIORITY
|
|||
|
|
+#define TARGET_MODE_PRIORITY aarch64_mode_priority
|
|||
|
|
+
|
|||
|
|
#undef TARGET_MD_ASM_ADJUST
|
|||
|
|
-#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust
|
|||
|
|
+#define TARGET_MD_ASM_ADJUST aarch64_md_asm_adjust
|
|||
|
|
|
|||
|
|
#undef TARGET_ASM_FILE_END
|
|||
|
|
#define TARGET_ASM_FILE_END aarch64_asm_file_end
|
|||
|
|
@@ -29505,6 +30845,9 @@ aarch64_libgcc_floating_mode_supported_p
|
|||
|
|
#undef TARGET_HAVE_SHADOW_CALL_STACK
|
|||
|
|
#define TARGET_HAVE_SHADOW_CALL_STACK true
|
|||
|
|
|
|||
|
|
+#undef TARGET_EXTRA_LIVE_ON_ENTRY
|
|||
|
|
+#define TARGET_EXTRA_LIVE_ON_ENTRY aarch64_extra_live_on_entry
|
|||
|
|
+
|
|||
|
|
#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL
|
|||
|
|
#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue
|
|||
|
|
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
|||
|
|
index 6bfe55968..89d30b9bf 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64.h
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64.h
|
|||
|
|
@@ -207,6 +207,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
/* Macros to test ISA flags. */
|
|||
|
|
|
|||
|
|
#define AARCH64_ISA_SM_OFF (aarch64_isa_flags & AARCH64_FL_SM_OFF)
|
|||
|
|
+#define AARCH64_ISA_ZA_ON (aarch64_isa_flags & AARCH64_FL_ZA_ON)
|
|||
|
|
#define AARCH64_ISA_MODE (aarch64_isa_flags & AARCH64_FL_ISA_MODES)
|
|||
|
|
#define AARCH64_ISA_CRC (aarch64_isa_flags & AARCH64_FL_CRC)
|
|||
|
|
#define AARCH64_ISA_CRYPTO (aarch64_isa_flags & AARCH64_FL_CRYPTO)
|
|||
|
|
@@ -259,6 +260,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
#define TARGET_STREAMING_COMPATIBLE \
|
|||
|
|
((aarch64_isa_flags & AARCH64_FL_SM_STATE) == 0)
|
|||
|
|
|
|||
|
|
+/* PSTATE.ZA is enabled in the current function body. */
|
|||
|
|
+#define TARGET_ZA (AARCH64_ISA_ZA_ON)
|
|||
|
|
+
|
|||
|
|
/* Crypto is an optional extension to AdvSIMD. */
|
|||
|
|
#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
|
|||
|
|
|
|||
|
|
@@ -445,7 +449,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
1, 1, 1, 1, /* SFP, AP, CC, VG */ \
|
|||
|
|
0, 0, 0, 0, 0, 0, 0, 0, /* P0 - P7 */ \
|
|||
|
|
0, 0, 0, 0, 0, 0, 0, 0, /* P8 - P15 */ \
|
|||
|
|
- 1, 1 /* FFR and FFRT */ \
|
|||
|
|
+ 1, 1, /* FFR and FFRT */ \
|
|||
|
|
+ 1, 1, 1, 1, 1, 1, 1 /* Fake registers */ \
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* X30 is marked as caller-saved which is in line with regular function call
|
|||
|
|
@@ -455,7 +460,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
true but not until function epilogues have been generated. This ensures
|
|||
|
|
that X30 is available for use in leaf functions if needed. */
|
|||
|
|
|
|||
|
|
-#define CALL_USED_REGISTERS \
|
|||
|
|
+#define CALL_REALLY_USED_REGISTERS \
|
|||
|
|
{ \
|
|||
|
|
1, 1, 1, 1, 1, 1, 1, 1, /* R0 - R7 */ \
|
|||
|
|
1, 1, 1, 1, 1, 1, 1, 1, /* R8 - R15 */ \
|
|||
|
|
@@ -468,7 +473,8 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
1, 1, 1, 0, /* SFP, AP, CC, VG */ \
|
|||
|
|
1, 1, 1, 1, 1, 1, 1, 1, /* P0 - P7 */ \
|
|||
|
|
1, 1, 1, 1, 1, 1, 1, 1, /* P8 - P15 */ \
|
|||
|
|
- 1, 1 /* FFR and FFRT */ \
|
|||
|
|
+ 1, 1, /* FFR and FFRT */ \
|
|||
|
|
+ 0, 0, 0, 0, 0, 0, 0 /* Fake registers */ \
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
#define REGISTER_NAMES \
|
|||
|
|
@@ -484,7 +490,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
"sfp", "ap", "cc", "vg", \
|
|||
|
|
"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7", \
|
|||
|
|
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15", \
|
|||
|
|
- "ffr", "ffrt" \
|
|||
|
|
+ "ffr", "ffrt", \
|
|||
|
|
+ "lowering", "tpidr2_block", "sme_state", "tpidr2_setup", \
|
|||
|
|
+ "za_free", "za_saved", "za" \
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* Generate the register aliases for core register N */
|
|||
|
|
@@ -533,7 +541,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
#define FRAME_POINTER_REGNUM SFP_REGNUM
|
|||
|
|
#define STACK_POINTER_REGNUM SP_REGNUM
|
|||
|
|
#define ARG_POINTER_REGNUM AP_REGNUM
|
|||
|
|
-#define FIRST_PSEUDO_REGISTER (FFRT_REGNUM + 1)
|
|||
|
|
+#define FIRST_PSEUDO_REGISTER (LAST_FAKE_REGNUM + 1)
|
|||
|
|
|
|||
|
|
/* The number of argument registers available for each class. */
|
|||
|
|
#define NUM_ARG_REGS 8
|
|||
|
|
@@ -657,6 +665,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
|||
|
|
|
|||
|
|
#define FP_SIMD_SAVED_REGNUM_P(REGNO) \
|
|||
|
|
(((unsigned) (REGNO - V8_REGNUM)) <= (V23_REGNUM - V8_REGNUM))
|
|||
|
|
+
|
|||
|
|
+#define FAKE_REGNUM_P(REGNO) \
|
|||
|
|
+ IN_RANGE (REGNO, FIRST_FAKE_REGNUM, LAST_FAKE_REGNUM)
|
|||
|
|
|
|||
|
|
/* Register and constant classes. */
|
|||
|
|
|
|||
|
|
@@ -677,6 +688,7 @@ enum reg_class
|
|||
|
|
PR_REGS,
|
|||
|
|
FFR_REGS,
|
|||
|
|
PR_AND_FFR_REGS,
|
|||
|
|
+ FAKE_REGS,
|
|||
|
|
ALL_REGS,
|
|||
|
|
LIM_REG_CLASSES /* Last */
|
|||
|
|
};
|
|||
|
|
@@ -700,6 +712,7 @@ enum reg_class
|
|||
|
|
"PR_REGS", \
|
|||
|
|
"FFR_REGS", \
|
|||
|
|
"PR_AND_FFR_REGS", \
|
|||
|
|
+ "FAKE_REGS", \
|
|||
|
|
"ALL_REGS" \
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
@@ -720,6 +733,7 @@ enum reg_class
|
|||
|
|
{ 0x00000000, 0x00000000, 0x000ffff0 }, /* PR_REGS */ \
|
|||
|
|
{ 0x00000000, 0x00000000, 0x00300000 }, /* FFR_REGS */ \
|
|||
|
|
{ 0x00000000, 0x00000000, 0x003ffff0 }, /* PR_AND_FFR_REGS */ \
|
|||
|
|
+ { 0x00000000, 0x00000000, 0x1fc00000 }, /* FAKE_REGS */ \
|
|||
|
|
{ 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
@@ -920,6 +934,15 @@ typedef struct GTY (()) machine_function
|
|||
|
|
bool reg_is_wrapped_separately[LAST_SAVED_REGNUM];
|
|||
|
|
/* One entry for each general purpose register. */
|
|||
|
|
rtx call_via[SP_REGNUM];
|
|||
|
|
+
|
|||
|
|
+ /* A pseudo register that points to the function's TPIDR2 block, or null
|
|||
|
|
+ if the function doesn't have a TPIDR2 block. */
|
|||
|
|
+ rtx tpidr2_block;
|
|||
|
|
+
|
|||
|
|
+ /* A pseudo register that points to the function's ZA save buffer,
|
|||
|
|
+ or null if none. */
|
|||
|
|
+ rtx za_save_buffer;
|
|||
|
|
+
|
|||
|
|
bool label_is_assembled;
|
|||
|
|
|
|||
|
|
/* True if we've expanded at least one call to a function that changes
|
|||
|
|
@@ -927,6 +950,10 @@ typedef struct GTY (()) machine_function
|
|||
|
|
guarantees that no such mode switch exists. */
|
|||
|
|
bool call_switches_pstate_sm;
|
|||
|
|
|
|||
|
|
+ /* Used to generate unique identifiers for each update to ZA by an
|
|||
|
|
+ asm statement. */
|
|||
|
|
+ unsigned int next_asm_update_za_id;
|
|||
|
|
+
|
|||
|
|
/* A set of all decls that have been passed to a vld1 intrinsic in the
|
|||
|
|
current function. This is used to help guide the vector cost model. */
|
|||
|
|
hash_set<tree> *vector_load_decls;
|
|||
|
|
@@ -996,6 +1023,10 @@ typedef struct
|
|||
|
|
bool silent_p; /* True if we should act silently, rather than
|
|||
|
|
raise an error for invalid calls. */
|
|||
|
|
|
|||
|
|
+ /* AARCH64_STATE_* flags that describe whether the function shares ZA
|
|||
|
|
+ with its callers. */
|
|||
|
|
+ unsigned int shared_za_flags;
|
|||
|
|
+
|
|||
|
|
/* A list of registers that need to be saved and restored around a
|
|||
|
|
change to PSTATE.SM. An auto_vec would be more convenient, but those
|
|||
|
|
can't be copied. */
|
|||
|
|
@@ -1344,4 +1375,61 @@ extern poly_uint16 aarch64_sve_vg;
|
|||
|
|
STACK_BOUNDARY / BITS_PER_UNIT) \
|
|||
|
|
: (crtl->outgoing_args_size + STACK_POINTER_OFFSET))
|
|||
|
|
|
|||
|
|
+#ifndef USED_FOR_TARGET
|
|||
|
|
+
|
|||
|
|
+/* Enumerates the mode-switching "entities" for AArch64. */
|
|||
|
|
+enum class aarch64_mode_entity : int
|
|||
|
|
+{
|
|||
|
|
+ /* An aarch64_tristate_mode that says whether we have created a local
|
|||
|
|
+ save buffer for the current function's ZA state. The only transition
|
|||
|
|
+ is from NO to YES. */
|
|||
|
|
+ HAVE_ZA_SAVE_BUFFER,
|
|||
|
|
+
|
|||
|
|
+ /* An aarch64_local_sme_state that reflects the state of all data
|
|||
|
|
+ controlled by PSTATE.ZA. */
|
|||
|
|
+ LOCAL_SME_STATE
|
|||
|
|
+};
|
|||
|
|
+
|
|||
|
|
+/* Describes the state of all data controlled by PSTATE.ZA. */
|
|||
|
|
+enum class aarch64_local_sme_state : int
|
|||
|
|
+{
|
|||
|
|
+ /* ZA is in the off or dormant state. If it is dormant, the contents
|
|||
|
|
+ of ZA belong to a caller. */
|
|||
|
|
+ INACTIVE_CALLER,
|
|||
|
|
+
|
|||
|
|
+ /* ZA is in the off state: PSTATE.ZA is 0 and TPIDR2_EL0 is null. */
|
|||
|
|
+ OFF,
|
|||
|
|
+
|
|||
|
|
+ /* ZA is in the off or dormant state. If it is dormant, the contents
|
|||
|
|
+ of ZA belong to the current function. */
|
|||
|
|
+ INACTIVE_LOCAL,
|
|||
|
|
+
|
|||
|
|
+ /* ZA is in the off state and the current function's ZA contents are
|
|||
|
|
+ stored in the lazy save buffer. This is the state on entry to
|
|||
|
|
+ exception handlers. */
|
|||
|
|
+ SAVED_LOCAL,
|
|||
|
|
+
|
|||
|
|
+ /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null.
|
|||
|
|
+ The contents of ZA are live. */
|
|||
|
|
+ ACTIVE_LIVE,
|
|||
|
|
+
|
|||
|
|
+ /* ZA is in the active state: PSTATE.ZA is 1 and TPIDR2_EL0 is null.
|
|||
|
|
+ The contents of ZA are dead. */
|
|||
|
|
+ ACTIVE_DEAD,
|
|||
|
|
+
|
|||
|
|
+ /* ZA could be in multiple states. */
|
|||
|
|
+ ANY
|
|||
|
|
+};
|
|||
|
|
+
|
|||
|
|
+enum class aarch64_tristate_mode : int { NO, YES, MAYBE };
|
|||
|
|
+
|
|||
|
|
+#define OPTIMIZE_MODE_SWITCHING(ENTITY) \
|
|||
|
|
+ aarch64_optimize_mode_switching (aarch64_mode_entity (ENTITY))
|
|||
|
|
+
|
|||
|
|
+#define NUM_MODES_FOR_MODE_SWITCHING \
|
|||
|
|
+ { int (aarch64_tristate_mode::MAYBE), \
|
|||
|
|
+ int (aarch64_local_sme_state::ANY) }
|
|||
|
|
+
|
|||
|
|
+#endif
|
|||
|
|
+
|
|||
|
|
#endif /* GCC_AARCH64_H */
|
|||
|
|
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
|||
|
|
index bb867de74..05a7c6675 100644
|
|||
|
|
--- a/gcc/config/aarch64/aarch64.md
|
|||
|
|
+++ b/gcc/config/aarch64/aarch64.md
|
|||
|
|
@@ -111,6 +111,56 @@
|
|||
|
|
;; "FFR token": a fake register used for representing the scheduling
|
|||
|
|
;; restrictions on FFR-related operations.
|
|||
|
|
(FFRT_REGNUM 85)
|
|||
|
|
+
|
|||
|
|
+ ;; ----------------------------------------------------------------
|
|||
|
|
+ ;; Fake registers
|
|||
|
|
+ ;; ----------------------------------------------------------------
|
|||
|
|
+ ;; These registers represent abstract things, rather than real
|
|||
|
|
+ ;; architected registers.
|
|||
|
|
+
|
|||
|
|
+ ;; Sometimes we use placeholder instructions to mark where later
|
|||
|
|
+ ;; ABI-related lowering is needed. These placeholders read and
|
|||
|
|
+ ;; write this register. Instructions that depend on the lowering
|
|||
|
|
+ ;; read the register.
|
|||
|
|
+ (LOWERING_REGNUM 86)
|
|||
|
|
+
|
|||
|
|
+ ;; Represents the contents of the current function's TPIDR2 block,
|
|||
|
|
+ ;; in abstract form.
|
|||
|
|
+ (TPIDR2_BLOCK_REGNUM 87)
|
|||
|
|
+
|
|||
|
|
+ ;; Holds the value that the current function wants PSTATE.ZA to be.
|
|||
|
|
+ ;; The actual value can sometimes vary, because it does not track
|
|||
|
|
+ ;; changes to PSTATE.ZA that happen during a lazy save and restore.
|
|||
|
|
+ ;; Those effects are instead tracked by ZA_SAVED_REGNUM.
|
|||
|
|
+ (SME_STATE_REGNUM 88)
|
|||
|
|
+
|
|||
|
|
+ ;; Instructions write to this register if they set TPIDR2_EL0 to a
|
|||
|
|
+ ;; well-defined value. Instructions read from the register if they
|
|||
|
|
+ ;; depend on the result of such writes.
|
|||
|
|
+ ;;
|
|||
|
|
+ ;; The register does not model the architected TPIDR2_EL0, just the
|
|||
|
|
+ ;; current function's management of it.
|
|||
|
|
+ (TPIDR2_SETUP_REGNUM 89)
|
|||
|
|
+
|
|||
|
|
+ ;; Represents the property "has an incoming lazy save been committed?".
|
|||
|
|
+ (ZA_FREE_REGNUM 90)
|
|||
|
|
+
|
|||
|
|
+ ;; Represents the property "are the current function's ZA contents
|
|||
|
|
+ ;; stored in the lazy save buffer, rather than in ZA itself?".
|
|||
|
|
+ (ZA_SAVED_REGNUM 91)
|
|||
|
|
+
|
|||
|
|
+ ;; Represents the contents of the current function's ZA state in
|
|||
|
|
+ ;; abstract form. At various times in the function, these contents
|
|||
|
|
+ ;; might be stored in ZA itself, or in the function's lazy save buffer.
|
|||
|
|
+ ;;
|
|||
|
|
+ ;; The contents persist even when the architected ZA is off. Private-ZA
|
|||
|
|
+ ;; functions have no effect on its contents.
|
|||
|
|
+ (ZA_REGNUM 92)
|
|||
|
|
+ ;; ----------------------------------------------------------------
|
|||
|
|
+ (FIRST_FAKE_REGNUM LOWERING_REGNUM)
|
|||
|
|
+ (LAST_FAKE_REGNUM ZA_REGNUM)
|
|||
|
|
+ ;; ----------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
;; The pair of scratch registers used for stack probing with -fstack-check.
|
|||
|
|
;; Leave R9 alone as a possible choice for the static chain.
|
|||
|
|
;; Note that the use of these registers is mutually exclusive with the use
|
|||
|
|
@@ -303,7 +353,12 @@
|
|||
|
|
UNSPEC_TAG_SPACE ; Translate address to MTE tag address space.
|
|||
|
|
UNSPEC_LD1RO
|
|||
|
|
UNSPEC_SALT_ADDR
|
|||
|
|
+ UNSPEC_SAVE_NZCV
|
|||
|
|
+ UNSPEC_RESTORE_NZCV
|
|||
|
|
UNSPECV_PATCHABLE_AREA
|
|||
|
|
+ ;; Wraps a constant integer that should be multiplied by the number
|
|||
|
|
+ ;; of quadwords in an SME vector.
|
|||
|
|
+ UNSPEC_SME_VQ
|
|||
|
|
])
|
|||
|
|
|
|||
|
|
(define_c_enum "unspecv" [
|
|||
|
|
@@ -379,7 +434,7 @@
|
|||
|
|
;; Q registers and is equivalent to "simd".
|
|||
|
|
|
|||
|
|
(define_enum "arches" [any rcpc8_4 fp fp_q base_simd nobase_simd
|
|||
|
|
- simd nosimd sve fp16])
|
|||
|
|
+ simd nosimd sve fp16 sme])
|
|||
|
|
|
|||
|
|
(define_enum_attr "arch" "arches" (const_string "any"))
|
|||
|
|
|
|||
|
|
@@ -423,7 +478,10 @@
|
|||
|
|
(match_test "TARGET_FP_F16INST"))
|
|||
|
|
|
|||
|
|
(and (eq_attr "arch" "sve")
|
|||
|
|
- (match_test "TARGET_SVE")))
|
|||
|
|
+ (match_test "TARGET_SVE"))
|
|||
|
|
+
|
|||
|
|
+ (and (eq_attr "arch" "sme")
|
|||
|
|
+ (match_test "TARGET_SME")))
|
|||
|
|
(const_string "yes")
|
|||
|
|
(const_string "no")))
|
|||
|
|
|
|||
|
|
@@ -928,7 +986,7 @@
|
|||
|
|
(set_attr "sls_length" "retbr")]
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
-(define_insn "*cb<optab><mode>1"
|
|||
|
|
+(define_insn "aarch64_cb<optab><mode>1"
|
|||
|
|
[(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r")
|
|||
|
|
(const_int 0))
|
|||
|
|
(label_ref (match_operand 1 "" ""))
|
|||
|
|
@@ -1291,6 +1349,7 @@
|
|||
|
|
/* The "mov_imm" type for CNT is just a placeholder. */
|
|||
|
|
[r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
|||
|
|
[r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
|
|||
|
|
+ [r , UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]);
|
|||
|
|
[r , m ; load_4 , * , 4] ldr\t%w0, %1
|
|||
|
|
[w , m ; load_4 , fp , 4] ldr\t%s0, %1
|
|||
|
|
[m , r Z; store_4 , * , 4] str\t%w1, %0
|
|||
|
|
@@ -1326,6 +1385,7 @@
|
|||
|
|
/* The "mov_imm" type for CNT is just a placeholder. */
|
|||
|
|
[r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
|||
|
|
[r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
|
|||
|
|
+ [r, UsR; mov_imm , sme, 4] << aarch64_output_rdsvl (operands[1]);
|
|||
|
|
[r, m ; load_8 , * , 4] ldr\t%x0, %1
|
|||
|
|
[w, m ; load_8 , fp , 4] ldr\t%d0, %1
|
|||
|
|
[m, r Z; store_8 , * , 4] str\t%x1, %0
|
|||
|
|
@@ -7733,6 +7793,21 @@
|
|||
|
|
[(set (attr "length") (symbol_ref "INTVAL (operands[0])"))]
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
+(define_insn "aarch64_save_nzcv"
|
|||
|
|
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
|||
|
|
+ (unspec:DI [(reg:CC CC_REGNUM)] UNSPEC_SAVE_NZCV))]
|
|||
|
|
+ ""
|
|||
|
|
+ "mrs\t%0, nzcv"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
+(define_insn "aarch64_restore_nzcv"
|
|||
|
|
+ [(set (reg:CC CC_REGNUM)
|
|||
|
|
+ (unspec:CC [(match_operand:DI 0 "register_operand" "r")]
|
|||
|
|
+ UNSPEC_RESTORE_NZCV))]
|
|||
|
|
+ ""
|
|||
|
|
+ "msr\tnzcv, %0"
|
|||
|
|
+)
|
|||
|
|
+
|
|||
|
|
;; AdvSIMD Stuff
|
|||
|
|
(include "aarch64-simd.md")
|
|||
|
|
|
|||
|
|
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
|
|||
|
|
index 212a73416..88fb9a07c 100644
|
|||
|
|
--- a/gcc/config/aarch64/constraints.md
|
|||
|
|
+++ b/gcc/config/aarch64/constraints.md
|
|||
|
|
@@ -220,6 +220,12 @@
|
|||
|
|
(and (match_code "const_poly_int")
|
|||
|
|
(match_test "aarch64_sve_rdvl_immediate_p (op)")))
|
|||
|
|
|
|||
|
|
+(define_constraint "UsR"
|
|||
|
|
+ "@internal
|
|||
|
|
+ A constraint that matches a value produced by RDSVL."
|
|||
|
|
+ (and (match_code "const")
|
|||
|
|
+ (match_test "aarch64_rdsvl_immediate_p (op)")))
|
|||
|
|
+
|
|||
|
|
(define_constraint "Usv"
|
|||
|
|
"@internal
|
|||
|
|
A constraint that matches a VG-based constant that can be loaded by
|
|||
|
|
diff --git a/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..a245546d8
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/g++.target/aarch64/sme/exceptions_1.C
|
|||
|
|
@@ -0,0 +1,189 @@
|
|||
|
|
+// { dg-options "-O -fno-optimize-sibling-calls" }
|
|||
|
|
+// { dg-final { check-function-bodies "**" "" } }
|
|||
|
|
+
|
|||
|
|
+void callee_inout() __arm_inout("za");
|
|||
|
|
+void callee_in() noexcept __arm_in("za");
|
|||
|
|
+void callee_out() noexcept __arm_out("za");
|
|||
|
|
+void callee_normal();
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** _Z5test1v:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** bl __cxa_begin_catch
|
|||
|
|
+** bl __cxa_end_catch
|
|||
|
|
+** mov w0, #?2
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") int
|
|||
|
|
+test1 ()
|
|||
|
|
+{
|
|||
|
|
+ try
|
|||
|
|
+ {
|
|||
|
|
+ callee_inout();
|
|||
|
|
+ return 1;
|
|||
|
|
+ }
|
|||
|
|
+ catch (...)
|
|||
|
|
+ {
|
|||
|
|
+ return 2;
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** _Z5test2v:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** bl __cxa_begin_catch
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl _Z10callee_outv
|
|||
|
|
+** bl _Z9callee_inv
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl __cxa_end_catch
|
|||
|
|
+** mov w0, #?2
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") int
|
|||
|
|
+test2 ()
|
|||
|
|
+{
|
|||
|
|
+ try
|
|||
|
|
+ {
|
|||
|
|
+ callee_inout();
|
|||
|
|
+ return 1;
|
|||
|
|
+ }
|
|||
|
|
+ catch (...)
|
|||
|
|
+ {
|
|||
|
|
+ callee_out();
|
|||
|
|
+ callee_in();
|
|||
|
|
+ return 2;
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** _Z5test3v:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+** bl _Z13callee_normalv
|
|||
|
|
+** ...
|
|||
|
|
+** bl __cxa_begin_catch
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl _Z10callee_outv
|
|||
|
|
+** bl _Z9callee_inv
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl __cxa_end_catch
|
|||
|
|
+** mov w0, #?2
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") int
|
|||
|
|
+test3 ()
|
|||
|
|
+{
|
|||
|
|
+ try
|
|||
|
|
+ {
|
|||
|
|
+ callee_normal();
|
|||
|
|
+ return 1;
|
|||
|
|
+ }
|
|||
|
|
+ catch (...)
|
|||
|
|
+ {
|
|||
|
|
+ callee_out();
|
|||
|
|
+ callee_in();
|
|||
|
|
+ return 2;
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+__arm_new("za") int
|
|||
|
|
+test4 ()
|
|||
|
|
+{
|
|||
|
|
+ try
|
|||
|
|
+ {
|
|||
|
|
+ // No lazy save set up because callee_inout is a shared-ZA function.
|
|||
|
|
+ callee_inout();
|
|||
|
|
+ return 1;
|
|||
|
|
+ }
|
|||
|
|
+ catch (...)
|
|||
|
|
+ {
|
|||
|
|
+ callee_inout();
|
|||
|
|
+ return 2;
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+// { dg-final { scan-assembler {_Z5test4v:(?:(?!msr\ttpidr2_el0, x[0-9]+).)*\tret} } }
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** _Z5test5v:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl _Z12callee_inoutv
|
|||
|
|
+** add (x[0-9]+), [^\n]+
|
|||
|
|
+** msr tpidr2_el0, \1
|
|||
|
|
+** bl _Z13callee_normalv
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __cxa_begin_catch
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** bl _Z12callee_inoutv
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl __cxa_end_catch
|
|||
|
|
+** mov w0, #?2
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") int
|
|||
|
|
+test5 ()
|
|||
|
|
+{
|
|||
|
|
+ try
|
|||
|
|
+ {
|
|||
|
|
+ callee_inout();
|
|||
|
|
+ callee_normal();
|
|||
|
|
+ return 1;
|
|||
|
|
+ }
|
|||
|
|
+ catch (...)
|
|||
|
|
+ {
|
|||
|
|
+ callee_inout();
|
|||
|
|
+ return 2;
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** _Z5test6v:
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** bl _Z13callee_normalv
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+** bl __cxa_begin_catch
|
|||
|
|
+** bl __cxa_end_catch
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+int
|
|||
|
|
+test6 () __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ try
|
|||
|
|
+ {
|
|||
|
|
+ callee_normal();
|
|||
|
|
+ callee_out();
|
|||
|
|
+ return 1;
|
|||
|
|
+ }
|
|||
|
|
+ catch (...)
|
|||
|
|
+ {
|
|||
|
|
+ return 2;
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
diff --git a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C
|
|||
|
|
index 032485adf..8b0755014 100644
|
|||
|
|
--- a/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C
|
|||
|
|
+++ b/gcc/testsuite/g++.target/aarch64/sme/keyword_macros_1.C
|
|||
|
|
@@ -2,3 +2,8 @@
|
|||
|
|
|
|||
|
|
void f1 () __arm_streaming;
|
|||
|
|
void f2 () __arm_streaming_compatible;
|
|||
|
|
+void f3 () __arm_in("za");
|
|||
|
|
+void f4 () __arm_out("za");
|
|||
|
|
+void f5 () __arm_inout("za");
|
|||
|
|
+void f6 () __arm_preserves("za");
|
|||
|
|
+__arm_new("za") void f7 () {}
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
|
|||
|
|
index 8f1b83676..fcabe3edc 100644
|
|||
|
|
--- a/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/keyword_macros_1.c
|
|||
|
|
@@ -2,3 +2,8 @@
|
|||
|
|
|
|||
|
|
void f1 () __arm_streaming;
|
|||
|
|
void f2 () __arm_streaming_compatible;
|
|||
|
|
+void f3 () __arm_in("za");
|
|||
|
|
+void f4 () __arm_out("za");
|
|||
|
|
+void f5 () __arm_inout("za");
|
|||
|
|
+void f6 () __arm_preserves("za");
|
|||
|
|
+__arm_new("za") void f7 () {}
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..856880e21
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_1.c
|
|||
|
|
@@ -0,0 +1,154 @@
|
|||
|
|
+// { dg-options "" }
|
|||
|
|
+
|
|||
|
|
+void shared_a () [[arm::inout("za")]];
|
|||
|
|
+void shared_a (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_b ();
|
|||
|
|
+void shared_b () [[arm::inout("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_c () [[arm::inout("za")]];
|
|||
|
|
+void shared_c () {} // Inherits attribute from declaration (confusingly).
|
|||
|
|
+
|
|||
|
|
+void shared_d ();
|
|||
|
|
+void shared_d () [[arm::inout("za")]] {} // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_e () [[arm::inout("za")]] {}
|
|||
|
|
+void shared_e (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_f () {}
|
|||
|
|
+void shared_f () [[arm::inout("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+extern void (*shared_g) ();
|
|||
|
|
+extern void (*shared_g) () [[arm::inout("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+extern void (*shared_h) () [[arm::inout("za")]];
|
|||
|
|
+extern void (*shared_h) (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void preserved_a () [[arm::preserves("za")]];
|
|||
|
|
+void preserved_a (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void preserved_b ();
|
|||
|
|
+void preserved_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void preserved_c () [[arm::preserves("za")]];
|
|||
|
|
+void preserved_c () {} // Inherits attribute from declaration (confusingly).
|
|||
|
|
+
|
|||
|
|
+void preserved_d ();
|
|||
|
|
+void preserved_d () [[arm::preserves("za")]] {} // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void preserved_e () [[arm::preserves("za")]] {}
|
|||
|
|
+void preserved_e (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void preserved_f () {}
|
|||
|
|
+void preserved_f () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+extern void (*preserved_g) ();
|
|||
|
|
+extern void (*preserved_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+extern void (*preserved_h) () [[arm::preserves("za")]];
|
|||
|
|
+extern void (*preserved_h) (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void replicated_1 () [[arm::in("za", "za"), arm::in("za")]];
|
|||
|
|
+void replicated_2 () [[arm::out("za", "za"), arm::out("za")]];
|
|||
|
|
+void replicated_3 () [[arm::inout("za", "za"), arm::inout("za")]];
|
|||
|
|
+void replicated_4 () [[arm::preserves("za", "za"), arm::preserves("za")]];
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void invalid_1 () [[arm::in]]; // { dg-error "wrong number of arguments" }
|
|||
|
|
+void invalid_2 () [[arm::in()]]; // { dg-error "parentheses must be omitted" }
|
|||
|
|
+ // { dg-error "wrong number of arguments" "" { target *-*-* } .-1 }
|
|||
|
|
+void invalid_3 () [[arm::in("")]]; // { dg-error "unrecognized state string ''" }
|
|||
|
|
+void invalid_4 () [[arm::in("foo")]]; // { dg-error "unrecognized state string 'foo'" }
|
|||
|
|
+void invalid_5 () [[arm::in(42)]]; // { dg-error "the arguments to 'in' must be constant strings" }
|
|||
|
|
+void invalid_6 () [[arm::in(*(int *)0 ? "za" : "za")]]; // { dg-error "the arguments to 'in' must be constant strings" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void mixed_a () [[arm::preserves("za")]];
|
|||
|
|
+void mixed_a () [[arm::inout("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void mixed_b () [[arm::inout("za")]];
|
|||
|
|
+void mixed_b () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void mixed_c () [[arm::preserves("za")]];
|
|||
|
|
+void mixed_c () [[arm::in("za")]] {} // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void mixed_d () [[arm::inout("za")]];
|
|||
|
|
+void mixed_d () [[arm::in("za")]] {} // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void mixed_e () [[arm::out("za")]] {}
|
|||
|
|
+void mixed_e () [[arm::in("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void mixed_f () [[arm::inout("za")]] {}
|
|||
|
|
+void mixed_f () [[arm::out("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+extern void (*mixed_g) () [[arm::in("za")]];
|
|||
|
|
+extern void (*mixed_g) () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+extern void (*mixed_h) () [[arm::preserves("za")]];
|
|||
|
|
+extern void (*mixed_h) () [[arm::out("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void contradiction_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+void contradiction_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+
|
|||
|
|
+int [[arm::inout("za")]] int_attr; // { dg-warning "only applies to function types" }
|
|||
|
|
+void *[[arm::preserves("za")]] ptr_attr; // { dg-warning "only applies to function types" }
|
|||
|
|
+
|
|||
|
|
+typedef void preserved_callback () [[arm::preserves("za")]];
|
|||
|
|
+typedef void shared_callback () [[arm::inout("za")]];
|
|||
|
|
+
|
|||
|
|
+void (*preserved_callback_ptr) () [[arm::preserves("za")]];
|
|||
|
|
+void (*shared_callback_ptr) () [[arm::inout("za")]];
|
|||
|
|
+
|
|||
|
|
+typedef void contradiction_callback_1 () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+typedef void contradiction_callback_2 () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+
|
|||
|
|
+void (*contradiction_callback_ptr_1) () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+void (*contradiction_callback_ptr_2) () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+
|
|||
|
|
+struct s {
|
|||
|
|
+ void (*contradiction_callback_ptr_1) () [[arm::preserves("za"), arm::inout("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+ void (*contradiction_callback_ptr_2) () [[arm::inout("za"), arm::preserves("za")]]; // { dg-error "inconsistent attributes for state 'za'" }
|
|||
|
|
+};
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void keyword_ok_1 () __arm_inout("za");
|
|||
|
|
+void keyword_ok_1 () __arm_inout("za");
|
|||
|
|
+
|
|||
|
|
+void keyword_ok_2 () __arm_in("za");
|
|||
|
|
+void keyword_ok_2 () [[arm::in("za")]];
|
|||
|
|
+
|
|||
|
|
+void keyword_ok_3 () [[arm::out("za")]];
|
|||
|
|
+void keyword_ok_3 () __arm_out("za");
|
|||
|
|
+
|
|||
|
|
+void keyword_ok_4 () __arm_inout("za") [[arm::inout("za")]];
|
|||
|
|
+
|
|||
|
|
+void keyword_ok_5 () __arm_preserves("za");
|
|||
|
|
+void keyword_ok_5 () [[arm::preserves("za")]];
|
|||
|
|
+
|
|||
|
|
+__arm_new("za") void keyword_ok_6 () {}
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+void keyword_conflict_1 () __arm_inout("za");
|
|||
|
|
+void keyword_conflict_1 (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void keyword_conflict_2 ();
|
|||
|
|
+void keyword_conflict_2 () __arm_inout("za"); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void keyword_conflict_3 () __arm_inout("za");
|
|||
|
|
+void keyword_conflict_3 () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void keyword_conflict_4 () [[arm::preserves("za")]];
|
|||
|
|
+void keyword_conflict_4 () __arm_inout("za"); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+__arm_new("za") void keyword_conflict_5 () __arm_inout("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
+__arm_new("za") void keyword_conflict_6 () __arm_preserves("za") {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..572ff309f
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_2.c
|
|||
|
|
@@ -0,0 +1,73 @@
|
|||
|
|
+// { dg-options "" }
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void new_za_a ();
|
|||
|
|
+void new_za_a ();
|
|||
|
|
+
|
|||
|
|
+void new_za_b ();
|
|||
|
|
+[[arm::new("za")]] void new_za_b ();
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void new_za_c ();
|
|||
|
|
+void new_za_c () {}
|
|||
|
|
+
|
|||
|
|
+void new_za_d ();
|
|||
|
|
+[[arm::new("za")]] void new_za_d () {}
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void new_za_e () {}
|
|||
|
|
+void new_za_e ();
|
|||
|
|
+
|
|||
|
|
+void new_za_f () {}
|
|||
|
|
+[[arm::new("za")]] void new_za_f (); // { dg-error "cannot apply attribute 'new' to 'new_za_f' after the function has been defined" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void shared_a ();
|
|||
|
|
+void shared_a () [[arm::inout("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_b () [[arm::inout("za")]];
|
|||
|
|
+[[arm::new("za")]] void shared_b (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void shared_c ();
|
|||
|
|
+void shared_c () [[arm::in("za")]] {} // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_d () [[arm::in("za")]];
|
|||
|
|
+[[arm::new("za")]] void shared_d () {} // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void shared_e () {}
|
|||
|
|
+void shared_e () [[arm::out("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_f () [[arm::out("za")]] {}
|
|||
|
|
+[[arm::new("za")]] void shared_f (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void shared_g () {}
|
|||
|
|
+void shared_g () [[arm::preserves("za")]]; // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+void shared_h () [[arm::preserves("za")]] {}
|
|||
|
|
+[[arm::new("za")]] void shared_h (); // { dg-error "conflicting types" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void contradiction_1 () [[arm::inout("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
+void contradiction_2 [[arm::new("za")]] () [[arm::inout("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
+[[arm::new("za")]] void contradiction_3 () [[arm::preserves("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
+void contradiction_4 [[arm::new("za")]] () [[arm::preserves("za")]]; // { dg-error "cannot create a new 'za' scope since 'za' is shared with callers" }
|
|||
|
|
+
|
|||
|
|
+int [[arm::new("za")]] int_attr; // { dg-warning "does not apply to types" }
|
|||
|
|
+[[arm::new("za")]] int int_var_attr; // { dg-error "applies only to function definitions" }
|
|||
|
|
+typedef void new_za_callback () [[arm::new("za")]]; // { dg-warning "does not apply to types" }
|
|||
|
|
+[[arm::new("za")]] void (*new_za_var_callback) (); // { dg-error "applies only to function definitions" }
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void complementary_1 () [[arm::streaming]] {}
|
|||
|
|
+void complementary_2 [[arm::new("za")]] () [[arm::streaming]] {}
|
|||
|
|
+[[arm::new("za")]] void complementary_3 () [[arm::streaming_compatible]] {}
|
|||
|
|
+void complementary_4 [[arm::new("za")]] () [[arm::streaming_compatible]] {}
|
|||
|
|
+
|
|||
|
|
+//----------------------------------------------------------------------------
|
|||
|
|
+
|
|||
|
|
+#pragma GCC target "+nosme"
|
|||
|
|
+
|
|||
|
|
+[[arm::new("za")]] void bereft_1 ();
|
|||
|
|
+[[arm::new("za")]] void bereft_2 () {} // { dg-error "functions with SME state require the ISA extension 'sme'" }
|
|||
|
|
+void bereft_3 () [[arm::inout("za")]];
|
|||
|
|
+void bereft_4 () [[arm::inout("za")]] {} // { dg-error "functions with SME state require the ISA extension 'sme'" }
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..203f6ae8a
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_3.c
|
|||
|
|
@@ -0,0 +1,31 @@
|
|||
|
|
+// { dg-options "" }
|
|||
|
|
+
|
|||
|
|
+void normal_callee ();
|
|||
|
|
+void in_callee () [[arm::in("za")]];
|
|||
|
|
+void out_callee () [[arm::out("za")]];
|
|||
|
|
+void inout_callee () [[arm::inout("za")]];
|
|||
|
|
+void preserves_callee () [[arm::preserves("za")]];
|
|||
|
|
+
|
|||
|
|
+struct callbacks {
|
|||
|
|
+ void (*normal_ptr) ();
|
|||
|
|
+ void (*in_ptr) () [[arm::in("za")]];
|
|||
|
|
+ void (*out_ptr) () [[arm::out("za")]];
|
|||
|
|
+ void (*inout_ptr) () [[arm::inout("za")]];
|
|||
|
|
+ void (*preserves_ptr) () [[arm::preserves("za")]];
|
|||
|
|
+};
|
|||
|
|
+
|
|||
|
|
+void
|
|||
|
|
+normal_caller (struct callbacks *c)
|
|||
|
|
+{
|
|||
|
|
+ normal_callee ();
|
|||
|
|
+ in_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
|
|||
|
|
+ out_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
|
|||
|
|
+ inout_callee (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
|
|||
|
|
+ preserves_callee (); // { dg-error {call to a function that shares SME state from a function that has no SME state} }
|
|||
|
|
+
|
|||
|
|
+ c->normal_ptr ();
|
|||
|
|
+ c->in_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
|
|||
|
|
+ c->out_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
|
|||
|
|
+ c->inout_ptr (); // { dg-error {call to a function that shares 'za' state from a function that has no 'za' state} }
|
|||
|
|
+ c->preserves_ptr (); // { dg-error {call to a function that shares SME state from a function that has no SME state} }
|
|||
|
|
+}
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..cec0abf0e
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_4.c
|
|||
|
|
@@ -0,0 +1,585 @@
|
|||
|
|
+// { dg-options "-O -fno-optimize-sibling-calls" }
|
|||
|
|
+// { dg-final { check-function-bodies "**" "" } }
|
|||
|
|
+
|
|||
|
|
+void private_za();
|
|||
|
|
+void out_za() __arm_out("za");
|
|||
|
|
+void in_za() __arm_in("za");
|
|||
|
|
+void inout_za() __arm_inout("za");
|
|||
|
|
+void preserves_za() __arm_preserves("za");
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test1:
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test1()
|
|||
|
|
+{
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test2:
|
|||
|
|
+** ldr w0, \[x0\]
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") int test2(int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ return *ptr;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test3:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** mov x29, sp
|
|||
|
|
+** bl private_za
|
|||
|
|
+** (
|
|||
|
|
+** mov w0, 0
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** |
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** mov w0, 0
|
|||
|
|
+** )
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") int test3()
|
|||
|
|
+{
|
|||
|
|
+ private_za();
|
|||
|
|
+ return 0;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test4:
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x0, tpidr2_el0
|
|||
|
|
+** cbz x0, [^\n]+
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** zero { za }
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test4()
|
|||
|
|
+{
|
|||
|
|
+ in_za(); // Uses zeroed contents.
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test5:
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x0, tpidr2_el0
|
|||
|
|
+** cbz x0, [^\n]+
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test5()
|
|||
|
|
+{
|
|||
|
|
+ private_za();
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+// Despite the long test, there shouldn't be too much scope for variation
|
|||
|
|
+// here. The point is both to test correctness and code quality.
|
|||
|
|
+/*
|
|||
|
|
+** test6:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** mov x29, sp
|
|||
|
|
+** mrs x0, tpidr2_el0
|
|||
|
|
+** cbz x0, [^\n]+
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** rdsvl (x[0-9]+), #1
|
|||
|
|
+** mul (x[0-9]+), \1, \1
|
|||
|
|
+** sub sp, sp, \2
|
|||
|
|
+** mov (x[0-9]+), sp
|
|||
|
|
+** stp \3, \1, \[x29, #?16\]
|
|||
|
|
+** add (x[0-9]+), x29, #?16
|
|||
|
|
+** msr tpidr2_el0, \4
|
|||
|
|
+** bl private_za
|
|||
|
|
+** (
|
|||
|
|
+** add (x[0-9]+), x29, #?16
|
|||
|
|
+** mrs (x[0-9]+), tpidr2_el0
|
|||
|
|
+** cbnz \6, [^\n]+
|
|||
|
|
+** smstart za
|
|||
|
|
+** mov x0, \5
|
|||
|
|
+** |
|
|||
|
|
+** add x0, x29, #?16
|
|||
|
|
+** mrs (x[0-9]+), tpidr2_el0
|
|||
|
|
+** cbnz \6, [^\n]+
|
|||
|
|
+** smstart za
|
|||
|
|
+** )
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** mov sp, x29
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test6()
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+// Rely on previous tests for the part leading up to the smstart.
|
|||
|
|
+/*
|
|||
|
|
+** test7:
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test7()
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test8:
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test8()
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test9:
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** bl private_za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** add x[0-9]+, x29, #?16
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test9()
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test10:
|
|||
|
|
+** ldr (w[0-9]+), \[x0\]
|
|||
|
|
+** cbz \1, [^\n]+
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** add [^\n]+
|
|||
|
|
+** str [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test10(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr != 0, 1))
|
|||
|
|
+ *ptr = *ptr + 1;
|
|||
|
|
+ else
|
|||
|
|
+ inout_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test11:
|
|||
|
|
+** ...
|
|||
|
|
+** ldr w[0-9]+, [^\n]+
|
|||
|
|
+** add (w[0-9]+), [^\n]+
|
|||
|
|
+** str \1, [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** ret
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ldr (w[0-9]+), [^\n]+
|
|||
|
|
+** cbnz \2, [^\n]+
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test11(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr == 0, 0))
|
|||
|
|
+ do
|
|||
|
|
+ inout_za();
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ else
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+__arm_new("za") void test12(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test13:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** smstart za
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test13(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ private_za();
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test14:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test14(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ inout_za();
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test15:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test15(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test16:
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** b [^\n]+
|
|||
|
|
+-- loop:
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+-- loop_entry:
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test16(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test17:
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test17(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test18:
|
|||
|
|
+** ldr w[0-9]+, [^\n]+
|
|||
|
|
+** cbnz w[0-9]+, [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test18(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr, 0))
|
|||
|
|
+ {
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test19:
|
|||
|
|
+** ...
|
|||
|
|
+** ldr w[0-9]+, [^\n]+
|
|||
|
|
+** cbz w[0-9]+, [^\n]+
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test19(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr != 0, 1))
|
|||
|
|
+ private_za();
|
|||
|
|
+ else
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test20:
|
|||
|
|
+** ...
|
|||
|
|
+** bl a20
|
|||
|
|
+** (?:(?!x0).)*
|
|||
|
|
+** bl b20
|
|||
|
|
+** ...
|
|||
|
|
+** mov ([wx][0-9]+), [wx]0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** mov [wx]0, \1
|
|||
|
|
+** ...
|
|||
|
|
+** bl c20
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test20()
|
|||
|
|
+{
|
|||
|
|
+ extern int a20() __arm_inout("za");
|
|||
|
|
+ extern int b20(int);
|
|||
|
|
+ extern void c20(int) __arm_inout("za");
|
|||
|
|
+ c20(b20(a20()));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test21:
|
|||
|
|
+** ...
|
|||
|
|
+** bl a21
|
|||
|
|
+** (?:(?!x0).)*
|
|||
|
|
+** bl b21
|
|||
|
|
+** ...
|
|||
|
|
+** mov (x[0-9]+), x0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** mov x0, \1
|
|||
|
|
+** ...
|
|||
|
|
+** bl c21
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test21()
|
|||
|
|
+{
|
|||
|
|
+ extern __UINT64_TYPE__ a21() __arm_inout("za");
|
|||
|
|
+ extern __UINT64_TYPE__ b21(__UINT64_TYPE__);
|
|||
|
|
+ extern void c21(__UINT64_TYPE__) __arm_inout("za");
|
|||
|
|
+ c21(b21(a21()));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test22:
|
|||
|
|
+** (?:(?!rdsvl).)*
|
|||
|
|
+** rdsvl x[0-9]+, #1
|
|||
|
|
+** (?:(?!rdsvl).)*
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test22(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ inout_za();
|
|||
|
|
+ if (*ptr)
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+ else
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test23:
|
|||
|
|
+** (?:(?!__arm_tpidr2_save).)*
|
|||
|
|
+** bl __arm_tpidr2_save
|
|||
|
|
+** (?:(?!__arm_tpidr2_save).)*
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test23(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ if (*ptr)
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+ else
|
|||
|
|
+ inout_za();
|
|||
|
|
+ inout_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test24:
|
|||
|
|
+** ...
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+** incb x1
|
|||
|
|
+** ...
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** incb x1
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** incb x1
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+__arm_new("za") void test24()
|
|||
|
|
+{
|
|||
|
|
+ in_za();
|
|||
|
|
+ asm ("incb\tx1" ::: "x1", "za");
|
|||
|
|
+ out_za();
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ asm ("incb\tx1" ::: "x1", "za");
|
|||
|
|
+ private_za();
|
|||
|
|
+ asm ("incb\tx1" ::: "x1", "za");
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..d54840d3d
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_5.c
|
|||
|
|
@@ -0,0 +1,595 @@
|
|||
|
|
+// { dg-options "-O2 -fno-optimize-sibling-calls" }
|
|||
|
|
+// { dg-final { check-function-bodies "**" "" } }
|
|||
|
|
+
|
|||
|
|
+void private_za();
|
|||
|
|
+void out_za() __arm_out("za");
|
|||
|
|
+void in_za() __arm_in("za");
|
|||
|
|
+void inout_za() __arm_inout("za");
|
|||
|
|
+void preserves_za() __arm_preserves("za");
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test1:
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test1() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test2:
|
|||
|
|
+** ldr w0, \[x0\]
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+int test2(int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ return *ptr;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test3:
|
|||
|
|
+** ...
|
|||
|
|
+** sub sp, sp, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+int test3() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ private_za();
|
|||
|
|
+ return 0;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test4:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test4() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test5:
|
|||
|
|
+** ...
|
|||
|
|
+** smstop za
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+** sub sp, sp, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test5() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ private_za();
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test6:
|
|||
|
|
+** ...
|
|||
|
|
+** bl out_za
|
|||
|
|
+** ...
|
|||
|
|
+** sub sp, sp, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test6() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test7:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test7() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test8:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** smstop za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** smstart za
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+** sub sp, sp, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test8() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test9:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** bl out_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** bl private_za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test9() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ out_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test10:
|
|||
|
|
+** ldr (w[0-9]+), \[x0\]
|
|||
|
|
+** cbz \1, [^\n]+
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** add [^\n]+
|
|||
|
|
+** str [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test10(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr != 0, 1))
|
|||
|
|
+ *ptr = *ptr + 1;
|
|||
|
|
+ else
|
|||
|
|
+ inout_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test11:
|
|||
|
|
+** (?!.*(\t__arm|\tza|tpidr2_el0)).*
|
|||
|
|
+*/
|
|||
|
|
+void test11(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr == 0, 0))
|
|||
|
|
+ do
|
|||
|
|
+ inout_za();
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ else
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+void test12(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test13:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+-- loop:
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** smstart za
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test13(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ private_za();
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test14:
|
|||
|
|
+** ...
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test14(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ inout_za();
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test15:
|
|||
|
|
+** ...
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ldr [^\n]+
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test15(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test16:
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** stp [^\n]+
|
|||
|
|
+** ...
|
|||
|
|
+** b [^\n]+
|
|||
|
|
+-- loop:
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+-- loop_entry:
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test16(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test17:
|
|||
|
|
+** ...
|
|||
|
|
+-- loop:
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** smstart za
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+** cbnz [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** [^\n]+
|
|||
|
|
+** ldp [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test17(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ while (*ptr)
|
|||
|
|
+ ptr += 1;
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test18:
|
|||
|
|
+** ldr w[0-9]+, [^\n]+
|
|||
|
|
+** cbnz w[0-9]+, [^\n]+
|
|||
|
|
+** ret
|
|||
|
|
+** ...
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, xzr
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test18(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr, 0))
|
|||
|
|
+ {
|
|||
|
|
+ out_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+void test19(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ if (__builtin_expect (*ptr != 0, 1))
|
|||
|
|
+ private_za();
|
|||
|
|
+ else
|
|||
|
|
+ do
|
|||
|
|
+ {
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ }
|
|||
|
|
+ while (*ptr);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test20:
|
|||
|
|
+** ...
|
|||
|
|
+** bl a20
|
|||
|
|
+** (?:(?!x0).)*
|
|||
|
|
+** bl b20
|
|||
|
|
+** ...
|
|||
|
|
+** mov ([wx][0-9]+), [wx]0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** mov [wx]0, \1
|
|||
|
|
+** ...
|
|||
|
|
+** bl c20
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test20() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ extern int a20() __arm_inout("za");
|
|||
|
|
+ extern int b20(int);
|
|||
|
|
+ extern void c20(int) __arm_inout("za");
|
|||
|
|
+ c20(b20(a20()));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test21:
|
|||
|
|
+** ...
|
|||
|
|
+** bl a21
|
|||
|
|
+** (?:(?!x0).)*
|
|||
|
|
+** bl b21
|
|||
|
|
+** ...
|
|||
|
|
+** mov (x[0-9]+), x0
|
|||
|
|
+** ...
|
|||
|
|
+** bl __arm_tpidr2_restore
|
|||
|
|
+** ...
|
|||
|
|
+** mov x0, \1
|
|||
|
|
+** ...
|
|||
|
|
+** bl c21
|
|||
|
|
+** ...
|
|||
|
|
+*/
|
|||
|
|
+void test21() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ extern __UINT64_TYPE__ a21() __arm_inout("za");
|
|||
|
|
+ extern __UINT64_TYPE__ b21(__UINT64_TYPE__);
|
|||
|
|
+ extern void c21(__UINT64_TYPE__) __arm_inout("za");
|
|||
|
|
+ c21(b21(a21()));
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test22:
|
|||
|
|
+** (?:(?!rdsvl).)*
|
|||
|
|
+** rdsvl x[0-9]+, #1
|
|||
|
|
+** (?:(?!rdsvl).)*
|
|||
|
|
+*/
|
|||
|
|
+void test22(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ inout_za();
|
|||
|
|
+ if (*ptr)
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+ else
|
|||
|
|
+ private_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ in_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+void test23(volatile int *ptr) __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ if (*ptr)
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+ else
|
|||
|
|
+ inout_za();
|
|||
|
|
+ inout_za();
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+/*
|
|||
|
|
+** test24:
|
|||
|
|
+** ...
|
|||
|
|
+** bl in_za
|
|||
|
|
+** ...
|
|||
|
|
+** incb x1
|
|||
|
|
+** ...
|
|||
|
|
+** bl out_za
|
|||
|
|
+** bl inout_za
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** incb x1
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** incb x1
|
|||
|
|
+** ...
|
|||
|
|
+** msr tpidr2_el0, x[0-9]+
|
|||
|
|
+** ...
|
|||
|
|
+** bl private_za
|
|||
|
|
+** ...
|
|||
|
|
+** mrs x[0-9]+, tpidr2_el0
|
|||
|
|
+** ...
|
|||
|
|
+** ret
|
|||
|
|
+*/
|
|||
|
|
+void test24() __arm_inout("za")
|
|||
|
|
+{
|
|||
|
|
+ in_za();
|
|||
|
|
+ asm ("incb\tx1" ::: "x1", "za");
|
|||
|
|
+ out_za();
|
|||
|
|
+ inout_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+ asm ("incb\tx1" ::: "x1", "za");
|
|||
|
|
+ private_za();
|
|||
|
|
+ asm ("incb\tx1" ::: "x1", "za");
|
|||
|
|
+ in_za();
|
|||
|
|
+ private_za();
|
|||
|
|
+}
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..d5b226ae1
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/sme/za_state_6.c
|
|||
|
|
@@ -0,0 +1,23 @@
|
|||
|
|
+// { dg-options "-O -fno-optimize-sibling-calls -fomit-frame-pointer" }
|
|||
|
|
+
|
|||
|
|
+void private_za();
|
|||
|
|
+void out_za() __arm_out("za");
|
|||
|
|
+void in_za() __arm_in("za");
|
|||
|
|
+
|
|||
|
|
+__arm_new("za") void test20(volatile int *ptr)
|
|||
|
|
+{
|
|||
|
|
+ if (*ptr)
|
|||
|
|
+ out_za();
|
|||
|
|
+ else
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+ if (*ptr)
|
|||
|
|
+ in_za();
|
|||
|
|
+ else
|
|||
|
|
+ *ptr += 1;
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+// { dg-final { scan-assembler {\tbl\t__arm_tpidr2_save\n} } }
|
|||
|
|
+// { dg-final { scan-assembler {\tsmstart\tza\n} } }
|
|||
|
|
+// { dg-final { scan-assembler {\tsmstop\tza\n} } }
|
|||
|
|
+// { dg-final { scan-assembler-not {\tsub\tsp, sp, x[0-9]+\n} } }
|
|||
|
|
--
|
|||
|
|
2.33.0
|
|||
|
|
|