[SME] Apply SME patches
This commit is contained in:
parent
0795cf6283
commit
57e4b61ee6
336
0097-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
Normal file
336
0097-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch
Normal file
@ -0,0 +1,336 @@
|
||||
From 9a36ca4e9188ee402327ec908d4f6860f2ee67eb Mon Sep 17 00:00:00 2001
|
||||
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||
Date: Wed, 18 May 2022 16:02:12 +0100
|
||||
Subject: [PATCH 005/157] [Backport][SME] AArch64: Cleanup CPU option
|
||||
processing code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1be715f31605976d8e4336973d3b81c5b7cea79f
|
||||
|
||||
The --with-cpu/--with-arch configure option processing not only checks valid
|
||||
arguments but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask.
|
||||
This isn't used however since a --with-cpu is translated into a -mcpu option
|
||||
which is processed as if written on the command-line (so TARGET_CPU_DEFAULT
|
||||
is never accessed).
|
||||
|
||||
So remove all the complex processing and bitmask, and just validate the
|
||||
option. Fix a bug that always reports valid architecture extensions as invalid.
|
||||
As a result the CPU processing in aarch64.c can be simplified.
|
||||
|
||||
gcc/
|
||||
* config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch
|
||||
processing. Add support for architectural extensions.
|
||||
* config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove
|
||||
AARCH64_CPU_DEFAULT_FLAGS.
|
||||
(TARGET_CPU_NBITS): Remove.
|
||||
(TARGET_CPU_MASK): Remove.
|
||||
* config/aarch64/aarch64.cc (AARCH64_CPU_DEFAULT_FLAGS): Remove define.
|
||||
(get_tune_cpu): Assert CPU is always valid.
|
||||
(get_arch): Assert architecture is always valid.
|
||||
(aarch64_override_options): Cleanup CPU selection code and simplify logic.
|
||||
(aarch64_option_restore): Remove unnecessary checks on tune.
|
||||
---
|
||||
gcc/config.gcc | 43 +------------
|
||||
gcc/config/aarch64/aarch64.cc | 115 +++++++++-------------------------
|
||||
gcc/config/aarch64/aarch64.h | 9 +--
|
||||
3 files changed, 32 insertions(+), 135 deletions(-)
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 8fdde1576..3be450471 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -4190,8 +4190,6 @@ case "${target}" in
|
||||
pattern=AARCH64_CORE
|
||||
fi
|
||||
|
||||
- ext_mask=AARCH64_CPU_DEFAULT_FLAGS
|
||||
-
|
||||
# Find the base CPU or ARCH id in aarch64-cores.def or
|
||||
# aarch64-arches.def
|
||||
if [ x"$base_val" = x ] \
|
||||
@@ -4199,23 +4197,6 @@ case "${target}" in
|
||||
${srcdir}/config/aarch64/$def \
|
||||
> /dev/null; then
|
||||
|
||||
- if [ $which = arch ]; then
|
||||
- base_id=`grep "^$pattern(\"$base_val\"," \
|
||||
- ${srcdir}/config/aarch64/$def | \
|
||||
- sed -e 's/^[^,]*,[ ]*//' | \
|
||||
- sed -e 's/,.*$//'`
|
||||
- # Extract the architecture flags from aarch64-arches.def
|
||||
- ext_mask=`grep "^$pattern(\"$base_val\"," \
|
||||
- ${srcdir}/config/aarch64/$def | \
|
||||
- sed -e 's/)$//' | \
|
||||
- sed -e 's/^.*,//'`
|
||||
- else
|
||||
- base_id=`grep "^$pattern(\"$base_val\"," \
|
||||
- ${srcdir}/config/aarch64/$def | \
|
||||
- sed -e 's/^[^,]*,[ ]*//' | \
|
||||
- sed -e 's/,.*$//'`
|
||||
- fi
|
||||
-
|
||||
# Disallow extensions in --with-tune=cortex-a53+crc.
|
||||
if [ $which = tune ] && [ x"$ext_val" != x ]; then
|
||||
echo "Architecture extensions not supported in --with-$which=$val" 1>&2
|
||||
@@ -4246,25 +4227,7 @@ case "${target}" in
|
||||
grep "^\"$base_ext\""`
|
||||
|
||||
if [ x"$base_ext" = x ] \
|
||||
- || [[ -n $opt_line ]]; then
|
||||
-
|
||||
- # These regexp extract the elements based on
|
||||
- # their group match index in the regexp.
|
||||
- ext_canon=`echo -e "$opt_line" | \
|
||||
- sed -e "s/$sed_patt/\2/"`
|
||||
- ext_on=`echo -e "$opt_line" | \
|
||||
- sed -e "s/$sed_patt/\3/"`
|
||||
- ext_off=`echo -e "$opt_line" | \
|
||||
- sed -e "s/$sed_patt/\4/"`
|
||||
-
|
||||
- if [ $ext = $base_ext ]; then
|
||||
- # Adding extension
|
||||
- ext_mask="("$ext_mask") | ("$ext_on" | "$ext_canon")"
|
||||
- else
|
||||
- # Removing extension
|
||||
- ext_mask="("$ext_mask") & ~("$ext_off" | "$ext_canon")"
|
||||
- fi
|
||||
-
|
||||
+ || [ x"$opt_line" != x ]; then
|
||||
true
|
||||
else
|
||||
echo "Unknown extension used in --with-$which=$val" 1>&2
|
||||
@@ -4273,10 +4236,6 @@ case "${target}" in
|
||||
ext_val=`echo $ext_val | sed -e 's/[a-z0-9]\+//'`
|
||||
done
|
||||
|
||||
- ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)"
|
||||
- if [ x"$base_id" != x ]; then
|
||||
- target_cpu_cname="TARGET_CPU_$base_id | $ext_mask"
|
||||
- fi
|
||||
true
|
||||
else
|
||||
# Allow --with-$which=native.
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 7c62ddb2a..ba888beb0 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -3014,8 +3014,6 @@ static const struct attribute_spec aarch64_attribute_table[] =
|
||||
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
|
||||
};
|
||||
|
||||
-#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
|
||||
-
|
||||
/* An ISA extension in the co-processor and main instruction set space. */
|
||||
struct aarch64_option_extension
|
||||
{
|
||||
@@ -18411,39 +18409,24 @@ aarch64_validate_mtune (const char *str, const struct processor **res)
|
||||
return false;
|
||||
}
|
||||
|
||||
-static_assert (TARGET_CPU_generic < TARGET_CPU_MASK,
|
||||
- "TARGET_CPU_NBITS is big enough");
|
||||
-
|
||||
-/* Return the CPU corresponding to the enum CPU.
|
||||
- If it doesn't specify a cpu, return the default. */
|
||||
+/* Return the CPU corresponding to the enum CPU. */
|
||||
|
||||
static const struct processor *
|
||||
aarch64_get_tune_cpu (enum aarch64_processor cpu)
|
||||
{
|
||||
- if (cpu != aarch64_none)
|
||||
- return &all_cores[cpu];
|
||||
+ gcc_assert (cpu != aarch64_none);
|
||||
|
||||
- /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that
|
||||
- encode the default cpu as selected by the --with-cpu GCC configure option
|
||||
- in config.gcc.
|
||||
- ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
|
||||
- flags mechanism should be reworked to make it more sane. */
|
||||
- return &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
|
||||
+ return &all_cores[cpu];
|
||||
}
|
||||
|
||||
-/* Return the architecture corresponding to the enum ARCH.
|
||||
- If it doesn't specify a valid architecture, return the default. */
|
||||
+/* Return the architecture corresponding to the enum ARCH. */
|
||||
|
||||
static const struct processor *
|
||||
aarch64_get_arch (enum aarch64_arch arch)
|
||||
{
|
||||
- if (arch != aarch64_no_arch)
|
||||
- return &all_architectures[arch];
|
||||
-
|
||||
- const struct processor *cpu
|
||||
- = &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
|
||||
+ gcc_assert (arch != aarch64_no_arch);
|
||||
|
||||
- return &all_architectures[cpu->arch];
|
||||
+ return &all_architectures[arch];
|
||||
}
|
||||
|
||||
/* Return the VG value associated with -msve-vector-bits= value VALUE. */
|
||||
@@ -18481,10 +18464,6 @@ aarch64_override_options (void)
|
||||
uint64_t arch_isa = 0;
|
||||
aarch64_isa_flags = 0;
|
||||
|
||||
- bool valid_cpu = true;
|
||||
- bool valid_tune = true;
|
||||
- bool valid_arch = true;
|
||||
-
|
||||
selected_cpu = NULL;
|
||||
selected_arch = NULL;
|
||||
selected_tune = NULL;
|
||||
@@ -18499,77 +18478,56 @@ aarch64_override_options (void)
|
||||
If either of -march or -mtune is given, they override their
|
||||
respective component of -mcpu. */
|
||||
if (aarch64_cpu_string)
|
||||
- valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
|
||||
- &cpu_isa);
|
||||
+ aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa);
|
||||
|
||||
if (aarch64_arch_string)
|
||||
- valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
|
||||
- &arch_isa);
|
||||
+ aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa);
|
||||
|
||||
if (aarch64_tune_string)
|
||||
- valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
|
||||
+ aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
|
||||
|
||||
#ifdef SUBTARGET_OVERRIDE_OPTIONS
|
||||
SUBTARGET_OVERRIDE_OPTIONS;
|
||||
#endif
|
||||
|
||||
- /* If the user did not specify a processor, choose the default
|
||||
- one for them. This will be the CPU set during configuration using
|
||||
- --with-cpu, otherwise it is "generic". */
|
||||
- if (!selected_cpu)
|
||||
- {
|
||||
- if (selected_arch)
|
||||
- {
|
||||
- selected_cpu = &all_cores[selected_arch->ident];
|
||||
- aarch64_isa_flags = arch_isa;
|
||||
- explicit_arch = selected_arch->arch;
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- /* Get default configure-time CPU. */
|
||||
- selected_cpu = aarch64_get_tune_cpu (aarch64_none);
|
||||
- aarch64_isa_flags = TARGET_CPU_DEFAULT >> TARGET_CPU_NBITS;
|
||||
- }
|
||||
-
|
||||
- if (selected_tune)
|
||||
- explicit_tune_core = selected_tune->ident;
|
||||
- }
|
||||
- /* If both -mcpu and -march are specified check that they are architecturally
|
||||
- compatible, warn if they're not and prefer the -march ISA flags. */
|
||||
- else if (selected_arch)
|
||||
+ if (selected_cpu && selected_arch)
|
||||
{
|
||||
+ /* If both -mcpu and -march are specified, warn if they are not
|
||||
+ architecturally compatible and prefer the -march ISA flags. */
|
||||
if (selected_arch->arch != selected_cpu->arch)
|
||||
{
|
||||
warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
|
||||
aarch64_cpu_string,
|
||||
aarch64_arch_string);
|
||||
}
|
||||
+
|
||||
aarch64_isa_flags = arch_isa;
|
||||
- explicit_arch = selected_arch->arch;
|
||||
- explicit_tune_core = selected_tune ? selected_tune->ident
|
||||
- : selected_cpu->ident;
|
||||
}
|
||||
- else
|
||||
+ else if (selected_cpu)
|
||||
{
|
||||
- /* -mcpu but no -march. */
|
||||
- aarch64_isa_flags = cpu_isa;
|
||||
- explicit_tune_core = selected_tune ? selected_tune->ident
|
||||
- : selected_cpu->ident;
|
||||
- gcc_assert (selected_cpu);
|
||||
selected_arch = &all_architectures[selected_cpu->arch];
|
||||
- explicit_arch = selected_arch->arch;
|
||||
+ aarch64_isa_flags = cpu_isa;
|
||||
}
|
||||
-
|
||||
- /* Set the arch as well as we will need it when outputing
|
||||
- the .arch directive in assembly. */
|
||||
- if (!selected_arch)
|
||||
+ else if (selected_arch)
|
||||
{
|
||||
- gcc_assert (selected_cpu);
|
||||
+ selected_cpu = &all_cores[selected_arch->ident];
|
||||
+ aarch64_isa_flags = arch_isa;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* No -mcpu or -march specified, so use the default CPU. */
|
||||
+ selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
|
||||
selected_arch = &all_architectures[selected_cpu->arch];
|
||||
+ aarch64_isa_flags = selected_cpu->flags;
|
||||
}
|
||||
|
||||
+ explicit_arch = selected_arch->arch;
|
||||
if (!selected_tune)
|
||||
selected_tune = selected_cpu;
|
||||
+ explicit_tune_core = selected_tune->ident;
|
||||
+
|
||||
+ gcc_assert (explicit_tune_core != aarch64_none);
|
||||
+ gcc_assert (explicit_arch != aarch64_no_arch);
|
||||
|
||||
if (aarch64_enable_bti == 2)
|
||||
{
|
||||
@@ -18605,15 +18563,6 @@ aarch64_override_options (void)
|
||||
if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
|
||||
sorry ("return address signing is only supported for %<-mabi=lp64%>");
|
||||
|
||||
- /* Make sure we properly set up the explicit options. */
|
||||
- if ((aarch64_cpu_string && valid_cpu)
|
||||
- || (aarch64_tune_string && valid_tune))
|
||||
- gcc_assert (explicit_tune_core != aarch64_none);
|
||||
-
|
||||
- if ((aarch64_cpu_string && valid_cpu)
|
||||
- || (aarch64_arch_string && valid_arch))
|
||||
- gcc_assert (explicit_arch != aarch64_no_arch);
|
||||
-
|
||||
/* The pass to insert speculation tracking runs before
|
||||
shrink-wrapping and the latter does not know how to update the
|
||||
tracking status. So disable it in this case. */
|
||||
@@ -18719,11 +18668,7 @@ aarch64_option_restore (struct gcc_options *opts,
|
||||
opts->x_explicit_arch = ptr->x_explicit_arch;
|
||||
selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
|
||||
opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
|
||||
- if (opts->x_explicit_tune_core == aarch64_none
|
||||
- && opts->x_explicit_arch != aarch64_no_arch)
|
||||
- selected_tune = &all_cores[selected_arch->ident];
|
||||
- else
|
||||
- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
|
||||
+ selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
|
||||
opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
|
||||
opts->x_aarch64_branch_protection_string
|
||||
= ptr->x_aarch64_branch_protection_string;
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 6834c3e99..14e2af054 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -811,16 +811,9 @@ enum target_cpus
|
||||
TARGET_CPU_generic
|
||||
};
|
||||
|
||||
-/* Define how many bits are used to represent the CPU in TARGET_CPU_DEFAULT.
|
||||
- This needs to be big enough to fit the value of TARGET_CPU_generic.
|
||||
- All bits after this are used to represent the AARCH64_CPU_DEFAULT_FLAGS. */
|
||||
-#define TARGET_CPU_NBITS 8
|
||||
-#define TARGET_CPU_MASK ((1 << TARGET_CPU_NBITS) - 1)
|
||||
-
|
||||
/* If there is no CPU defined at configure, use generic as default. */
|
||||
#ifndef TARGET_CPU_DEFAULT
|
||||
-#define TARGET_CPU_DEFAULT \
|
||||
- (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << TARGET_CPU_NBITS))
|
||||
+# define TARGET_CPU_DEFAULT TARGET_CPU_generic
|
||||
#endif
|
||||
|
||||
/* If inserting NOP before a mult-accumulate insn remember to adjust the
|
||||
--
|
||||
2.33.0
|
||||
|
||||
528
0098-Backport-SME-AArch64-Cleanup-option-processing-code.patch
Normal file
528
0098-Backport-SME-AArch64-Cleanup-option-processing-code.patch
Normal file
@ -0,0 +1,528 @@
|
||||
From ba32885874fc6caa90f6ae5e264bc3d51f64a26e Mon Sep 17 00:00:00 2001
|
||||
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||
Date: Wed, 1 Jun 2022 16:46:36 +0100
|
||||
Subject: [PATCH 006/157] [Backport][SME] AArch64: Cleanup option processing
|
||||
code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ae54c1b09963779c5c3914782324ff48af32e2f1
|
||||
|
||||
Further cleanup option processing. Remove the duplication of global
|
||||
variables for CPU and tune settings so that CPU option processing is
|
||||
simplified even further. Move global variables that need save and
|
||||
restore due to target option processing into aarch64.opt. This removes
|
||||
the need for explicit saving/restoring and unnecessary reparsing of
|
||||
options.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.opt (explicit_tune_core): Rename to
|
||||
selected_tune.
|
||||
(explicit_arch): Rename to selected_arch.
|
||||
(x_aarch64_override_tune_string): Remove.
|
||||
(aarch64_ra_sign_key): Add as TargetVariable so it gets saved/restored.
|
||||
(aarch64_override_tune_string): Add Save so it gets saved/restored.
|
||||
* config/aarch64/aarch64.h (aarch64_architecture_version): Remove.
|
||||
* config/aarch64/aarch64.cc (aarch64_architecture_version): Remove.
|
||||
(processor): Remove architecture_version field.
|
||||
(selected_arch): Remove global.
|
||||
(selected_cpu): Remove global.
|
||||
(selected_tune): Remove global.
|
||||
(aarch64_ra_sign_key): Move global to aarch64.opt so it is saved.
|
||||
(aarch64_override_options_internal): Use aarch64_get_tune_cpu.
|
||||
(aarch64_override_options): Further simplify code to only set
|
||||
selected_arch and selected_tune globals.
|
||||
(aarch64_option_save): Remove now that target options are saved.
|
||||
(aarch64_option_restore): Remove redundant target option restores.
|
||||
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Use
|
||||
AARCH64_ISA_V9.
|
||||
* config/aarch64/aarch64-opts.h (aarch64_key_type): Add, moved from...
|
||||
* config/aarch64/aarch64-protos.h (aarch64_key_type): Remove.
|
||||
(aarch64_ra_sign_key): Remove.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-c.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-opts.h | 6 +
|
||||
gcc/config/aarch64/aarch64-protos.h | 8 --
|
||||
gcc/config/aarch64/aarch64.cc | 183 ++++++++++------------------
|
||||
gcc/config/aarch64/aarch64.h | 3 -
|
||||
gcc/config/aarch64/aarch64.opt | 12 +-
|
||||
6 files changed, 76 insertions(+), 138 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
|
||||
index a4c407724..90d45e45d 100644
|
||||
--- a/gcc/config/aarch64/aarch64-c.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-c.cc
|
||||
@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
|
||||
{
|
||||
aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile);
|
||||
|
||||
- builtin_define_with_int_value ("__ARM_ARCH", aarch64_architecture_version);
|
||||
+ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8);
|
||||
|
||||
builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
|
||||
flag_short_enums ? 1 : 4);
|
||||
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
|
||||
index 93572fe83..421648a15 100644
|
||||
--- a/gcc/config/aarch64/aarch64-opts.h
|
||||
+++ b/gcc/config/aarch64/aarch64-opts.h
|
||||
@@ -98,4 +98,10 @@ enum stack_protector_guard {
|
||||
SSP_GLOBAL /* global canary */
|
||||
};
|
||||
|
||||
+/* The key type that -msign-return-address should use. */
|
||||
+enum aarch64_key_type {
|
||||
+ AARCH64_KEY_A,
|
||||
+ AARCH64_KEY_B
|
||||
+};
|
||||
+
|
||||
#endif
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index 475d174dd..e60ce3c36 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -672,14 +672,6 @@ enum simd_immediate_check {
|
||||
AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
|
||||
};
|
||||
|
||||
-/* The key type that -msign-return-address should use. */
|
||||
-enum aarch64_key_type {
|
||||
- AARCH64_KEY_A,
|
||||
- AARCH64_KEY_B
|
||||
-};
|
||||
-
|
||||
-extern enum aarch64_key_type aarch64_ra_sign_key;
|
||||
-
|
||||
extern struct tune_params aarch64_tune_params;
|
||||
|
||||
/* The available SVE predicate patterns, known in the ACLE as "svpattern". */
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index ba888beb0..254ecfaa2 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -306,9 +306,6 @@ static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
|
||||
aarch64_addr_query_type);
|
||||
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);
|
||||
|
||||
-/* Major revision number of the ARM Architecture implemented by the target. */
|
||||
-unsigned aarch64_architecture_version;
|
||||
-
|
||||
/* The processor for which instructions should be scheduled. */
|
||||
enum aarch64_processor aarch64_tune = cortexa53;
|
||||
|
||||
@@ -2931,7 +2928,6 @@ struct processor
|
||||
enum aarch64_processor ident;
|
||||
enum aarch64_processor sched_core;
|
||||
enum aarch64_arch arch;
|
||||
- unsigned architecture_version;
|
||||
const uint64_t flags;
|
||||
const struct tune_params *const tune;
|
||||
};
|
||||
@@ -2940,9 +2936,9 @@ struct processor
|
||||
static const struct processor all_architectures[] =
|
||||
{
|
||||
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
|
||||
- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
|
||||
+ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL},
|
||||
#include "aarch64-arches.def"
|
||||
- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
|
||||
+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
|
||||
};
|
||||
|
||||
/* Processor cores implementing AArch64. */
|
||||
@@ -2950,23 +2946,13 @@ static const struct processor all_cores[] =
|
||||
{
|
||||
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
{NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
|
||||
- all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
|
||||
FLAGS, &COSTS##_tunings},
|
||||
#include "aarch64-cores.def"
|
||||
- {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
|
||||
+ {"generic", generic, cortexa53, AARCH64_ARCH_8A,
|
||||
AARCH64_FL_FOR_ARCH8, &generic_tunings},
|
||||
- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
|
||||
+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
|
||||
};
|
||||
|
||||
-
|
||||
-/* Target specification. These are populated by the -march, -mtune, -mcpu
|
||||
- handling code or by target attributes. */
|
||||
-static const struct processor *selected_arch;
|
||||
-static const struct processor *selected_cpu;
|
||||
-static const struct processor *selected_tune;
|
||||
-
|
||||
-enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A;
|
||||
-
|
||||
/* The current tuning set. */
|
||||
struct tune_params aarch64_tune_params = generic_tunings;
|
||||
|
||||
@@ -10633,8 +10619,8 @@ aarch64_case_values_threshold (void)
|
||||
/* Use the specified limit for the number of cases before using jump
|
||||
tables at higher optimization levels. */
|
||||
if (optimize > 2
|
||||
- && selected_cpu->tune->max_case_values != 0)
|
||||
- return selected_cpu->tune->max_case_values;
|
||||
+ && aarch64_tune_params.max_case_values != 0)
|
||||
+ return aarch64_tune_params.max_case_values;
|
||||
else
|
||||
return optimize_size ? 8 : 11;
|
||||
}
|
||||
@@ -17769,6 +17755,26 @@ initialize_aarch64_tls_size (struct gcc_options *opts)
|
||||
return;
|
||||
}
|
||||
|
||||
+/* Return the CPU corresponding to the enum CPU. */
|
||||
+
|
||||
+static const struct processor *
|
||||
+aarch64_get_tune_cpu (enum aarch64_processor cpu)
|
||||
+{
|
||||
+ gcc_assert (cpu != aarch64_none);
|
||||
+
|
||||
+ return &all_cores[cpu];
|
||||
+}
|
||||
+
|
||||
+/* Return the architecture corresponding to the enum ARCH. */
|
||||
+
|
||||
+static const struct processor *
|
||||
+aarch64_get_arch (enum aarch64_arch arch)
|
||||
+{
|
||||
+ gcc_assert (arch != aarch64_no_arch);
|
||||
+
|
||||
+ return &all_architectures[arch];
|
||||
+}
|
||||
+
|
||||
/* Parse STRING looking for options in the format:
|
||||
string :: option:string
|
||||
option :: name=substring
|
||||
@@ -17879,18 +17885,18 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
|
||||
void
|
||||
aarch64_override_options_internal (struct gcc_options *opts)
|
||||
{
|
||||
- aarch64_tune_flags = selected_tune->flags;
|
||||
- aarch64_tune = selected_tune->sched_core;
|
||||
+ const struct processor *tune = aarch64_get_tune_cpu (opts->x_selected_tune);
|
||||
+ aarch64_tune_flags = tune->flags;
|
||||
+ aarch64_tune = tune->sched_core;
|
||||
/* Make a copy of the tuning parameters attached to the core, which
|
||||
we may later overwrite. */
|
||||
- aarch64_tune_params = *(selected_tune->tune);
|
||||
- aarch64_architecture_version = selected_arch->architecture_version;
|
||||
- if (selected_tune->tune == &generic_tunings)
|
||||
+ aarch64_tune_params = *(tune->tune);
|
||||
+ if (tune->tune == &generic_tunings)
|
||||
aarch64_adjust_generic_arch_tuning (aarch64_tune_params);
|
||||
|
||||
if (opts->x_aarch64_override_tune_string)
|
||||
aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
|
||||
- &aarch64_tune_params);
|
||||
+ &aarch64_tune_params);
|
||||
|
||||
/* This target defaults to strict volatile bitfields. */
|
||||
if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
|
||||
@@ -18051,13 +18057,6 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||||
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
|
||||
opts->x_flag_prefetch_loop_arrays = 1;
|
||||
|
||||
- if (opts->x_aarch64_arch_string == NULL)
|
||||
- opts->x_aarch64_arch_string = selected_arch->name;
|
||||
- if (opts->x_aarch64_cpu_string == NULL)
|
||||
- opts->x_aarch64_cpu_string = selected_cpu->name;
|
||||
- if (opts->x_aarch64_tune_string == NULL)
|
||||
- opts->x_aarch64_tune_string = selected_tune->name;
|
||||
-
|
||||
aarch64_override_options_after_change_1 (opts);
|
||||
}
|
||||
|
||||
@@ -18409,26 +18408,6 @@ aarch64_validate_mtune (const char *str, const struct processor **res)
|
||||
return false;
|
||||
}
|
||||
|
||||
-/* Return the CPU corresponding to the enum CPU. */
|
||||
-
|
||||
-static const struct processor *
|
||||
-aarch64_get_tune_cpu (enum aarch64_processor cpu)
|
||||
-{
|
||||
- gcc_assert (cpu != aarch64_none);
|
||||
-
|
||||
- return &all_cores[cpu];
|
||||
-}
|
||||
-
|
||||
-/* Return the architecture corresponding to the enum ARCH. */
|
||||
-
|
||||
-static const struct processor *
|
||||
-aarch64_get_arch (enum aarch64_arch arch)
|
||||
-{
|
||||
- gcc_assert (arch != aarch64_no_arch);
|
||||
-
|
||||
- return &all_architectures[arch];
|
||||
-}
|
||||
-
|
||||
/* Return the VG value associated with -msve-vector-bits= value VALUE. */
|
||||
|
||||
static poly_uint16
|
||||
@@ -18464,9 +18443,9 @@ aarch64_override_options (void)
|
||||
uint64_t arch_isa = 0;
|
||||
aarch64_isa_flags = 0;
|
||||
|
||||
- selected_cpu = NULL;
|
||||
- selected_arch = NULL;
|
||||
- selected_tune = NULL;
|
||||
+ const struct processor *cpu = NULL;
|
||||
+ const struct processor *arch = NULL;
|
||||
+ const struct processor *tune = NULL;
|
||||
|
||||
if (aarch64_harden_sls_string)
|
||||
aarch64_validate_sls_mitigation (aarch64_harden_sls_string);
|
||||
@@ -18478,56 +18457,52 @@ aarch64_override_options (void)
|
||||
If either of -march or -mtune is given, they override their
|
||||
respective component of -mcpu. */
|
||||
if (aarch64_cpu_string)
|
||||
- aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa);
|
||||
+ aarch64_validate_mcpu (aarch64_cpu_string, &cpu, &cpu_isa);
|
||||
|
||||
if (aarch64_arch_string)
|
||||
- aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa);
|
||||
+ aarch64_validate_march (aarch64_arch_string, &arch, &arch_isa);
|
||||
|
||||
if (aarch64_tune_string)
|
||||
- aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
|
||||
+ aarch64_validate_mtune (aarch64_tune_string, &tune);
|
||||
|
||||
#ifdef SUBTARGET_OVERRIDE_OPTIONS
|
||||
SUBTARGET_OVERRIDE_OPTIONS;
|
||||
#endif
|
||||
|
||||
- if (selected_cpu && selected_arch)
|
||||
+ if (cpu && arch)
|
||||
{
|
||||
/* If both -mcpu and -march are specified, warn if they are not
|
||||
architecturally compatible and prefer the -march ISA flags. */
|
||||
- if (selected_arch->arch != selected_cpu->arch)
|
||||
+ if (arch->arch != cpu->arch)
|
||||
{
|
||||
warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
|
||||
aarch64_cpu_string,
|
||||
aarch64_arch_string);
|
||||
}
|
||||
|
||||
+ selected_arch = arch->arch;
|
||||
aarch64_isa_flags = arch_isa;
|
||||
}
|
||||
- else if (selected_cpu)
|
||||
+ else if (cpu)
|
||||
{
|
||||
- selected_arch = &all_architectures[selected_cpu->arch];
|
||||
+ selected_arch = cpu->arch;
|
||||
aarch64_isa_flags = cpu_isa;
|
||||
}
|
||||
- else if (selected_arch)
|
||||
+ else if (arch)
|
||||
{
|
||||
- selected_cpu = &all_cores[selected_arch->ident];
|
||||
+ cpu = &all_cores[arch->ident];
|
||||
+ selected_arch = arch->arch;
|
||||
aarch64_isa_flags = arch_isa;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No -mcpu or -march specified, so use the default CPU. */
|
||||
- selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
|
||||
- selected_arch = &all_architectures[selected_cpu->arch];
|
||||
- aarch64_isa_flags = selected_cpu->flags;
|
||||
+ cpu = &all_cores[TARGET_CPU_DEFAULT];
|
||||
+ selected_arch = cpu->arch;
|
||||
+ aarch64_isa_flags = cpu->flags;
|
||||
}
|
||||
|
||||
- explicit_arch = selected_arch->arch;
|
||||
- if (!selected_tune)
|
||||
- selected_tune = selected_cpu;
|
||||
- explicit_tune_core = selected_tune->ident;
|
||||
-
|
||||
- gcc_assert (explicit_tune_core != aarch64_none);
|
||||
- gcc_assert (explicit_arch != aarch64_no_arch);
|
||||
+ selected_tune = tune ? tune->ident : cpu->ident;
|
||||
|
||||
if (aarch64_enable_bti == 2)
|
||||
{
|
||||
@@ -18646,38 +18621,14 @@ initialize_aarch64_code_model (struct gcc_options *opts)
|
||||
}
|
||||
}
|
||||
|
||||
-/* Implement TARGET_OPTION_SAVE. */
|
||||
-
|
||||
-static void
|
||||
-aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts,
|
||||
- struct gcc_options */* opts_set */)
|
||||
-{
|
||||
- ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
|
||||
- ptr->x_aarch64_branch_protection_string
|
||||
- = opts->x_aarch64_branch_protection_string;
|
||||
-}
|
||||
-
|
||||
/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
|
||||
using the information saved in PTR. */
|
||||
|
||||
static void
|
||||
aarch64_option_restore (struct gcc_options *opts,
|
||||
- struct gcc_options */* opts_set */,
|
||||
- struct cl_target_option *ptr)
|
||||
+ struct gcc_options * /* opts_set */,
|
||||
+ struct cl_target_option * /* ptr */)
|
||||
{
|
||||
- opts->x_explicit_arch = ptr->x_explicit_arch;
|
||||
- selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
|
||||
- opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
|
||||
- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
|
||||
- opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
|
||||
- opts->x_aarch64_branch_protection_string
|
||||
- = ptr->x_aarch64_branch_protection_string;
|
||||
- if (opts->x_aarch64_branch_protection_string)
|
||||
- {
|
||||
- aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string,
|
||||
- NULL);
|
||||
- }
|
||||
-
|
||||
aarch64_override_options_internal (opts);
|
||||
}
|
||||
|
||||
@@ -18687,11 +18638,11 @@ static void
|
||||
aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
|
||||
{
|
||||
const struct processor *cpu
|
||||
- = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
|
||||
- uint64_t isa_flags = ptr->x_aarch64_isa_flags;
|
||||
- const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
|
||||
+ = aarch64_get_tune_cpu (ptr->x_selected_tune);
|
||||
+ const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch);
|
||||
std::string extension
|
||||
- = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
|
||||
+ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags,
|
||||
+ arch->flags);
|
||||
|
||||
fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
|
||||
fprintf (file, "%*sselected arch = %s%s\n", indent, "",
|
||||
@@ -18804,8 +18755,7 @@ aarch64_handle_attr_arch (const char *str)
|
||||
if (parse_res == AARCH64_PARSE_OK)
|
||||
{
|
||||
gcc_assert (tmp_arch);
|
||||
- selected_arch = tmp_arch;
|
||||
- explicit_arch = selected_arch->arch;
|
||||
+ selected_arch = tmp_arch->arch;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -18843,11 +18793,8 @@ aarch64_handle_attr_cpu (const char *str)
|
||||
if (parse_res == AARCH64_PARSE_OK)
|
||||
{
|
||||
gcc_assert (tmp_cpu);
|
||||
- selected_tune = tmp_cpu;
|
||||
- explicit_tune_core = selected_tune->ident;
|
||||
-
|
||||
- selected_arch = &all_architectures[tmp_cpu->arch];
|
||||
- explicit_arch = selected_arch->arch;
|
||||
+ selected_tune = tmp_cpu->ident;
|
||||
+ selected_arch = tmp_cpu->arch;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -18915,8 +18862,7 @@ aarch64_handle_attr_tune (const char *str)
|
||||
if (parse_res == AARCH64_PARSE_OK)
|
||||
{
|
||||
gcc_assert (tmp_tune);
|
||||
- selected_tune = tmp_tune;
|
||||
- explicit_tune_core = selected_tune->ident;
|
||||
+ selected_tune = tmp_tune->ident;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -22821,7 +22767,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
|
||||
gcc_assert (targ_options);
|
||||
|
||||
const struct processor *this_arch
|
||||
- = aarch64_get_arch (targ_options->x_explicit_arch);
|
||||
+ = aarch64_get_arch (targ_options->x_selected_arch);
|
||||
|
||||
uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
|
||||
std::string extension
|
||||
@@ -22840,7 +22786,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
|
||||
useful to readers of the generated asm. Do it only when it changes
|
||||
from function to function and verbose assembly is requested. */
|
||||
const struct processor *this_tune
|
||||
- = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
|
||||
+ = aarch64_get_tune_cpu (targ_options->x_selected_tune);
|
||||
|
||||
if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
|
||||
{
|
||||
@@ -22952,7 +22898,7 @@ aarch64_start_file (void)
|
||||
= TREE_TARGET_OPTION (target_option_default_node);
|
||||
|
||||
const struct processor *default_arch
|
||||
- = aarch64_get_arch (default_options->x_explicit_arch);
|
||||
+ = aarch64_get_arch (default_options->x_selected_arch);
|
||||
uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
|
||||
std::string extension
|
||||
= aarch64_get_extension_string_for_isa_flags (default_isa_flags,
|
||||
@@ -27950,9 +27896,6 @@ aarch64_libgcc_floating_mode_supported_p
|
||||
#undef TARGET_OFFLOAD_OPTIONS
|
||||
#define TARGET_OFFLOAD_OPTIONS aarch64_offload_options
|
||||
|
||||
-#undef TARGET_OPTION_SAVE
|
||||
-#define TARGET_OPTION_SAVE aarch64_option_save
|
||||
-
|
||||
#undef TARGET_OPTION_RESTORE
|
||||
#define TARGET_OPTION_RESTORE aarch64_option_restore
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 14e2af054..7d73689e4 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -144,9 +144,6 @@
|
||||
|
||||
#define PCC_BITFIELD_TYPE_MATTERS 1
|
||||
|
||||
-/* Major revision number of the ARM Architecture implemented by the target. */
|
||||
-extern unsigned aarch64_architecture_version;
|
||||
-
|
||||
/* Instruction tuning/selection flags. */
|
||||
|
||||
/* Bit values used to identify processor capabilities. */
|
||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
index 101664c7c..836a3c784 100644
|
||||
--- a/gcc/config/aarch64/aarch64.opt
|
||||
+++ b/gcc/config/aarch64/aarch64.opt
|
||||
@@ -22,13 +22,10 @@ HeaderInclude
|
||||
config/aarch64/aarch64-opts.h
|
||||
|
||||
TargetVariable
|
||||
-enum aarch64_processor explicit_tune_core = aarch64_none
|
||||
+enum aarch64_processor selected_tune = aarch64_none
|
||||
|
||||
TargetVariable
|
||||
-enum aarch64_arch explicit_arch = aarch64_no_arch
|
||||
-
|
||||
-TargetSave
|
||||
-const char *x_aarch64_override_tune_string
|
||||
+enum aarch64_arch selected_arch = aarch64_no_arch
|
||||
|
||||
TargetVariable
|
||||
uint64_t aarch64_isa_flags = 0
|
||||
@@ -36,6 +33,9 @@ uint64_t aarch64_isa_flags = 0
|
||||
TargetVariable
|
||||
unsigned aarch64_enable_bti = 2
|
||||
|
||||
+TargetVariable
|
||||
+enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A
|
||||
+
|
||||
; The TLS dialect names to use with -mtls-dialect.
|
||||
|
||||
Enum
|
||||
@@ -139,7 +139,7 @@ Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI
|
||||
Generate code that conforms to the specified ABI.
|
||||
|
||||
moverride=
|
||||
-Target RejectNegative ToLower Joined Var(aarch64_override_tune_string)
|
||||
+Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) Save
|
||||
-moverride=<string> Power users only! Override CPU optimization parameters.
|
||||
|
||||
Enum
|
||||
--
|
||||
2.33.0
|
||||
|
||||
108
0099-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
Normal file
108
0099-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch
Normal file
@ -0,0 +1,108 @@
|
||||
From 0bfb7b0b745d0a9af13772ad48ccc102e557f95a Mon Sep 17 00:00:00 2001
|
||||
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
Date: Mon, 26 Sep 2022 10:10:25 +0100
|
||||
Subject: [PATCH 007/157] [Backport][SME] aarch64: Add -march support for
|
||||
Armv9.1-A, Armv9.2-A, Armv9.3-A
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c33e12fa479c01848f4a288883bf1ef848c94ca3
|
||||
|
||||
This is a straightforward patch that allows targeting the architecture revisions mentioned in the subject
|
||||
through -march. These are already supported in binutils.
|
||||
|
||||
Bootstrapped and tested on aarch64-none-linux-gnu.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/aarch64/aarch64-arches.def (armv9.1-a): Define.
|
||||
(armv9.2-a): Likewise.
|
||||
(armv9.3-a): Likewise.
|
||||
* config/aarch64/aarch64.h (AARCH64_FL_V9_1): Likewise.
|
||||
(AARCH64_FL_V9_2): Likewise.
|
||||
(AARCH64_FL_V9_3): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH9_1): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH9_2): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH9_3): Likewise.
|
||||
(AARCH64_ISA_V9_1): Likewise.
|
||||
(AARCH64_ISA_V9_2): Likewise.
|
||||
(AARCH64_ISA_V9_3): Likewise.
|
||||
* doc/invoke.texi (AArch64 Options): Document armv9.1-a, armv9.2-a,
|
||||
armv9.3-a values to -march.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-arches.def | 3 +++
|
||||
gcc/config/aarch64/aarch64.h | 18 ++++++++++++++++++
|
||||
gcc/doc/invoke.texi | 3 +++
|
||||
3 files changed, 24 insertions(+)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
|
||||
index 3c2b16588..6150448dc 100644
|
||||
--- a/gcc/config/aarch64/aarch64-arches.def
|
||||
+++ b/gcc/config/aarch64/aarch64-arches.def
|
||||
@@ -41,5 +41,8 @@ AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8
|
||||
AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8)
|
||||
AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R)
|
||||
AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9)
|
||||
+AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1)
|
||||
+AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2)
|
||||
+AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3)
|
||||
|
||||
#undef AARCH64_ARCH
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 7d73689e4..42aae37ef 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -239,6 +239,15 @@
|
||||
/* Armv8.8-a architecture extensions. */
|
||||
#define AARCH64_FL_V8_8 (1ULL << 45)
|
||||
|
||||
+/* Armv9.1-A. */
|
||||
+#define AARCH64_FL_V9_1 (1ULL << 46)
|
||||
+
|
||||
+/* Armv9.2-A. */
|
||||
+#define AARCH64_FL_V9_2 (1ULL << 47)
|
||||
+
|
||||
+/* Armv9.3-A. */
|
||||
+#define AARCH64_FL_V9_3 (1ULL << 48)
|
||||
+
|
||||
/* Has FP and SIMD. */
|
||||
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
|
||||
|
||||
@@ -274,6 +283,12 @@
|
||||
#define AARCH64_FL_FOR_ARCH9 \
|
||||
(AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \
|
||||
| AARCH64_FL_F16)
|
||||
+#define AARCH64_FL_FOR_ARCH9_1 \
|
||||
+ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1)
|
||||
+#define AARCH64_FL_FOR_ARCH9_2 \
|
||||
+ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2)
|
||||
+#define AARCH64_FL_FOR_ARCH9_3 \
|
||||
+ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3)
|
||||
|
||||
/* Macros to test ISA flags. */
|
||||
|
||||
@@ -314,6 +329,9 @@
|
||||
#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R)
|
||||
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
|
||||
#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9)
|
||||
+#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1)
|
||||
+#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2)
|
||||
+#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3)
|
||||
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
|
||||
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 17d9e4126..53709b246 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -19176,6 +19176,9 @@ and the features that they enable by default:
|
||||
@item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64}
|
||||
@item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops}
|
||||
@item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2}
|
||||
+@item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm}
|
||||
+@item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64}
|
||||
+@item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops}
|
||||
@item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r}
|
||||
@end multitable
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
112
0100-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
Normal file
112
0100-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch
Normal file
@ -0,0 +1,112 @@
|
||||
From b36c8c41cab42d3df45197bb287f06381d660001 Mon Sep 17 00:00:00 2001
|
||||
From: xiezhiheng <xiezhiheng@huawei.com>
|
||||
Date: Mon, 19 Feb 2024 19:27:29 +0800
|
||||
Subject: [PATCH 008/157] [Backport][SME] Revert "aarch64: Define
|
||||
__ARM_FEATURE_RCPC"
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=40a727379f3e8e6a83aea4e94c38dfa5dd8ef33d
|
||||
|
||||
Revert this commit to resolve conflicts with later patches;
|
||||
it will be re-applied later.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-c.cc | 1 -
|
||||
gcc/config/aarch64/aarch64-cores.def | 10 +++++-----
|
||||
gcc/config/aarch64/aarch64.h | 4 +---
|
||||
.../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 -------------------
|
||||
4 files changed, 6 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
|
||||
index 90d45e45d..3d2fb5ec2 100644
|
||||
--- a/gcc/config/aarch64/aarch64-c.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-c.cc
|
||||
@@ -202,7 +202,6 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
|
||||
"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
|
||||
aarch64_def_or_undef (TARGET_LS64,
|
||||
"__ARM_FEATURE_LS64", pfile);
|
||||
- aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
|
||||
|
||||
/* Not for ACLE, but required to keep "float.h" correct if we switch
|
||||
target between implementations that do or do not support ARMv8.2-A
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index 70b11eb80..842d64932 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -134,17 +134,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
/* Marvell cores (TX3). */
|
||||
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
|
||||
/* ARMv8.4-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1)
|
||||
+AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
|
||||
/* ARMv8-A big.LITTLE implementations. */
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 42aae37ef..7c090c8f2 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -262,8 +262,7 @@
|
||||
#define AARCH64_FL_FOR_ARCH8_2 \
|
||||
(AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
|
||||
#define AARCH64_FL_FOR_ARCH8_3 \
|
||||
- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH \
|
||||
- | AARCH64_FL_RCPC)
|
||||
+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH)
|
||||
#define AARCH64_FL_FOR_ARCH8_4 \
|
||||
(AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \
|
||||
| AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
|
||||
@@ -314,7 +313,6 @@
|
||||
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
|
||||
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
|
||||
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
|
||||
-#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC)
|
||||
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
|
||||
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
|
||||
#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5)
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
|
||||
index 307fa3d67..bfb044f5d 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
|
||||
@@ -248,26 +248,6 @@
|
||||
#error "__ARM_FEATURE_CRC32 is not defined but should be!"
|
||||
#endif
|
||||
|
||||
-#pragma GCC target ("arch=armv8.2-a")
|
||||
-#ifdef __ARM_FEATURE_RCPC
|
||||
-#error "__ARM_FEATURE_RCPC is defined but should not be!"
|
||||
-#endif
|
||||
-
|
||||
-#pragma GCC target ("arch=armv8.2-a+rcpc")
|
||||
-#ifndef __ARM_FEATURE_RCPC
|
||||
-#error "__ARM_FEATURE_RCPC is not defined but should be!"
|
||||
-#endif
|
||||
-
|
||||
-#pragma GCC target ("+norcpc")
|
||||
-#ifdef __ARM_FEATURE_RCPC
|
||||
-#error "__ARM_FEATURE_RCPC is defined but should not be!"
|
||||
-#endif
|
||||
-
|
||||
-#pragma GCC target ("arch=armv8.3-a")
|
||||
-#ifndef __ARM_FEATURE_RCPC
|
||||
-#error "__ARM_FEATURE_RCPC is not defined but should be!"
|
||||
-#endif
|
||||
-
|
||||
int
|
||||
foo (int a)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,39 @@
|
||||
From 34374de5edde59f27a1b3b443e8a163fc5b528d7 Mon Sep 17 00:00:00 2001
|
||||
From: xiezhiheng <xiezhiheng@huawei.com>
|
||||
Date: Tue, 20 Feb 2024 10:13:06 +0800
|
||||
Subject: [PATCH 009/157] [Backport][SME] Revert "Ampere-1 and Ampere-1A core
|
||||
definition in aarch64-cores.def"
|
||||
|
||||
Revert it to resolve conflicts with later patches; it will be re-applied
|
||||
later. It was introduced by commits 3668a59ae22a and e9f0d974600e.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-cores.def | 6 ++----
|
||||
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index 842d64932..0402bfb74 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -69,8 +69,7 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH
|
||||
AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
/* Ampere Computing ('\xC0') cores. */
|
||||
-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3, ampere1, 0xC0, 0xac3, -1)
|
||||
-AARCH64_CORE("ampere1a", ampere1a, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3 | AARCH64_FL_MEMTAG, ampere1a, 0xC0, 0xac4, -1)
|
||||
+AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1)
|
||||
/* Do not swap around "emag" and "xgene1",
|
||||
this order is required to handle variant correctly. */
|
||||
AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
@@ -164,8 +163,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor
|
||||
/* Armv9.0-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG
|
||||
- | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
|
||||
AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
157
0102-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
Normal file
157
0102-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch
Normal file
@ -0,0 +1,157 @@
|
||||
From 244780570ebc85c44806559ba165d4a70a2333d1 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:50 +0100
|
||||
Subject: [PATCH 010/157] [Backport][SME] aarch64: Rename AARCH64_ISA
|
||||
architecture-level macros
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a4788ac3bae1467b0379852d5a6690a8496d0c9
|
||||
|
||||
All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R
|
||||
are for the A profile: they cause __ARM_ARCH_PROFILE to be set to
|
||||
'A' and they are associated with architecture names like armv8.4-a.
|
||||
|
||||
It's convenient for later patches if we make this explicit
|
||||
by adding an "A" to the name. Also, rather than add an underscore
|
||||
(as for V8_R) it's more convenient to add the profile directly
|
||||
to the number, like we already do in the ARCH_IDENT field of the
|
||||
aarch64-arches.def entries.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3)
|
||||
(AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6)
|
||||
(AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2)
|
||||
(AARCH64_ISA_V9_3): Add "A" to the end of the name.
|
||||
(AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R.
|
||||
(TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update
|
||||
accordingly.
|
||||
* common/config/aarch64/aarch64-common.cc
|
||||
(aarch64_get_extension_string_for_isa_flags): Likewise.
|
||||
* config/aarch64/aarch64-c.cc
|
||||
(aarch64_define_unconditional_macros): Likewise.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-c.cc | 4 +--
|
||||
gcc/config/aarch64/aarch64.h | 28 ++++++++++-----------
|
||||
3 files changed, 17 insertions(+), 17 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 85ce8133b..3dc020f0c 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -506,7 +506,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
|
||||
|
||||
Note that assemblers with Armv8-R AArch64 support should not have this
|
||||
issue, so we don't need this fix when targeting Armv8-R. */
|
||||
- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R)
|
||||
+ if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
|
||||
isa_flag_bits |= AARCH64_ISA_CRC;
|
||||
|
||||
/* Pass Two:
|
||||
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
|
||||
index 3d2fb5ec2..18c9b975b 100644
|
||||
--- a/gcc/config/aarch64/aarch64-c.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-c.cc
|
||||
@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
|
||||
builtin_define ("__ARM_ARCH_8A");
|
||||
|
||||
builtin_define_with_int_value ("__ARM_ARCH_PROFILE",
|
||||
- AARCH64_ISA_V8_R ? 'R' : 'A');
|
||||
+ AARCH64_ISA_V8R ? 'R' : 'A');
|
||||
builtin_define ("__ARM_FEATURE_CLZ");
|
||||
builtin_define ("__ARM_FEATURE_IDIV");
|
||||
builtin_define ("__ARM_FEATURE_UNALIGNED");
|
||||
@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
|
||||
{
|
||||
aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile);
|
||||
|
||||
- builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8);
|
||||
+ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8);
|
||||
|
||||
builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
|
||||
flag_short_enums ? 1 : 4);
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 7c090c8f2..356a263b2 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -297,7 +297,7 @@
|
||||
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
|
||||
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
|
||||
#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
|
||||
-#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
|
||||
+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2)
|
||||
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
|
||||
#define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE)
|
||||
#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2)
|
||||
@@ -305,31 +305,31 @@
|
||||
#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
|
||||
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
|
||||
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
|
||||
-#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
|
||||
+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3)
|
||||
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
|
||||
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
|
||||
#define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2)
|
||||
-#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4)
|
||||
+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4)
|
||||
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
|
||||
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
|
||||
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
|
||||
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
|
||||
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
|
||||
-#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5)
|
||||
+#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5)
|
||||
#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME)
|
||||
#define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG)
|
||||
-#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6)
|
||||
+#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6)
|
||||
#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
|
||||
#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM)
|
||||
#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
|
||||
#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
|
||||
#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
|
||||
-#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R)
|
||||
+#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R)
|
||||
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
|
||||
-#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9)
|
||||
-#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1)
|
||||
-#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2)
|
||||
-#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3)
|
||||
+#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9)
|
||||
+#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1)
|
||||
+#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2)
|
||||
+#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3)
|
||||
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
|
||||
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
|
||||
|
||||
@@ -383,16 +383,16 @@
|
||||
#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4)
|
||||
|
||||
/* ARMv8.3-A features. */
|
||||
-#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
|
||||
+#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
|
||||
|
||||
/* Javascript conversion instruction from Armv8.3-a. */
|
||||
-#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3)
|
||||
+#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3A)
|
||||
|
||||
/* Armv8.3-a Complex number extension to AdvSIMD extensions. */
|
||||
#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3)
|
||||
|
||||
/* Floating-point rounding instructions from Armv8.5-a. */
|
||||
-#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT)
|
||||
+#define TARGET_FRINT (AARCH64_ISA_V8_5A && TARGET_FLOAT)
|
||||
|
||||
/* TME instructions are enabled. */
|
||||
#define TARGET_TME (AARCH64_ISA_TME)
|
||||
@@ -401,7 +401,7 @@
|
||||
#define TARGET_RNG (AARCH64_ISA_RNG)
|
||||
|
||||
/* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */
|
||||
-#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG)
|
||||
+#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
|
||||
|
||||
/* I8MM instructions are enabled through +i8mm. */
|
||||
#define TARGET_I8MM (AARCH64_ISA_I8MM)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
220
0103-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
Normal file
220
0103-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch
Normal file
@ -0,0 +1,220 @@
|
||||
From e1b067871c4c39565bf6059b4924a810923c6eeb Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:51 +0100
|
||||
Subject: [PATCH 011/157] [Backport][SME] aarch64: Rename AARCH64_FL
|
||||
architecture-level macros
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=78aaafc3d4dc0ef997b4747349d3836ca2f7e301
|
||||
|
||||
Following on from the previous AARCH64_ISA patch, this one adds the
|
||||
profile name directly to the end of architecture-level AARCH64_FL_*
|
||||
macros.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2)
|
||||
(AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6)
|
||||
(AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1)
|
||||
(AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name.
|
||||
(AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R.
|
||||
(AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly.
|
||||
(AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise.
|
||||
(AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A)
|
||||
(AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise.
|
||||
(AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise.
|
||||
(AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.h | 72 ++++++++++++++++++------------------
|
||||
1 file changed, 36 insertions(+), 36 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 356a263b2..5a91dfdd2 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -154,22 +154,22 @@
|
||||
/* ARMv8.1-A architecture extensions. */
|
||||
#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */
|
||||
#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */
|
||||
-#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */
|
||||
+#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */
|
||||
/* Armv8-R. */
|
||||
-#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */
|
||||
+#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */
|
||||
/* ARMv8.2-A architecture extensions. */
|
||||
-#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */
|
||||
+#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */
|
||||
#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */
|
||||
#define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */
|
||||
/* ARMv8.3-A architecture extensions. */
|
||||
-#define AARCH64_FL_V8_3 (1 << 11) /* Has ARMv8.3-A features. */
|
||||
+#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */
|
||||
#define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */
|
||||
#define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */
|
||||
/* New flags to split crypto into aes and sha2. */
|
||||
#define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */
|
||||
#define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */
|
||||
/* ARMv8.4-A architecture extensions. */
|
||||
-#define AARCH64_FL_V8_4 (1 << 16) /* Has ARMv8.4-A features. */
|
||||
+#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */
|
||||
#define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */
|
||||
#define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */
|
||||
#define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */
|
||||
@@ -179,7 +179,7 @@
|
||||
#define AARCH64_FL_PROFILE (1 << 21)
|
||||
|
||||
/* ARMv8.5-A architecture extensions. */
|
||||
-#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */
|
||||
+#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */
|
||||
#define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */
|
||||
#define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging
|
||||
Extensions. */
|
||||
@@ -204,7 +204,7 @@
|
||||
#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */
|
||||
|
||||
/* Armv8.6-A architecture extensions. */
|
||||
-#define AARCH64_FL_V8_6 (1ULL << 34)
|
||||
+#define AARCH64_FL_V8_6A (1ULL << 34)
|
||||
|
||||
/* 8-bit Integer Matrix Multiply (I8MM) extensions. */
|
||||
#define AARCH64_FL_I8MM (1ULL << 35)
|
||||
@@ -225,28 +225,28 @@
|
||||
#define AARCH64_FL_PAUTH (1ULL << 40)
|
||||
|
||||
/* Armv9.0-A. */
|
||||
-#define AARCH64_FL_V9 (1ULL << 41) /* Armv9.0-A Architecture. */
|
||||
+#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */
|
||||
|
||||
/* 64-byte atomic load/store extensions. */
|
||||
#define AARCH64_FL_LS64 (1ULL << 42)
|
||||
|
||||
/* Armv8.7-a architecture extensions. */
|
||||
-#define AARCH64_FL_V8_7 (1ULL << 43)
|
||||
+#define AARCH64_FL_V8_7A (1ULL << 43)
|
||||
|
||||
/* Hardware memory operation instructions. */
|
||||
#define AARCH64_FL_MOPS (1ULL << 44)
|
||||
|
||||
/* Armv8.8-a architecture extensions. */
|
||||
-#define AARCH64_FL_V8_8 (1ULL << 45)
|
||||
+#define AARCH64_FL_V8_8A (1ULL << 45)
|
||||
|
||||
/* Armv9.1-A. */
|
||||
-#define AARCH64_FL_V9_1 (1ULL << 46)
|
||||
+#define AARCH64_FL_V9_1A (1ULL << 46)
|
||||
|
||||
/* Armv9.2-A. */
|
||||
-#define AARCH64_FL_V9_2 (1ULL << 47)
|
||||
+#define AARCH64_FL_V9_2A (1ULL << 47)
|
||||
|
||||
/* Armv9.3-A. */
|
||||
-#define AARCH64_FL_V9_3 (1ULL << 48)
|
||||
+#define AARCH64_FL_V9_3A (1ULL << 48)
|
||||
|
||||
/* Has FP and SIMD. */
|
||||
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
|
||||
@@ -258,36 +258,36 @@
|
||||
#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
|
||||
#define AARCH64_FL_FOR_ARCH8_1 \
|
||||
(AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
|
||||
- | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
|
||||
+ | AARCH64_FL_RDMA | AARCH64_FL_V8_1A)
|
||||
#define AARCH64_FL_FOR_ARCH8_2 \
|
||||
- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
|
||||
+ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A)
|
||||
#define AARCH64_FL_FOR_ARCH8_3 \
|
||||
- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH)
|
||||
+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
|
||||
#define AARCH64_FL_FOR_ARCH8_4 \
|
||||
- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \
|
||||
+ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
|
||||
| AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
|
||||
#define AARCH64_FL_FOR_ARCH8_5 \
|
||||
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \
|
||||
+ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \
|
||||
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
|
||||
#define AARCH64_FL_FOR_ARCH8_6 \
|
||||
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \
|
||||
+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \
|
||||
| AARCH64_FL_I8MM | AARCH64_FL_BF16)
|
||||
#define AARCH64_FL_FOR_ARCH8_7 \
|
||||
- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7 | AARCH64_FL_LS64)
|
||||
+ (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64)
|
||||
#define AARCH64_FL_FOR_ARCH8_8 \
|
||||
- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8 | AARCH64_FL_MOPS)
|
||||
+ (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS)
|
||||
|
||||
#define AARCH64_FL_FOR_ARCH8_R \
|
||||
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R)
|
||||
+ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R)
|
||||
#define AARCH64_FL_FOR_ARCH9 \
|
||||
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \
|
||||
+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \
|
||||
| AARCH64_FL_F16)
|
||||
#define AARCH64_FL_FOR_ARCH9_1 \
|
||||
- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1)
|
||||
+ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A)
|
||||
#define AARCH64_FL_FOR_ARCH9_2 \
|
||||
- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2)
|
||||
+ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A)
|
||||
#define AARCH64_FL_FOR_ARCH9_3 \
|
||||
- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3)
|
||||
+ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A)
|
||||
|
||||
/* Macros to test ISA flags. */
|
||||
|
||||
@@ -297,7 +297,7 @@
|
||||
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
|
||||
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
|
||||
#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
|
||||
-#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2)
|
||||
+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2A)
|
||||
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
|
||||
#define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE)
|
||||
#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2)
|
||||
@@ -305,31 +305,31 @@
|
||||
#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
|
||||
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
|
||||
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
|
||||
-#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3)
|
||||
+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A)
|
||||
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
|
||||
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
|
||||
#define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2)
|
||||
-#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4)
|
||||
+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4A)
|
||||
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
|
||||
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
|
||||
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
|
||||
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
|
||||
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
|
||||
-#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5)
|
||||
+#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
|
||||
#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME)
|
||||
#define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG)
|
||||
-#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6)
|
||||
+#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6A)
|
||||
#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
|
||||
#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM)
|
||||
#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
|
||||
#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
|
||||
#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
|
||||
-#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R)
|
||||
+#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R)
|
||||
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
|
||||
-#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9)
|
||||
-#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1)
|
||||
-#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2)
|
||||
-#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3)
|
||||
+#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A)
|
||||
+#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1A)
|
||||
+#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2A)
|
||||
+#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3A)
|
||||
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
|
||||
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
398
0104-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
Normal file
398
0104-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch
Normal file
@ -0,0 +1,398 @@
|
||||
From 7da27deb7413d7d1fd2c543617640e2de5b10db0 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:51 +0100
|
||||
Subject: [PATCH 012/157] [Backport][SME] aarch64: Rename AARCH64_FL_FOR_ARCH
|
||||
macros
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f833d1900176509e16b6f5563cfe58508fef5d2
|
||||
|
||||
This patch renames AARCH64_FL_FOR_ARCH* macros to follow the
|
||||
same V<number><profile> names that we (now) use elsewhere.
|
||||
|
||||
The names are only temporary -- a later patch will move the
|
||||
information to the .def file instead. However, it helps with
|
||||
the sequencing to do this first.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to...
|
||||
(AARCH64_FL_FOR_V8A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_1): Rename to...
|
||||
(AARCH64_FL_FOR_V8_1A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_2): Rename to...
|
||||
(AARCH64_FL_FOR_V8_2A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_3): Rename to...
|
||||
(AARCH64_FL_FOR_V8_3A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_4): Rename to...
|
||||
(AARCH64_FL_FOR_V8_4A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_5): Rename to...
|
||||
(AARCH64_FL_FOR_V8_5A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_6): Rename to...
|
||||
(AARCH64_FL_FOR_V8_6A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_7): Rename to...
|
||||
(AARCH64_FL_FOR_V8_7A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_8): Rename to...
|
||||
(AARCH64_FL_FOR_V8_8A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH8_R): Rename to...
|
||||
(AARCH64_FL_FOR_V8R): ...this.
|
||||
(AARCH64_FL_FOR_ARCH9): Rename to...
|
||||
(AARCH64_FL_FOR_V9A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH9_1): Rename to...
|
||||
(AARCH64_FL_FOR_V9_1A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH9_2): Rename to...
|
||||
(AARCH64_FL_FOR_V9_2A): ...this.
|
||||
(AARCH64_FL_FOR_ARCH9_3): Rename to...
|
||||
(AARCH64_FL_FOR_V9_3A): ...this.
|
||||
* common/config/aarch64/aarch64-common.cc (all_cores): Update
|
||||
accordingly.
|
||||
* config/aarch64/aarch64-arches.def: Likewise.
|
||||
* config/aarch64/aarch64-cores.def: Likewise.
|
||||
* config/aarch64/aarch64.cc (all_cores): Likewise.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-arches.def | 28 ++---
|
||||
gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
|
||||
gcc/config/aarch64/aarch64.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64.h | 56 ++++-----
|
||||
5 files changed, 109 insertions(+), 109 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 3dc020f0c..0461201a5 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] =
|
||||
#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
{NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
|
||||
#include "config/aarch64/aarch64-cores.def"
|
||||
- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
|
||||
+ {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
|
||||
{"", aarch64_no_arch, 0}
|
||||
};
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
|
||||
index 6150448dc..c6bf7d82c 100644
|
||||
--- a/gcc/config/aarch64/aarch64-arches.def
|
||||
+++ b/gcc/config/aarch64/aarch64-arches.def
|
||||
@@ -30,19 +30,19 @@
|
||||
Due to the assumptions about the positions of these fields in config.gcc,
|
||||
the NAME should be kept as the first argument and FLAGS as the last. */
|
||||
|
||||
-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8)
|
||||
-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1)
|
||||
-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2)
|
||||
-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3)
|
||||
-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4)
|
||||
-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5)
|
||||
-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6)
|
||||
-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8_7)
|
||||
-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8)
|
||||
-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R)
|
||||
-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9)
|
||||
-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1)
|
||||
-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2)
|
||||
-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3)
|
||||
+AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A)
|
||||
+AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A)
|
||||
+AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A)
|
||||
+AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A)
|
||||
+AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A)
|
||||
+AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A)
|
||||
+AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A)
|
||||
+AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A)
|
||||
+AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A)
|
||||
+AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R)
|
||||
+AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A)
|
||||
+AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A)
|
||||
+AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A)
|
||||
+AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A)
|
||||
|
||||
#undef AARCH64_ARCH
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index 0402bfb74..c4038c641 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -46,132 +46,132 @@
|
||||
/* ARMv8-A Architecture Processors. */
|
||||
|
||||
/* ARM ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
|
||||
-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
|
||||
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
|
||||
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
|
||||
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
|
||||
-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
|
||||
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
|
||||
+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
|
||||
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
|
||||
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
|
||||
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
|
||||
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
+AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
/* Do not swap around "thunderxt88p1" and "thunderxt88",
|
||||
this order is required to handle variant correctly. */
|
||||
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
|
||||
-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
|
||||
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
|
||||
+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
|
||||
|
||||
/* OcteonTX is the official name for T81/T83. */
|
||||
-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
+AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
+AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
+AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
/* Ampere Computing ('\xC0') cores. */
|
||||
-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1)
|
||||
+AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
|
||||
/* Do not swap around "emag" and "xgene1",
|
||||
this order is required to handle variant correctly. */
|
||||
-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
+AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
|
||||
/* APM ('P') cores. */
|
||||
-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
|
||||
+AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
+AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
|
||||
/* Samsung ('S') cores. */
|
||||
-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
|
||||
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
|
||||
|
||||
/* HXT ('h') cores. */
|
||||
-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
|
||||
+AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
|
||||
|
||||
/* ARMv8.1-A Architecture Processors. */
|
||||
|
||||
/* Broadcom ('B') cores. */
|
||||
-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
+AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
|
||||
+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
|
||||
|
||||
/* ARMv8.2-A Architecture Processors. */
|
||||
|
||||
/* ARM ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
|
||||
-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
|
||||
-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
|
||||
-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
|
||||
-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
|
||||
-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
|
||||
-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
|
||||
-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
|
||||
-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
|
||||
-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
|
||||
-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
|
||||
-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
|
||||
+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
|
||||
+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
|
||||
+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
|
||||
+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
|
||||
+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
|
||||
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
|
||||
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
|
||||
+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
|
||||
+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
|
||||
+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
|
||||
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
|
||||
+AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
|
||||
-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
|
||||
-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
+AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
|
||||
+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
|
||||
+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
|
||||
-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
|
||||
-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
|
||||
+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
|
||||
/* Fujitsu ('F') cores. */
|
||||
-AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
+AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
|
||||
/* HiSilicon ('H') cores. */
|
||||
-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
/* Marvell cores (TX3). */
|
||||
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
|
||||
/* ARMv8.4-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
+AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
|
||||
/* ARMv8-A big.LITTLE implementations. */
|
||||
|
||||
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
|
||||
-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
|
||||
-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
|
||||
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
|
||||
|
||||
/* ARM DynamIQ big.LITTLE configurations. */
|
||||
|
||||
-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
|
||||
-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
|
||||
+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
|
||||
+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
|
||||
|
||||
/* Armv8-R Architecture Processors. */
|
||||
-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cortexa53, 0x41, 0xd15, -1)
|
||||
+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
|
||||
|
||||
/* Armv9.0-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
|
||||
-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
|
||||
-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
|
||||
+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
|
||||
|
||||
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
|
||||
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
|
||||
|
||||
-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
+AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
|
||||
#undef AARCH64_CORE
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 254ecfaa2..3714c1047 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -2949,7 +2949,7 @@ static const struct processor all_cores[] =
|
||||
FLAGS, &COSTS##_tunings},
|
||||
#include "aarch64-cores.def"
|
||||
{"generic", generic, cortexa53, AARCH64_ARCH_8A,
|
||||
- AARCH64_FL_FOR_ARCH8, &generic_tunings},
|
||||
+ AARCH64_FL_FOR_V8A, &generic_tunings},
|
||||
{NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
|
||||
};
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 5a91dfdd2..918a14193 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -255,39 +255,39 @@
|
||||
#define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD)
|
||||
|
||||
/* Architecture flags that effect instruction selection. */
|
||||
-#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
|
||||
-#define AARCH64_FL_FOR_ARCH8_1 \
|
||||
- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
|
||||
+#define AARCH64_FL_FOR_V8A (AARCH64_FL_FPSIMD)
|
||||
+#define AARCH64_FL_FOR_V8_1A \
|
||||
+ (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \
|
||||
| AARCH64_FL_RDMA | AARCH64_FL_V8_1A)
|
||||
-#define AARCH64_FL_FOR_ARCH8_2 \
|
||||
- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A)
|
||||
-#define AARCH64_FL_FOR_ARCH8_3 \
|
||||
- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
|
||||
-#define AARCH64_FL_FOR_ARCH8_4 \
|
||||
- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
|
||||
+#define AARCH64_FL_FOR_V8_2A \
|
||||
+ (AARCH64_FL_FOR_V8_1A | AARCH64_FL_V8_2A)
|
||||
+#define AARCH64_FL_FOR_V8_3A \
|
||||
+ (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
|
||||
+#define AARCH64_FL_FOR_V8_4A \
|
||||
+ (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
|
||||
| AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
|
||||
-#define AARCH64_FL_FOR_ARCH8_5 \
|
||||
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \
|
||||
+#define AARCH64_FL_FOR_V8_5A \
|
||||
+ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
|
||||
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
|
||||
-#define AARCH64_FL_FOR_ARCH8_6 \
|
||||
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \
|
||||
+#define AARCH64_FL_FOR_V8_6A \
|
||||
+ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \
|
||||
| AARCH64_FL_I8MM | AARCH64_FL_BF16)
|
||||
-#define AARCH64_FL_FOR_ARCH8_7 \
|
||||
- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64)
|
||||
-#define AARCH64_FL_FOR_ARCH8_8 \
|
||||
- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS)
|
||||
-
|
||||
-#define AARCH64_FL_FOR_ARCH8_R \
|
||||
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R)
|
||||
-#define AARCH64_FL_FOR_ARCH9 \
|
||||
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \
|
||||
+#define AARCH64_FL_FOR_V8_7A \
|
||||
+ (AARCH64_FL_FOR_V8_6A | AARCH64_FL_V8_7A | AARCH64_FL_LS64)
|
||||
+#define AARCH64_FL_FOR_V8_8A \
|
||||
+ (AARCH64_FL_FOR_V8_7A | AARCH64_FL_V8_8A | AARCH64_FL_MOPS)
|
||||
+
|
||||
+#define AARCH64_FL_FOR_V8R \
|
||||
+ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8R)
|
||||
+#define AARCH64_FL_FOR_V9A \
|
||||
+ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \
|
||||
| AARCH64_FL_F16)
|
||||
-#define AARCH64_FL_FOR_ARCH9_1 \
|
||||
- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A)
|
||||
-#define AARCH64_FL_FOR_ARCH9_2 \
|
||||
- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A)
|
||||
-#define AARCH64_FL_FOR_ARCH9_3 \
|
||||
- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A)
|
||||
+#define AARCH64_FL_FOR_V9_1A \
|
||||
+ (AARCH64_FL_FOR_V9A | AARCH64_FL_FOR_V8_6A | AARCH64_FL_V9_1A)
|
||||
+#define AARCH64_FL_FOR_V9_2A \
|
||||
+ (AARCH64_FL_FOR_V9_1A | AARCH64_FL_FOR_V8_7A | AARCH64_FL_V9_2A)
|
||||
+#define AARCH64_FL_FOR_V9_3A \
|
||||
+ (AARCH64_FL_FOR_V9_2A | AARCH64_FL_FOR_V8_8A | AARCH64_FL_V9_3A)
|
||||
|
||||
/* Macros to test ISA flags. */
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
315
0105-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
Normal file
315
0105-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch
Normal file
@ -0,0 +1,315 @@
|
||||
From ed8ce0b31f2b608f0360af1ffd5375ea7809aba7 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:52 +0100
|
||||
Subject: [PATCH 013/157] [Backport][SME] aarch64: Add "V" to
|
||||
aarch64-arches.def names
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=00c22ba69d8e738a4789b30165ff9c925c508fc1
|
||||
|
||||
This patch completes the renaming of architecture-level related
|
||||
things by adding "V" to the name of the architecture in
|
||||
aarch64-arches.def. Since the "V" is predictable, we can easily
|
||||
drop it when we don't need it (as when matching /proc/cpuinfo).
|
||||
|
||||
Having a valid C identifier is necessary for later patches.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-arches.def: Add a leading "V" to the
|
||||
ARCH_IDENT fields.
|
||||
* config/aarch64/aarch64-cores.def: Update accordingly.
|
||||
* common/config/aarch64/aarch64-common.cc (all_cores): Likewise.
|
||||
* config/aarch64/aarch64.cc (all_cores): Likewise.
|
||||
* config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the
|
||||
leading "V".
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-arches.def | 28 ++---
|
||||
gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
|
||||
gcc/config/aarch64/aarch64.cc | 2 +-
|
||||
gcc/config/aarch64/driver-aarch64.cc | 3 +-
|
||||
5 files changed, 83 insertions(+), 82 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 0461201a5..6ca89d31f 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] =
|
||||
#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
{NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
|
||||
#include "config/aarch64/aarch64-cores.def"
|
||||
- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
|
||||
+ {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
|
||||
{"", aarch64_no_arch, 0}
|
||||
};
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
|
||||
index c6bf7d82c..e42202822 100644
|
||||
--- a/gcc/config/aarch64/aarch64-arches.def
|
||||
+++ b/gcc/config/aarch64/aarch64-arches.def
|
||||
@@ -30,19 +30,19 @@
|
||||
Due to the assumptions about the positions of these fields in config.gcc,
|
||||
the NAME should be kept as the first argument and FLAGS as the last. */
|
||||
|
||||
-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A)
|
||||
-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A)
|
||||
-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A)
|
||||
-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A)
|
||||
-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A)
|
||||
-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A)
|
||||
-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A)
|
||||
-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A)
|
||||
-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A)
|
||||
-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R)
|
||||
-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A)
|
||||
-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A)
|
||||
-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A)
|
||||
-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A)
|
||||
+AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A)
|
||||
+AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A)
|
||||
+AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A)
|
||||
+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A)
|
||||
+AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A)
|
||||
+AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A)
|
||||
+AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A)
|
||||
+AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A)
|
||||
+AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A)
|
||||
+AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R)
|
||||
+AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A)
|
||||
+AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A)
|
||||
+AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A)
|
||||
+AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A)
|
||||
|
||||
#undef AARCH64_ARCH
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index c4038c641..f4c2f4ea4 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -46,132 +46,132 @@
|
||||
/* ARMv8-A Architecture Processors. */
|
||||
|
||||
/* ARM ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
|
||||
-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
|
||||
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
|
||||
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
|
||||
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
|
||||
-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
|
||||
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
|
||||
+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
|
||||
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
|
||||
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
|
||||
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
|
||||
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
/* Do not swap around "thunderxt88p1" and "thunderxt88",
|
||||
this order is required to handle variant correctly. */
|
||||
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
|
||||
-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
|
||||
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
|
||||
+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
|
||||
|
||||
/* OcteonTX is the official name for T81/T83. */
|
||||
-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
/* Ampere Computing ('\xC0') cores. */
|
||||
-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
|
||||
+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
|
||||
/* Do not swap around "emag" and "xgene1",
|
||||
this order is required to handle variant correctly. */
|
||||
-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
+AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
|
||||
/* APM ('P') cores. */
|
||||
-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
|
||||
+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
+AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
|
||||
/* Samsung ('S') cores. */
|
||||
-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
|
||||
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
|
||||
|
||||
/* HXT ('h') cores. */
|
||||
-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
|
||||
+AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
|
||||
|
||||
/* ARMv8.1-A Architecture Processors. */
|
||||
|
||||
/* Broadcom ('B') cores. */
|
||||
-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
|
||||
+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
|
||||
|
||||
/* ARMv8.2-A Architecture Processors. */
|
||||
|
||||
/* ARM ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
|
||||
-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
|
||||
-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
|
||||
-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
|
||||
-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
|
||||
-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
|
||||
-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
|
||||
-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
|
||||
-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
|
||||
-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
|
||||
-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
|
||||
-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
|
||||
+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
|
||||
+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
|
||||
+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
|
||||
+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
|
||||
+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
|
||||
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
|
||||
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
|
||||
+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
|
||||
+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
|
||||
+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
|
||||
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
|
||||
+AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
|
||||
-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
|
||||
-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
|
||||
+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
|
||||
+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
|
||||
-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
|
||||
-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
|
||||
+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
|
||||
/* Fujitsu ('F') cores. */
|
||||
-AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
|
||||
/* HiSilicon ('H') cores. */
|
||||
-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
/* Marvell cores (TX3). */
|
||||
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
|
||||
/* ARMv8.4-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
+AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
|
||||
/* ARMv8-A big.LITTLE implementations. */
|
||||
|
||||
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
|
||||
-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
|
||||
-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
|
||||
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
|
||||
|
||||
/* ARM DynamIQ big.LITTLE configurations. */
|
||||
|
||||
-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
|
||||
-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
|
||||
+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
|
||||
+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
|
||||
|
||||
/* Armv8-R Architecture Processors. */
|
||||
-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
|
||||
+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
|
||||
|
||||
/* Armv9.0-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
|
||||
-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
|
||||
-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
|
||||
+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
|
||||
|
||||
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
|
||||
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
|
||||
|
||||
-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
+AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
|
||||
#undef AARCH64_CORE
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 3714c1047..22b51e12f 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -2948,7 +2948,7 @@ static const struct processor all_cores[] =
|
||||
{NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
|
||||
FLAGS, &COSTS##_tunings},
|
||||
#include "aarch64-cores.def"
|
||||
- {"generic", generic, cortexa53, AARCH64_ARCH_8A,
|
||||
+ {"generic", generic, cortexa53, AARCH64_ARCH_V8A,
|
||||
AARCH64_FL_FOR_V8A, &generic_tunings},
|
||||
{NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
|
||||
};
|
||||
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
|
||||
index d714a8bda..644780ef2 100644
|
||||
--- a/gcc/config/aarch64/driver-aarch64.cc
|
||||
+++ b/gcc/config/aarch64/driver-aarch64.cc
|
||||
@@ -78,8 +78,9 @@ struct aarch64_arch_driver_info
|
||||
const uint64_t flags;
|
||||
};
|
||||
|
||||
+/* Skip the leading "V" in the architecture name. */
|
||||
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
|
||||
- { #ARCH_IDENT, NAME, FLAGS },
|
||||
+ { #ARCH_IDENT + 1, NAME, FLAGS },
|
||||
|
||||
static struct aarch64_arch_driver_info aarch64_arches[] =
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
55
0106-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
Normal file
55
0106-Backport-SME-aarch64-Small-config.gcc-cleanups.patch
Normal file
@ -0,0 +1,55 @@
|
||||
From aac2b2d4191d08a107c3ff8d98602355988a5558 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:52 +0100
|
||||
Subject: [PATCH 014/157] [Backport][SME] aarch64: Small config.gcc cleanups
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0af214b447529453b356e8e480d7d35b3e642f0e
|
||||
|
||||
The aarch64-option-extensions.def parsing in config.gcc had
|
||||
some code left over from when it tried to parse the whole
|
||||
macro definition. Also, config.gcc now only looks at the
|
||||
first fields of the aarch64-arches.def entries.
|
||||
|
||||
gcc/
|
||||
* config.gcc: Remove dead aarch64-option-extensions.def code.
|
||||
* config/aarch64/aarch64-arches.def: Update comment.
|
||||
---
|
||||
gcc/config.gcc | 8 --------
|
||||
gcc/config/aarch64/aarch64-arches.def | 2 +-
|
||||
2 files changed, 1 insertion(+), 9 deletions(-)
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 3be450471..da66603cd 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -4210,14 +4210,6 @@ case "${target}" in
|
||||
options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \
|
||||
${srcdir}/config/aarch64/aarch64-option-extensions.def`"
|
||||
|
||||
- # Match one element inside AARCH64_OPT_EXTENSION, we
|
||||
- # consume anything that's not a ,.
|
||||
- elem="[ ]*\([^,]\+\)[ ]*"
|
||||
-
|
||||
- # Repeat the pattern for the number of entries in the
|
||||
- # AARCH64_OPT_EXTENSION, currently 6 times.
|
||||
- sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem"
|
||||
-
|
||||
while [ x"$ext_val" != x ]
|
||||
do
|
||||
ext_val=`echo $ext_val | sed -e 's/\+//'`
|
||||
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
|
||||
index e42202822..ece96e22a 100644
|
||||
--- a/gcc/config/aarch64/aarch64-arches.def
|
||||
+++ b/gcc/config/aarch64/aarch64-arches.def
|
||||
@@ -28,7 +28,7 @@
|
||||
ARCH_REV is an integer specifying the architecture major revision.
|
||||
FLAGS are the flags implied by the architecture.
|
||||
Due to the assumptions about the positions of these fields in config.gcc,
|
||||
- the NAME should be kept as the first argument and FLAGS as the last. */
|
||||
+ NAME should be kept as the first argument. */
|
||||
|
||||
AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A)
|
||||
AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
273
0107-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
Normal file
273
0107-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch
Normal file
@ -0,0 +1,273 @@
|
||||
From f6f28c50045f672a35f5b7344b556fc45dc0b3a1 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:53 +0100
|
||||
Subject: [PATCH 015/157] [Backport][SME] aarch64: Avoid redundancy in
|
||||
aarch64-cores.def
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=198bb6ed327c74eb2b0450bf978e4e6a64a6406c
|
||||
|
||||
The flags fields of the aarch64-cores.def always start with
|
||||
AARCH64_FL_FOR_<ARCH>. After previous changes, <ARCH> is always
|
||||
identical to the previous field, so we can drop the explicit
|
||||
AARCH64_FL_FOR_<ARCH> and derive it programmatically.
|
||||
|
||||
This isn't a big saving in itself, but it helps with later patches.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_<ARCH>
|
||||
from the flags field.
|
||||
* common/config/aarch64/aarch64-common.cc (all_cores): Add it
|
||||
here instead.
|
||||
* config/aarch64/aarch64.cc (all_cores): Likewise.
|
||||
* config/aarch64/driver-aarch64.cc (all_cores): Likewise.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
|
||||
gcc/config/aarch64/aarch64.cc | 2 +-
|
||||
gcc/config/aarch64/driver-aarch64.cc | 2 +-
|
||||
4 files changed, 68 insertions(+), 68 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 6ca89d31f..a965ac660 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -251,7 +251,7 @@ struct arch_to_arch_name
|
||||
static const struct processor_name_to_arch all_cores[] =
|
||||
{
|
||||
#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
- {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
|
||||
+ {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS},
|
||||
#include "config/aarch64/aarch64-cores.def"
|
||||
{"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
|
||||
{"", aarch64_no_arch, 0}
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index f4c2f4ea4..008b0b8c1 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -46,132 +46,132 @@
|
||||
/* ARMv8-A Architecture Processors. */
|
||||
|
||||
/* ARM ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
|
||||
-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
|
||||
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
|
||||
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
|
||||
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
|
||||
-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
|
||||
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
|
||||
+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
|
||||
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
|
||||
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
|
||||
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
|
||||
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
/* Do not swap around "thunderxt88p1" and "thunderxt88",
|
||||
this order is required to handle variant correctly. */
|
||||
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
|
||||
-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
|
||||
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
|
||||
+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
|
||||
|
||||
/* OcteonTX is the official name for T81/T83. */
|
||||
-AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
-AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
-AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
|
||||
+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
|
||||
+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
/* Ampere Computing ('\xC0') cores. */
|
||||
-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
|
||||
+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1)
|
||||
/* Do not swap around "emag" and "xgene1",
|
||||
this order is required to handle variant correctly. */
|
||||
-AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
+AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
|
||||
|
||||
/* APM ('P') cores. */
|
||||
-AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
|
||||
+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
-AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
+AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
|
||||
|
||||
/* Samsung ('S') cores. */
|
||||
-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
|
||||
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
|
||||
|
||||
/* HXT ('h') cores. */
|
||||
-AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
|
||||
+AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
|
||||
|
||||
/* ARMv8.1-A Architecture Processors. */
|
||||
|
||||
/* Broadcom ('B') cores. */
|
||||
-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
-AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
|
||||
+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
|
||||
|
||||
/* ARMv8.2-A Architecture Processors. */
|
||||
|
||||
/* ARM ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
|
||||
-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
|
||||
-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
|
||||
-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
|
||||
-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
|
||||
-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
|
||||
-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
|
||||
-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
|
||||
-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
|
||||
-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
|
||||
-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
|
||||
-AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
|
||||
+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
|
||||
+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
|
||||
+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
|
||||
+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
|
||||
+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
|
||||
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
|
||||
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
|
||||
+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
|
||||
+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
|
||||
+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
|
||||
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
|
||||
+AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
|
||||
+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
|
||||
|
||||
/* Cavium ('C') cores. */
|
||||
-AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
|
||||
-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
|
||||
-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
|
||||
+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
|
||||
+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
|
||||
-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
|
||||
-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
|
||||
+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
|
||||
+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
|
||||
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
|
||||
|
||||
/* Fujitsu ('F') cores. */
|
||||
-AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
|
||||
/* HiSilicon ('H') cores. */
|
||||
-AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
/* Marvell cores (TX3). */
|
||||
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
|
||||
/* ARMv8.4-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
+AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
|
||||
|
||||
/* ARMv8-A big.LITTLE implementations. */
|
||||
|
||||
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
|
||||
-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
|
||||
-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
|
||||
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
|
||||
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
|
||||
|
||||
/* ARM DynamIQ big.LITTLE configurations. */
|
||||
|
||||
-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
|
||||
-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
|
||||
+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
|
||||
+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
|
||||
|
||||
/* Armv8-R Architecture Processors. */
|
||||
-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
|
||||
+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, 0, cortexa53, 0x41, 0xd15, -1)
|
||||
|
||||
/* Armv9.0-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
|
||||
|
||||
-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
|
||||
|
||||
-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
|
||||
+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
|
||||
|
||||
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
|
||||
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
|
||||
|
||||
-AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
+AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
|
||||
|
||||
#undef AARCH64_CORE
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 22b51e12f..f975aad07 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -2946,7 +2946,7 @@ static const struct processor all_cores[] =
|
||||
{
|
||||
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
{NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
|
||||
- FLAGS, &COSTS##_tunings},
|
||||
+ AARCH64_FL_FOR_##ARCH | FLAGS, &COSTS##_tunings},
|
||||
#include "aarch64-cores.def"
|
||||
{"generic", generic, cortexa53, AARCH64_ARCH_V8A,
|
||||
AARCH64_FL_FOR_V8A, &generic_tunings},
|
||||
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
|
||||
index 644780ef2..97690de62 100644
|
||||
--- a/gcc/config/aarch64/driver-aarch64.cc
|
||||
+++ b/gcc/config/aarch64/driver-aarch64.cc
|
||||
@@ -62,7 +62,7 @@ struct aarch64_core_data
|
||||
#define DEFAULT_ARCH "8A"
|
||||
|
||||
#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
- { CORE_NAME, #ARCH, IMP, PART, VARIANT, FLAGS },
|
||||
+ { CORE_NAME, #ARCH, IMP, PART, VARIANT, AARCH64_FL_FOR_##ARCH | FLAGS },
|
||||
|
||||
static struct aarch64_core_data aarch64_cpu_data[] =
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,83 @@
|
||||
From f6137d5be2761caea75dcc1c98d941ceec161456 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:53 +0100
|
||||
Subject: [PATCH 016/157] [Backport][SME] aarch64: Remove AARCH64_FL_RCPC8_4
|
||||
[PR107025]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f244d848cffeda68f0eb4c5bb9c7e629bf2e957
|
||||
|
||||
AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated
|
||||
entry in aarch64-option-extensions.def. This means that, although
|
||||
it is internally separated from AARCH64_FL_V8_4A, there is no
|
||||
mechanism for turning it on and off individually, independently
|
||||
of armv8.4-a.
|
||||
|
||||
The only place that the flag was used independently was in the
|
||||
entry for thunderx3t110, which enabled it alongside V8_3A.
|
||||
As noted in PR107025, this means that any use of the extension
|
||||
will fail to assemble.
|
||||
|
||||
In the PR trail, Andrew suggested removing the core entry.
|
||||
That might be best long-term, but since the barrier for removing
|
||||
command-line options without a deprecation period is very high,
|
||||
this patch instead just drops the flag from the core entry.
|
||||
We'll still produce correct code.
|
||||
|
||||
gcc/
|
||||
PR target/107025
|
||||
* config/aarch64/aarch64.h (AARCH64_FL_RCPC8_4): Delete.
|
||||
(AARCH64_FL_FOR_V8_4A): Update accordingly.
|
||||
(AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly.
|
||||
* config/aarch64/aarch64-cores.def (thunderx3t110): Remove
|
||||
AARCH64_FL_RCPC8_4.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-cores.def | 2 +-
|
||||
gcc/config/aarch64/aarch64.h | 5 ++---
|
||||
2 files changed, 3 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index 008b0b8c1..cf500d0a9 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
/* Marvell cores (TX3). */
|
||||
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
|
||||
/* ARMv8.4-A Architecture Processors. */
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 918a14193..f4e0cd148 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -173,7 +173,6 @@
|
||||
#define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */
|
||||
#define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */
|
||||
#define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */
|
||||
-#define AARCH64_FL_RCPC8_4 (1 << 20) /* Has ARMv8.4-a RCPC extensions. */
|
||||
|
||||
/* Statistical Profiling extensions. */
|
||||
#define AARCH64_FL_PROFILE (1 << 21)
|
||||
@@ -265,7 +264,7 @@
|
||||
(AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
|
||||
#define AARCH64_FL_FOR_V8_4A \
|
||||
(AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
|
||||
- | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
|
||||
+ | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM)
|
||||
#define AARCH64_FL_FOR_V8_5A \
|
||||
(AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
|
||||
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
|
||||
@@ -313,7 +312,7 @@
|
||||
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
|
||||
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
|
||||
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
|
||||
-#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
|
||||
+#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A)
|
||||
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
|
||||
#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
|
||||
#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
154
0109-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
Normal file
154
0109-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch
Normal file
@ -0,0 +1,154 @@
|
||||
From c6698a5feb07fc0cda89a54a0ee4006295ac6dbe Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:53 +0100
|
||||
Subject: [PATCH 017/157] [Backport][SME] aarch64: Fix transitive closure of
|
||||
features
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b754d32d3053a4ba2a82361ac0f2739797a811f1
|
||||
|
||||
aarch64-option-extensions.def requires us to maintain the transitive
|
||||
closure of options by hand. This patch fixes a few cases where a
|
||||
flag was missed.
|
||||
|
||||
+noaes and +nosha2 now disable +crypto, which IMO makes more
|
||||
sense and is consistent with the Clang behaviour.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-option-extensions.def (dotprod): Depend
|
||||
on fp as well as simd.
|
||||
(sha3): Likewise.
|
||||
(aes): Likewise. Make +noaes disable crypto.
|
||||
(sha2): Likewise +nosha2. Also make +nosha2 disable sha3 and
|
||||
sve2-sha3.
|
||||
(sve2-sha3): Depend on sha2 as well as sha3.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to
|
||||
disable crypto but keep aes.
|
||||
* gcc.target/aarch64/pragma_cpp_predefs_4.c: New test.
|
||||
---
|
||||
.../aarch64/aarch64-option-extensions.def | 16 ++++---
|
||||
.../gcc.target/aarch64/options_set_6.c | 5 +-
|
||||
.../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++++++++++++++++++
|
||||
3 files changed, 58 insertions(+), 10 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
index b4d0ac8b6..b98008127 100644
|
||||
--- a/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \
|
||||
|
||||
/* Enabling "dotprod" also enables "simd".
|
||||
Disabling "dotprod" only disables "dotprod". */
|
||||
-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \
|
||||
+AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \
|
||||
false, "asimddp")
|
||||
|
||||
/* Enabling "aes" also enables "simd".
|
||||
Disabling "aes" disables "aes" and "sve2-aes'. */
|
||||
-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \
|
||||
- AARCH64_FL_SVE2_AES, false, "aes")
|
||||
+AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \
|
||||
+ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes")
|
||||
|
||||
/* Enabling "sha2" also enables "simd".
|
||||
Disabling "sha2" just disables "sha2". */
|
||||
-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \
|
||||
- "sha1 sha2")
|
||||
+AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \
|
||||
+ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \
|
||||
+ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2")
|
||||
|
||||
/* Enabling "sha3" enables "simd" and "sha2".
|
||||
Disabling "sha3" disables "sha3" and "sve2-sha3". */
|
||||
-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \
|
||||
+AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \
|
||||
AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \
|
||||
"sha3 sha512")
|
||||
|
||||
/* Enabling "sm4" also enables "simd".
|
||||
Disabling "sm4" disables "sm4" and "sve2-sm4". */
|
||||
-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \
|
||||
+AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \
|
||||
AARCH64_FL_SVE2_SM4, false, "sm3 sm4")
|
||||
|
||||
/* Enabling "fp16fml" also enables "fp" and "fp16".
|
||||
@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \
|
||||
/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and
|
||||
"sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */
|
||||
AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \
|
||||
+ AARCH64_FL_SHA2 | \
|
||||
AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \
|
||||
AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3")
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c
|
||||
index 90a055928..2a1d7fe5b 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c
|
||||
@@ -6,7 +6,6 @@ int main ()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */
|
||||
|
||||
-/* Group as a whole was requested to be turned on, crypto itself is a bit and so
|
||||
- just turning off one feature can't turn it off. */
|
||||
+/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
|
||||
new file mode 100644
|
||||
index 000000000..0e6461fa4
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
|
||||
@@ -0,0 +1,47 @@
|
||||
+#pragma GCC target "+nothing+dotprod"
|
||||
+#ifndef __ARM_FEATURE_FMA
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+nothing+aes"
|
||||
+#ifndef __ARM_FEATURE_FMA
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+nothing+sha2"
|
||||
+#ifndef __ARM_FEATURE_FMA
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+nothing+sha3"
|
||||
+#ifndef __ARM_FEATURE_FMA
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+nothing+sm4"
|
||||
+#ifndef __ARM_FEATURE_FMA
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+crypto+noaes"
|
||||
+#ifdef __ARM_FEATURE_CRYPTO
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+crypto+nosha2"
|
||||
+#ifdef __ARM_FEATURE_CRYPTO
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+nothing+sve2-sha3"
|
||||
+#ifndef __ARM_FEATURE_SHA2
|
||||
+#error Foo
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target "+sve2-sha3+nosha2"
|
||||
+#ifdef __ARM_FEATURE_SHA3
|
||||
+#error Foo
|
||||
+#endif
|
||||
+#ifdef __ARM_FEATURE_SVE2_SHA3
|
||||
+#error Foo
|
||||
+#endif
|
||||
--
|
||||
2.33.0
|
||||
|
||||
194
0110-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
Normal file
194
0110-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch
Normal file
@ -0,0 +1,194 @@
|
||||
From 4a2d0bdf5c9a5f4ee615c1d0768cb2e8a3dfef4a Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:54 +0100
|
||||
Subject: [PATCH 018/157] [Backport][SME] aarch64: Reorder an entry in
|
||||
aarch64-option-extensions.def
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c067c474f85b1e9c56fb34dd51ef0eec9221b766
|
||||
|
||||
aarch64-option-extensions.def was topologically sorted except
|
||||
for one case: crypto came before its aes and sha2 dependencies.
|
||||
This patch moves crypto after sha2 instead.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-option-extensions.def: Move crypto
|
||||
after sha2.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto
|
||||
to come after +crc.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise.
|
||||
* gcc.target/aarch64/options_set_2.c: Likewise.
|
||||
* gcc.target/aarch64/options_set_3.c: Likewise.
|
||||
* gcc.target/aarch64/options_set_4.c: Likewise.
|
||||
---
|
||||
.../aarch64/aarch64-option-extensions.def | 20 +++++++++----------
|
||||
.../aarch64/cpunative/native_cpu_0.c | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_13.c | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_16.c | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_17.c | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_6.c | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_7.c | 2 +-
|
||||
.../gcc.target/aarch64/options_set_2.c | 2 +-
|
||||
.../gcc.target/aarch64/options_set_3.c | 2 +-
|
||||
.../gcc.target/aarch64/options_set_4.c | 4 ++--
|
||||
10 files changed, 20 insertions(+), 20 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
index b98008127..df2c8d19b 100644
|
||||
--- a/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \
|
||||
AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \
|
||||
false, "asimd")
|
||||
|
||||
-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
|
||||
- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
|
||||
- "sve2-aes", "sve2-sha3", "sve2-sm4". */
|
||||
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \
|
||||
- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \
|
||||
- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
|
||||
- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \
|
||||
- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \
|
||||
- "aes pmull sha1 sha2")
|
||||
-
|
||||
/* Enabling or disabling "crc" only changes "crc". */
|
||||
AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32")
|
||||
|
||||
@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \
|
||||
AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \
|
||||
AARCH64_FL_SVE2_SHA3, false, "sha1 sha2")
|
||||
|
||||
+/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
|
||||
+ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
|
||||
+ "sve2-aes", "sve2-sha3", "sve2-sm4". */
|
||||
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \
|
||||
+ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \
|
||||
+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
|
||||
+ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \
|
||||
+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \
|
||||
+ "aes pmull sha1 sha2")
|
||||
+
|
||||
/* Enabling "sha3" enables "simd" and "sha2".
|
||||
Disabling "sha3" disables "sha3" and "sve2-sha3". */
|
||||
AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
|
||||
index f155f51ba..8499f87c3 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
|
||||
@@ -7,6 +7,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */
|
||||
|
||||
/* Test a normal looking procinfo. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
|
||||
index b7b3a8e13..551669091 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
|
||||
@@ -7,6 +7,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */
|
||||
|
||||
/* Test one with mixed order of feature bits. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
|
||||
index a424e7c56..2f963bb23 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
|
||||
@@ -7,6 +7,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */
|
||||
|
||||
/* Test a normal looking procinfo. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c
|
||||
index c269c5fef..c68a697aa 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c
|
||||
@@ -7,6 +7,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */
|
||||
|
||||
/* Test a normal looking procinfo. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c
|
||||
index da72052e6..7608e8845 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c
|
||||
@@ -7,7 +7,7 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */
|
||||
|
||||
/* Test one where the feature bits for crypto and fp16 are given in
|
||||
same order as declared in options file. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c
|
||||
index 96ad4c14d..72b14b4f6 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c
|
||||
@@ -7,7 +7,7 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */
|
||||
|
||||
/* Test one where the crypto and fp16 options are specified in different
|
||||
order from what is in the options file. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c
|
||||
index 3476febce..f82cb5f78 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c
|
||||
@@ -6,6 +6,6 @@ int main ()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */
|
||||
|
||||
/* Check to see if crc and crypto are maintained if crypto specified. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c
|
||||
index 4558339f1..7d350cfa3 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c
|
||||
@@ -6,6 +6,6 @@ int main ()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */
|
||||
|
||||
/* Check if smallest set is maintained when outputting. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_4.c b/gcc/testsuite/gcc.target/aarch64/options_set_4.c
|
||||
index 15514bfe9..5370e02e1 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/options_set_4.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_4.c
|
||||
@@ -6,7 +6,7 @@ int main ()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
|
||||
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */
|
||||
|
||||
/* Check if individual bits that make up a grouping is specified that only the
|
||||
- grouping is kept. */
|
||||
\ No newline at end of file
|
||||
+ grouping is kept. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1176
0111-Backport-SME-aarch64-Simplify-feature-definitions.patch
Normal file
1176
0111-Backport-SME-aarch64-Simplify-feature-definitions.patch
Normal file
File diff suppressed because it is too large
Load Diff
467
0112-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
Normal file
467
0112-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch
Normal file
@ -0,0 +1,467 @@
|
||||
From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:55 +0100
|
||||
Subject: [PATCH 020/157] [Backport][SME] aarch64: Simplify generation of .arch
|
||||
strings
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d
|
||||
|
||||
aarch64-common.cc has two arrays, one maintaining the original
|
||||
definition order and one sorted by population count. Sorting
|
||||
by population count was a way of ensuring topological ordering,
|
||||
taking advantage of the fact that the entries are partially
|
||||
ordered by the subset relation. However, the sorting is not
|
||||
needed now that the .def file is forced to have topological
|
||||
order from the outset.
|
||||
|
||||
Other changes are:
|
||||
|
||||
(1) The population count used:
|
||||
|
||||
uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
|
||||
uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
|
||||
int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
|
||||
int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
|
||||
|
||||
where I think the & was supposed to be |. This meant that the
|
||||
counts would always be 1 in practice, since flag_canonical is
|
||||
a single bit. This led us to printing +nofp+nosimd even though
|
||||
GCC "knows" (and GAS agrees) that +nofp disables simd.
|
||||
|
||||
(2) The .arch output code converts +aes+sha2 to +crypto. I think
|
||||
the main reason for doing this is to support assemblers that
|
||||
predate the individual per-feature crypto flags. It therefore
|
||||
seems more natural to treat it as a special case, rather than
|
||||
as an instance of a general pattern. Hopefully we won't do
|
||||
something similar in future!
|
||||
|
||||
(There is already special handling of CRC, for different reasons.)
|
||||
|
||||
(3) Previously, if the /proc/cpuinfo code saw a feature like sve,
|
||||
it would assume the presence of all the features that sve
|
||||
depends on. It would be possible to keep that behaviour
|
||||
if necessary, but it was simpler to assume the presence of
|
||||
fp16 (say) only when fphp is present. There's an argument
|
||||
that that's more conservatively correct too.
|
||||
|
||||
gcc/
|
||||
* common/config/aarch64/aarch64-common.cc
|
||||
(TARGET_OPTION_INIT_STRUCT): Delete.
|
||||
(aarch64_option_extension): Remove is_synthetic_flag.
|
||||
(all_extensions): Update accordingly.
|
||||
(all_extensions_by_on, opt_ext, opt_ext_cmp): Delete.
|
||||
(aarch64_option_init_struct, aarch64_contains_opt): Delete.
|
||||
(aarch64_get_extension_string_for_isa_flags): Rewrite to use
|
||||
all_extensions instead of all_extensions_by_on.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve.
|
||||
* gcc.target/aarch64/cpunative/info_9: Likewise svesm4.
|
||||
* gcc.target/aarch64/cpunative/info_15: Likewise.
|
||||
* gcc.target/aarch64/cpunative/info_16: Likewise sve2.
|
||||
* gcc.target/aarch64/cpunative/info_17: Likewise.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp
|
||||
rather than +nofp+nosimd.
|
||||
* gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise.
|
||||
* gcc.target/aarch64/target_attr_15.c: Likewise.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 244 ++++--------------
|
||||
.../gcc.target/aarch64/cpunative/info_15 | 2 +-
|
||||
.../gcc.target/aarch64/cpunative/info_16 | 2 +-
|
||||
.../gcc.target/aarch64/cpunative/info_17 | 2 +-
|
||||
.../gcc.target/aarch64/cpunative/info_8 | 2 +-
|
||||
.../gcc.target/aarch64/cpunative/info_9 | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_10.c | 2 +-
|
||||
.../aarch64/cpunative/native_cpu_2.c | 2 +-
|
||||
.../gcc.target/aarch64/target_attr_15.c | 2 +-
|
||||
9 files changed, 55 insertions(+), 205 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 74729bb30..057dc094d 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -42,8 +42,6 @@
|
||||
|
||||
#undef TARGET_OPTION_OPTIMIZATION_TABLE
|
||||
#define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table
|
||||
-#undef TARGET_OPTION_INIT_STRUCT
|
||||
-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct
|
||||
|
||||
#define INVALID_IMP ((unsigned) -1)
|
||||
|
||||
@@ -209,7 +207,6 @@ struct aarch64_option_extension
|
||||
const uint64_t flag_canonical;
|
||||
const uint64_t flags_on;
|
||||
const uint64_t flags_off;
|
||||
- const bool is_synthetic;
|
||||
};
|
||||
|
||||
/* ISA extensions in AArch64. */
|
||||
@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions[] =
|
||||
{NAME, AARCH64_FL_##IDENT, \
|
||||
feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
|
||||
feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
|
||||
- & ~AARCH64_FL_##IDENT, \
|
||||
- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
|
||||
+ & ~AARCH64_FL_##IDENT},
|
||||
#include "config/aarch64/aarch64-option-extensions.def"
|
||||
- {NULL, 0, 0, 0, false}
|
||||
-};
|
||||
-
|
||||
-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of
|
||||
- bits and extension turned on. Cached for efficiency. */
|
||||
-static struct aarch64_option_extension all_extensions_by_on[] =
|
||||
-{
|
||||
-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
|
||||
- {NAME, AARCH64_FL_##IDENT, \
|
||||
- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
|
||||
- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
|
||||
- & ~AARCH64_FL_##IDENT, \
|
||||
- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
|
||||
-#include "config/aarch64/aarch64-option-extensions.def"
|
||||
- {NULL, 0, 0, 0, false}
|
||||
+ {NULL, 0, 0, 0}
|
||||
};
|
||||
|
||||
struct processor_name_to_arch
|
||||
@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
|
||||
candidates->safe_push (opt->name);
|
||||
}
|
||||
|
||||
-/* Comparer to sort aarch64's feature extensions by population count. Largest
|
||||
- first. */
|
||||
-
|
||||
-typedef const struct aarch64_option_extension opt_ext;
|
||||
-
|
||||
-int opt_ext_cmp (const void* a, const void* b)
|
||||
-{
|
||||
- opt_ext *opt_a = (opt_ext *)a;
|
||||
- opt_ext *opt_b = (opt_ext *)b;
|
||||
-
|
||||
- /* We consider the total set of bits an options turns on to be the union of
|
||||
- the singleton set containing the option itself and the set of options it
|
||||
- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and
|
||||
- FL_SIMD. As such the set of bits represented by this option is
|
||||
- {FL_DOTPROD, FL_SIMD}. */
|
||||
- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
|
||||
- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
|
||||
- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
|
||||
- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
|
||||
- int order = popcnt_b - popcnt_a;
|
||||
-
|
||||
- /* If they have the same amount of bits set, give it a more
|
||||
- deterministic ordering by using the value of the bits themselves. */
|
||||
- if (order != 0)
|
||||
- return order;
|
||||
-
|
||||
- if (total_flags_a != total_flags_b)
|
||||
- return total_flags_a < total_flags_b ? 1 : -1;
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* Implement TARGET_OPTION_INIT_STRUCT. */
|
||||
-
|
||||
-static void
|
||||
-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED)
|
||||
-{
|
||||
- /* Sort the extensions based on how many bits they set, order the larger
|
||||
- counts first. We sort the list because this makes processing the
|
||||
- feature bits O(n) instead of O(n^2). While n is small, the function
|
||||
- to calculate the feature strings is called on every options push,
|
||||
- pop and attribute change (arm_neon headers, lto etc all cause this to
|
||||
- happen quite frequently). It is a trade-off between time and space and
|
||||
- so time won. */
|
||||
- int n_extensions
|
||||
- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension);
|
||||
- qsort (&all_extensions_by_on, n_extensions,
|
||||
- sizeof (struct aarch64_option_extension), opt_ext_cmp);
|
||||
-}
|
||||
-
|
||||
-/* Checks to see if enough bits from the option OPT are enabled in
|
||||
- ISA_FLAG_BITS to be able to replace the individual options with the
|
||||
- canonicalized version of the option. This is done based on two rules:
|
||||
-
|
||||
- 1) Synthetic groups, such as +crypto we only care about the bits that are
|
||||
- turned on. e.g. +aes+sha2 can be replaced with +crypto.
|
||||
-
|
||||
- 2) Options that themselves have a bit, such as +rdma, in this case, all the
|
||||
- feature bits they turn on must be available and the bit for the option
|
||||
- itself must be. In this case it's effectively a reduction rather than a
|
||||
- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would
|
||||
- need +rdma+fp+simd which is reduced down to +rdma.
|
||||
-*/
|
||||
-
|
||||
-static bool
|
||||
-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt)
|
||||
-{
|
||||
- uint64_t flags_check
|
||||
- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical;
|
||||
-
|
||||
- return (isa_flag_bits & flags_check) == flags_check;
|
||||
-}
|
||||
-
|
||||
/* Return a string representation of ISA_FLAGS. DEFAULT_ARCH_FLAGS
|
||||
gives the default set of flags which are implied by whatever -march
|
||||
we'd put out. Our job is to figure out the minimal set of "+" and
|
||||
@@ -436,118 +345,59 @@ std::string
|
||||
aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
|
||||
uint64_t default_arch_flags)
|
||||
{
|
||||
- const struct aarch64_option_extension *opt = NULL;
|
||||
std::string outstr = "";
|
||||
|
||||
- uint64_t isa_flag_bits = isa_flags;
|
||||
-
|
||||
- /* Pass one: Minimize the search space by reducing the set of options
|
||||
- to the smallest set that still turns on the same features as before in
|
||||
- conjunction with the bits that are turned on by default for the selected
|
||||
- architecture. */
|
||||
- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
|
||||
+ aarch64_feature_flags current_flags = default_arch_flags;
|
||||
+
|
||||
+ /* As a special case, do not assume that the assembler will enable CRC
|
||||
+ even if it is the default for the architecture. This is required
|
||||
+ because some CPUs had an incorrect specification in older assemblers:
|
||||
+ even though CRC should be the default for these cases the -mcpu
|
||||
+ values would not turn it on.
|
||||
+
|
||||
+ However, assemblers with Armv8-R AArch64 support should not have this
|
||||
+ issue, so we don't need this fix when targeting Armv8-R. */
|
||||
+ auto explicit_flags = (!(current_flags & AARCH64_FL_V8R)
|
||||
+ ? AARCH64_FL_CRC : 0);
|
||||
+
|
||||
+ /* Add the features in isa_flags & ~current_flags using the smallest
|
||||
+ possible number of extensions. We can do this by iterating over the
|
||||
+ array in reverse order, since the array is sorted topologically.
|
||||
+ But in order to make the output more readable, it seems better
|
||||
+ to add the strings in definition order. */
|
||||
+ aarch64_feature_flags added = 0;
|
||||
+ for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; )
|
||||
{
|
||||
- /* If the bit is on by default, then all the options it turns on are also
|
||||
- on by default due to the transitive dependencies.
|
||||
-
|
||||
- If the option is enabled explicitly in the set then we need to emit
|
||||
- an option for it. Since this list is sorted by extensions setting the
|
||||
- largest number of featers first, we can be sure that nothing else will
|
||||
- ever need to set the bits we already set. Consider the following
|
||||
- situation:
|
||||
-
|
||||
- Feat1 = A + B + C
|
||||
- Feat2 = A + B
|
||||
- Feat3 = A + D
|
||||
- Feat4 = B + C
|
||||
- Feat5 = C
|
||||
-
|
||||
- The following results are expected:
|
||||
-
|
||||
- A + C = A + Feat5
|
||||
- B + C = Feat4
|
||||
- Feat4 + A = Feat1
|
||||
- Feat2 + Feat5 = Feat1
|
||||
- Feat1 + C = Feat1
|
||||
- Feat3 + Feat4 = Feat1 + D
|
||||
-
|
||||
- This search assumes that all invidual feature bits are use visible,
|
||||
- in other words the user must be able to do +A, +B, +C and +D. */
|
||||
- if (aarch64_contains_opt (isa_flag_bits | default_arch_flags, opt))
|
||||
- {
|
||||
- /* We remove all the dependent bits, to prevent them from being turned
|
||||
- on twice. This only works because we assume that all there are
|
||||
- individual options to set all bits standalone. */
|
||||
-
|
||||
- /* PR target/94396.
|
||||
-
|
||||
- For flags which would already imply a bit that's on by default (e.g
|
||||
- fp16fml which implies +fp,+fp16) we must emit the flags that are not
|
||||
- on by default. i.e. in Armv8.4-a +fp16fml is default if +fp16. So
|
||||
- if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit
|
||||
- +fp16. But if +fp16fml is used in an architecture where it is
|
||||
- completely optional we only have to emit the canonical flag. */
|
||||
- uint64_t toggle_bits = opt->flags_on & default_arch_flags;
|
||||
- /* Now check to see if the canonical flag is on by default. If it
|
||||
- is not then enabling it will enable all bits in flags_on. */
|
||||
- if ((opt->flag_canonical & default_arch_flags) == 0)
|
||||
- toggle_bits = opt->flags_on;
|
||||
-
|
||||
- isa_flag_bits &= ~toggle_bits;
|
||||
- isa_flag_bits |= opt->flag_canonical;
|
||||
- }
|
||||
- }
|
||||
+ auto &opt = all_extensions[i];
|
||||
|
||||
- /* By toggling bits on and off, we may have set bits on that are already
|
||||
- enabled by default. So we mask the default set out so we don't emit an
|
||||
- option for them. Instead of checking for this each time during Pass One
|
||||
- we just mask all default bits away at the end. */
|
||||
- isa_flag_bits &= ~default_arch_flags;
|
||||
-
|
||||
- /* We now have the smallest set of features we need to process. A subsequent
|
||||
- linear scan of the bits in isa_flag_bits will allow us to print the ext
|
||||
- names. However as a special case if CRC was enabled before, always print
|
||||
- it. This is required because some CPUs have an incorrect specification
|
||||
- in older assemblers. Even though CRC should be the default for these
|
||||
- cases the -mcpu values won't turn it on.
|
||||
-
|
||||
- Note that assemblers with Armv8-R AArch64 support should not have this
|
||||
- issue, so we don't need this fix when targeting Armv8-R. */
|
||||
- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
|
||||
- isa_flag_bits |= AARCH64_ISA_CRC;
|
||||
-
|
||||
- /* Pass Two:
|
||||
- Print the option names that we're sure we must turn on. These are only
|
||||
- optional extension names. Mandatory ones have already been removed and
|
||||
- ones we explicitly want off have been too. */
|
||||
- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
|
||||
- {
|
||||
- if (isa_flag_bits & opt->flag_canonical)
|
||||
- {
|
||||
- outstr += "+";
|
||||
- outstr += opt->name;
|
||||
- }
|
||||
- }
|
||||
+ /* As a special case, emit +crypto rather than +aes+sha2,
|
||||
+ in order to support assemblers that predate the separate
|
||||
+ per-feature crypto flags. */
|
||||
+ auto flags = opt.flag_canonical;
|
||||
+ if (flags == AARCH64_FL_CRYPTO)
|
||||
+ flags = AARCH64_FL_AES | AARCH64_FL_SHA2;
|
||||
|
||||
- /* Pass Three:
|
||||
- Print out a +no for any mandatory extension that we are
|
||||
- turning off. By this point aarch64_parse_extension would have ensured
|
||||
- that any optional extensions are turned off. The only things left are
|
||||
- things that can't be turned off usually, e.g. something that is on by
|
||||
- default because it's mandatory and we want it off. For turning off bits
|
||||
- we don't guarantee the smallest set of flags, but instead just emit all
|
||||
- options the user has specified.
|
||||
-
|
||||
- The assembler requires all +<opts> to be printed before +no<opts>. */
|
||||
- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
|
||||
- {
|
||||
- if ((~isa_flags) & opt->flag_canonical
|
||||
- && !((~default_arch_flags) & opt->flag_canonical))
|
||||
+ if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
|
||||
{
|
||||
- outstr += "+no";
|
||||
- outstr += opt->name;
|
||||
+ current_flags |= opt.flag_canonical | opt.flags_on;
|
||||
+ added |= opt.flag_canonical;
|
||||
}
|
||||
}
|
||||
+ for (auto &opt : all_extensions)
|
||||
+ if (added & opt.flag_canonical)
|
||||
+ {
|
||||
+ outstr += "+";
|
||||
+ outstr += opt.name;
|
||||
+ }
|
||||
+
|
||||
+ /* Remove the features in current_flags & ~isa_flags. */
|
||||
+ for (auto &opt : all_extensions)
|
||||
+ if (opt.flag_canonical & current_flags & ~isa_flags)
|
||||
+ {
|
||||
+ current_flags &= ~(opt.flag_canonical | opt.flags_off);
|
||||
+ outstr += "+no";
|
||||
+ outstr += opt.name;
|
||||
+ }
|
||||
|
||||
return outstr;
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
|
||||
index bc6453945..6b425ea20 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
|
||||
@@ -1,6 +1,6 @@
|
||||
processor : 0
|
||||
BogoMIPS : 100.00
|
||||
-Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp
|
||||
+Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp sve sve2 fphp asimdhp sm3 sm4
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x0
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
|
||||
index 2c04ff19c..26f01c496 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
|
||||
@@ -1,6 +1,6 @@
|
||||
processor : 0
|
||||
BogoMIPS : 100.00
|
||||
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2
|
||||
+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp
|
||||
CPU implementer : 0xfe
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x0
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
|
||||
index 2c04ff19c..26f01c496 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
|
||||
@@ -1,6 +1,6 @@
|
||||
processor : 0
|
||||
BogoMIPS : 100.00
|
||||
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2
|
||||
+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp
|
||||
CPU implementer : 0xfe
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x0
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
|
||||
index d6d9d03a2..76da16c57 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
|
||||
@@ -1,6 +1,6 @@
|
||||
processor : 0
|
||||
BogoMIPS : 100.00
|
||||
-Features : asimd sve fp
|
||||
+Features : asimd sve fp fphp asimdhp
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x0
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
|
||||
index c9aa4a9a0..14703dd1d 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
|
||||
@@ -1,6 +1,6 @@
|
||||
processor : 0
|
||||
BogoMIPS : 100.00
|
||||
-Features : asimd fp svesm4
|
||||
+Features : asimd fp svesm4 sve sve2 fphp asimdhp sm3 sm4
|
||||
CPU implementer : 0x41
|
||||
CPU architecture: 8
|
||||
CPU variant : 0x0
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
|
||||
index 6a753965c..ddb06b822 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
|
||||
@@ -7,6 +7,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */
|
||||
|
||||
/* Test one with no entry in feature list. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
|
||||
index aad71f434..edbdb5626 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
|
||||
@@ -7,6 +7,6 @@ int main()
|
||||
return 0;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */
|
||||
+/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */
|
||||
|
||||
/* Test one where asimd is provided byt no fp. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
|
||||
index 108b372e4..069a00108 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
|
||||
@@ -10,4 +10,4 @@ foo (int a)
|
||||
return a + 1;
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\\+nosimd\n" 1 } } */
|
||||
+/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\n" 1 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,43 @@
|
||||
From 7096be1673a10da5218a8620fb40b4b26e61c1d4 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:55 +0100
|
||||
Subject: [PATCH 021/157] [Backport][SME] aarch64: Avoid std::string in static
|
||||
data
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=13af9e9fda391f4f0566ad8f0b4d0448a7e984d0
|
||||
|
||||
Just a minor patch to avoid having to construct std::strings
|
||||
in static data.
|
||||
|
||||
gcc/
|
||||
* common/config/aarch64/aarch64-common.cc (processor_name_to_arch)
|
||||
(arch_to_arch_name): Use const char * instead of std::string.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 057dc094d..2bdf51b8b 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -223,7 +223,7 @@ static const struct aarch64_option_extension all_extensions[] =
|
||||
|
||||
struct processor_name_to_arch
|
||||
{
|
||||
- const std::string processor_name;
|
||||
+ const char *const processor_name;
|
||||
const enum aarch64_arch arch;
|
||||
const uint64_t flags;
|
||||
};
|
||||
@@ -231,7 +231,7 @@ struct processor_name_to_arch
|
||||
struct arch_to_arch_name
|
||||
{
|
||||
const enum aarch64_arch arch;
|
||||
- const std::string arch_name;
|
||||
+ const char *const arch_name;
|
||||
const uint64_t flags;
|
||||
};
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
195
0114-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
Normal file
195
0114-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch
Normal file
@ -0,0 +1,195 @@
|
||||
From 99c5eb58e898417632b6d9a7b2b3d288b50e9b65 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:55 +0100
|
||||
Subject: [PATCH 022/157] [Backport][SME] aarch64: Tweak constness of
|
||||
option-related data
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60dee638c8a7ae59c033868de7e7638c88b38ed2
|
||||
|
||||
Some of the option structures have all-const member variables.
|
||||
That doesn't seem necessary: we can just use const on the objects
|
||||
that are supposed to be read-only.
|
||||
|
||||
Also, with the new, more C++-heavy option handling, it seems
|
||||
better to use constexpr for the static data, to make sure that
|
||||
we're not adding unexpected overhead.
|
||||
|
||||
gcc/
|
||||
* common/config/aarch64/aarch64-common.cc (aarch64_option_extension)
|
||||
(processor_name_to_arch, arch_to_arch_name): Remove const from
|
||||
member variables.
|
||||
(all_extensions, all_cores, all_architectures): Make a constexpr.
|
||||
* config/aarch64/aarch64.cc (processor): Remove const from
|
||||
member variables.
|
||||
(all_architectures): Make a constexpr.
|
||||
* config/aarch64/driver-aarch64.cc (aarch64_core_data)
|
||||
(aarch64_arch_driver_info): Remove const from member variables.
|
||||
(aarch64_cpu_data, aarch64_arches): Make a constexpr.
|
||||
(get_arch_from_id): Return a pointer to const.
|
||||
(host_detect_local_cpu): Update accordingly.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 26 ++++++++++-----------
|
||||
gcc/config/aarch64/aarch64.cc | 14 +++++------
|
||||
gcc/config/aarch64/driver-aarch64.cc | 15 ++++++------
|
||||
3 files changed, 27 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 2bdf51b8b..ac3486d71 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -203,14 +203,14 @@ aarch64_handle_option (struct gcc_options *opts,
|
||||
/* An ISA extension in the co-processor and main instruction set space. */
|
||||
struct aarch64_option_extension
|
||||
{
|
||||
- const char *const name;
|
||||
- const uint64_t flag_canonical;
|
||||
- const uint64_t flags_on;
|
||||
- const uint64_t flags_off;
|
||||
+ const char *name;
|
||||
+ uint64_t flag_canonical;
|
||||
+ uint64_t flags_on;
|
||||
+ uint64_t flags_off;
|
||||
};
|
||||
|
||||
/* ISA extensions in AArch64. */
|
||||
-static const struct aarch64_option_extension all_extensions[] =
|
||||
+static constexpr aarch64_option_extension all_extensions[] =
|
||||
{
|
||||
#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
|
||||
{NAME, AARCH64_FL_##IDENT, \
|
||||
@@ -223,21 +223,21 @@ static const struct aarch64_option_extension all_extensions[] =
|
||||
|
||||
struct processor_name_to_arch
|
||||
{
|
||||
- const char *const processor_name;
|
||||
- const enum aarch64_arch arch;
|
||||
- const uint64_t flags;
|
||||
+ const char *processor_name;
|
||||
+ aarch64_arch arch;
|
||||
+ uint64_t flags;
|
||||
};
|
||||
|
||||
struct arch_to_arch_name
|
||||
{
|
||||
- const enum aarch64_arch arch;
|
||||
- const char *const arch_name;
|
||||
- const uint64_t flags;
|
||||
+ aarch64_arch arch;
|
||||
+ const char *arch_name;
|
||||
+ uint64_t flags;
|
||||
};
|
||||
|
||||
/* Map processor names to the architecture revision they implement and
|
||||
the default set of architectural feature flags they support. */
|
||||
-static const struct processor_name_to_arch all_cores[] =
|
||||
+static constexpr processor_name_to_arch all_cores[] =
|
||||
{
|
||||
#define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \
|
||||
{NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
|
||||
@@ -247,7 +247,7 @@ static const struct processor_name_to_arch all_cores[] =
|
||||
};
|
||||
|
||||
/* Map architecture revisions to their string representation. */
|
||||
-static const struct arch_to_arch_name all_architectures[] =
|
||||
+static constexpr arch_to_arch_name all_architectures[] =
|
||||
{
|
||||
#define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \
|
||||
{AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 1363873b1..71db7ace1 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -2925,16 +2925,16 @@ aarch64_tuning_override_functions[] =
|
||||
/* A processor implementing AArch64. */
|
||||
struct processor
|
||||
{
|
||||
- const char *const name;
|
||||
- enum aarch64_processor ident;
|
||||
- enum aarch64_processor sched_core;
|
||||
- enum aarch64_arch arch;
|
||||
- const uint64_t flags;
|
||||
- const struct tune_params *const tune;
|
||||
+ const char *name;
|
||||
+ aarch64_processor ident;
|
||||
+ aarch64_processor sched_core;
|
||||
+ aarch64_arch arch;
|
||||
+ uint64_t flags;
|
||||
+ const tune_params *tune;
|
||||
};
|
||||
|
||||
/* Architectures implementing AArch64. */
|
||||
-static const struct processor all_architectures[] =
|
||||
+static constexpr processor all_architectures[] =
|
||||
{
|
||||
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \
|
||||
{NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \
|
||||
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
|
||||
index ddfc9451f..ee9cb65a5 100644
|
||||
--- a/gcc/config/aarch64/driver-aarch64.cc
|
||||
+++ b/gcc/config/aarch64/driver-aarch64.cc
|
||||
@@ -50,7 +50,7 @@ struct aarch64_core_data
|
||||
unsigned char implementer_id; /* Exactly 8 bits */
|
||||
unsigned int part_no; /* 12 bits + 12 bits */
|
||||
unsigned variant;
|
||||
- const uint64_t flags;
|
||||
+ uint64_t flags;
|
||||
};
|
||||
|
||||
#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
|
||||
@@ -64,7 +64,7 @@ struct aarch64_core_data
|
||||
#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
|
||||
{ CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
|
||||
|
||||
-static struct aarch64_core_data aarch64_cpu_data[] =
|
||||
+static constexpr aarch64_core_data aarch64_cpu_data[] =
|
||||
{
|
||||
#include "aarch64-cores.def"
|
||||
{ NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 }
|
||||
@@ -75,14 +75,14 @@ struct aarch64_arch_driver_info
|
||||
{
|
||||
const char* id;
|
||||
const char* name;
|
||||
- const uint64_t flags;
|
||||
+ uint64_t flags;
|
||||
};
|
||||
|
||||
/* Skip the leading "V" in the architecture name. */
|
||||
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
|
||||
{ #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable },
|
||||
|
||||
-static struct aarch64_arch_driver_info aarch64_arches[] =
|
||||
+static constexpr aarch64_arch_driver_info aarch64_arches[] =
|
||||
{
|
||||
#include "aarch64-arches.def"
|
||||
{NULL, NULL, 0}
|
||||
@@ -92,7 +92,7 @@ static struct aarch64_arch_driver_info aarch64_arches[] =
|
||||
/* Return an aarch64_arch_driver_info for the architecture described
|
||||
by ID, or NULL if ID describes something we don't know about. */
|
||||
|
||||
-static struct aarch64_arch_driver_info*
|
||||
+static const aarch64_arch_driver_info *
|
||||
get_arch_from_id (const char* id)
|
||||
{
|
||||
unsigned int i = 0;
|
||||
@@ -396,8 +396,7 @@ host_detect_local_cpu (int argc, const char **argv)
|
||||
|
||||
if (aarch64_cpu_data[i].name == NULL)
|
||||
{
|
||||
- aarch64_arch_driver_info* arch_info
|
||||
- = get_arch_from_id (DEFAULT_ARCH);
|
||||
+ auto arch_info = get_arch_from_id (DEFAULT_ARCH);
|
||||
|
||||
gcc_assert (arch_info);
|
||||
|
||||
@@ -407,7 +406,7 @@ host_detect_local_cpu (int argc, const char **argv)
|
||||
else if (arch)
|
||||
{
|
||||
const char *arch_id = aarch64_cpu_data[i].arch;
|
||||
- aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
|
||||
+ auto arch_info = get_arch_from_id (arch_id);
|
||||
|
||||
/* We got some arch indentifier that's not in aarch64-arches.def? */
|
||||
if (!arch_info)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
394
0115-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
Normal file
394
0115-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch
Normal file
@ -0,0 +1,394 @@
|
||||
From bdb91009cf250fb22c21ae7f5072263492f2b08c Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:56 +0100
|
||||
Subject: [PATCH 023/157] [Backport][SME] aarch64: Make more use of
|
||||
aarch64_feature_flags
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fed55a60e5b230bc159617f26e33611073c672fd
|
||||
|
||||
A previous patch added an aarch64_feature_flags typedef, to abstract
|
||||
the representation of the feature flags. This patch makes existing
|
||||
code use the typedef too. Hope I've caught them all!
|
||||
|
||||
gcc/
|
||||
* common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags
|
||||
for feature flags throughout.
|
||||
* config/aarch64/aarch64-protos.h: Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins.h: Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins.cc: Likewise.
|
||||
* config/aarch64/aarch64.cc: Likewise.
|
||||
* config/aarch64/aarch64.opt: Likewise.
|
||||
* config/aarch64/driver-aarch64.cc: Likewise.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 19 +++++++-------
|
||||
gcc/config/aarch64/aarch64-protos.h | 5 ++--
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 29 ++++++++++++---------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 9 ++++---
|
||||
gcc/config/aarch64/aarch64.cc | 29 +++++++++++----------
|
||||
gcc/config/aarch64/aarch64.opt | 2 +-
|
||||
gcc/config/aarch64/driver-aarch64.cc | 10 +++----
|
||||
7 files changed, 56 insertions(+), 47 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index ac3486d71..3efa57b26 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -204,9 +204,9 @@ aarch64_handle_option (struct gcc_options *opts,
|
||||
struct aarch64_option_extension
|
||||
{
|
||||
const char *name;
|
||||
- uint64_t flag_canonical;
|
||||
- uint64_t flags_on;
|
||||
- uint64_t flags_off;
|
||||
+ aarch64_feature_flags flag_canonical;
|
||||
+ aarch64_feature_flags flags_on;
|
||||
+ aarch64_feature_flags flags_off;
|
||||
};
|
||||
|
||||
/* ISA extensions in AArch64. */
|
||||
@@ -225,14 +225,14 @@ struct processor_name_to_arch
|
||||
{
|
||||
const char *processor_name;
|
||||
aarch64_arch arch;
|
||||
- uint64_t flags;
|
||||
+ aarch64_feature_flags flags;
|
||||
};
|
||||
|
||||
struct arch_to_arch_name
|
||||
{
|
||||
aarch64_arch arch;
|
||||
const char *arch_name;
|
||||
- uint64_t flags;
|
||||
+ aarch64_feature_flags flags;
|
||||
};
|
||||
|
||||
/* Map processor names to the architecture revision they implement and
|
||||
@@ -262,7 +262,7 @@ static constexpr arch_to_arch_name all_architectures[] =
|
||||
a copy of the string is created and stored to INVALID_EXTENSION. */
|
||||
|
||||
enum aarch64_parse_opt_result
|
||||
-aarch64_parse_extension (const char *str, uint64_t *isa_flags,
|
||||
+aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
|
||||
std::string *invalid_extension)
|
||||
{
|
||||
/* The extension string is parsed left to right. */
|
||||
@@ -342,8 +342,9 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
|
||||
that all the "+" flags come before the "+no" flags. */
|
||||
|
||||
std::string
|
||||
-aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
|
||||
- uint64_t default_arch_flags)
|
||||
+aarch64_get_extension_string_for_isa_flags
|
||||
+ (aarch64_feature_flags isa_flags,
|
||||
+ aarch64_feature_flags default_arch_flags)
|
||||
{
|
||||
std::string outstr = "";
|
||||
|
||||
@@ -451,7 +452,7 @@ aarch64_rewrite_selected_cpu (const char *name)
|
||||
|| a_to_an->arch == aarch64_no_arch)
|
||||
fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name);
|
||||
|
||||
- uint64_t extensions = p_to_a->flags;
|
||||
+ aarch64_feature_flags extensions = p_to_a->flags;
|
||||
aarch64_parse_extension (extension_str.c_str (), &extensions, NULL);
|
||||
|
||||
std::string outstr = a_to_an->arch_name
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index e60ce3c36..ef84df731 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -1037,10 +1037,11 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
|
||||
const struct cl_decoded_option *, location_t);
|
||||
const char *aarch64_rewrite_selected_cpu (const char *name);
|
||||
enum aarch64_parse_opt_result aarch64_parse_extension (const char *,
|
||||
- uint64_t *,
|
||||
+ aarch64_feature_flags *,
|
||||
std::string *);
|
||||
void aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates);
|
||||
-std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t);
|
||||
+std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags,
|
||||
+ aarch64_feature_flags);
|
||||
|
||||
rtl_opt_pass *make_pass_fma_steering (gcc::context *);
|
||||
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index c06e99339..b927a886e 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -82,7 +82,7 @@ public:
|
||||
|
||||
/* The architecture extensions that the function requires, as a set of
|
||||
AARCH64_FL_* flags. */
|
||||
- uint64_t required_extensions;
|
||||
+ aarch64_feature_flags required_extensions;
|
||||
|
||||
/* True if the decl represents an overloaded function that needs to be
|
||||
resolved by function_resolver. */
|
||||
@@ -694,13 +694,16 @@ check_required_registers (location_t location, tree fndecl)
|
||||
Report an error against LOCATION if not. */
|
||||
static bool
|
||||
check_required_extensions (location_t location, tree fndecl,
|
||||
- uint64_t required_extensions)
|
||||
+ aarch64_feature_flags required_extensions)
|
||||
{
|
||||
- uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags;
|
||||
+ auto missing_extensions = required_extensions & ~aarch64_isa_flags;
|
||||
if (missing_extensions == 0)
|
||||
return check_required_registers (location, fndecl);
|
||||
|
||||
- static const struct { uint64_t flag; const char *name; } extensions[] = {
|
||||
+ static const struct {
|
||||
+ aarch64_feature_flags flag;
|
||||
+ const char *name;
|
||||
+ } extensions[] = {
|
||||
#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
|
||||
{ AARCH64_FL_##IDENT, EXT_NAME },
|
||||
#include "aarch64-option-extensions.def"
|
||||
@@ -992,7 +995,7 @@ function_builder::get_attributes (const function_instance &instance)
|
||||
registered_function &
|
||||
function_builder::add_function (const function_instance &instance,
|
||||
const char *name, tree fntype, tree attrs,
|
||||
- uint64_t required_extensions,
|
||||
+ aarch64_feature_flags required_extensions,
|
||||
bool overloaded_p,
|
||||
bool placeholder_p)
|
||||
{
|
||||
@@ -1034,11 +1037,12 @@ function_builder::add_function (const function_instance &instance,
|
||||
one-to-one mapping between "short" and "full" names, and if standard
|
||||
overload resolution therefore isn't necessary. */
|
||||
void
|
||||
-function_builder::add_unique_function (const function_instance &instance,
|
||||
- tree return_type,
|
||||
- vec<tree> &argument_types,
|
||||
- uint64_t required_extensions,
|
||||
- bool force_direct_overloads)
|
||||
+function_builder::
|
||||
+add_unique_function (const function_instance &instance,
|
||||
+ tree return_type,
|
||||
+ vec<tree> &argument_types,
|
||||
+ aarch64_feature_flags required_extensions,
|
||||
+ bool force_direct_overloads)
|
||||
{
|
||||
/* Add the function under its full (unique) name. */
|
||||
char *name = get_name (instance, false);
|
||||
@@ -1081,8 +1085,9 @@ function_builder::add_unique_function (const function_instance &instance,
|
||||
features are available as part of resolving the function to the
|
||||
relevant unique function. */
|
||||
void
|
||||
-function_builder::add_overloaded_function (const function_instance &instance,
|
||||
- uint64_t required_extensions)
|
||||
+function_builder::
|
||||
+add_overloaded_function (const function_instance &instance,
|
||||
+ aarch64_feature_flags required_extensions)
|
||||
{
|
||||
char *name = get_name (instance, true);
|
||||
if (registered_function **map_value = m_overload_names.get (name))
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 24594d584..63d1db776 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -263,7 +263,7 @@ struct function_group_info
|
||||
|
||||
/* The architecture extensions that the functions require, as a set of
|
||||
AARCH64_FL_* flags. */
|
||||
- uint64_t required_extensions;
|
||||
+ aarch64_feature_flags required_extensions;
|
||||
};
|
||||
|
||||
/* Describes a single fully-resolved function (i.e. one that has a
|
||||
@@ -321,8 +321,9 @@ public:
|
||||
~function_builder ();
|
||||
|
||||
void add_unique_function (const function_instance &, tree,
|
||||
- vec<tree> &, uint64_t, bool);
|
||||
- void add_overloaded_function (const function_instance &, uint64_t);
|
||||
+ vec<tree> &, aarch64_feature_flags, bool);
|
||||
+ void add_overloaded_function (const function_instance &,
|
||||
+ aarch64_feature_flags);
|
||||
void add_overloaded_functions (const function_group_info &,
|
||||
mode_suffix_index);
|
||||
|
||||
@@ -338,7 +339,7 @@ private:
|
||||
|
||||
registered_function &add_function (const function_instance &,
|
||||
const char *, tree, tree,
|
||||
- uint64_t, bool, bool);
|
||||
+ aarch64_feature_flags, bool, bool);
|
||||
|
||||
/* The function type to use for functions that are resolved by
|
||||
function_resolver. */
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 71db7ace1..8cb820767 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -2929,7 +2929,7 @@ struct processor
|
||||
aarch64_processor ident;
|
||||
aarch64_processor sched_core;
|
||||
aarch64_arch arch;
|
||||
- uint64_t flags;
|
||||
+ aarch64_feature_flags flags;
|
||||
const tune_params *tune;
|
||||
};
|
||||
|
||||
@@ -17428,7 +17428,8 @@ static void initialize_aarch64_code_model (struct gcc_options *);
|
||||
|
||||
static enum aarch64_parse_opt_result
|
||||
aarch64_parse_arch (const char *to_parse, const struct processor **res,
|
||||
- uint64_t *isa_flags, std::string *invalid_extension)
|
||||
+ aarch64_feature_flags *isa_flags,
|
||||
+ std::string *invalid_extension)
|
||||
{
|
||||
const char *ext;
|
||||
const struct processor *arch;
|
||||
@@ -17451,7 +17452,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res,
|
||||
if (strlen (arch->name) == len
|
||||
&& strncmp (arch->name, to_parse, len) == 0)
|
||||
{
|
||||
- uint64_t isa_temp = arch->flags;
|
||||
+ auto isa_temp = arch->flags;
|
||||
|
||||
if (ext != NULL)
|
||||
{
|
||||
@@ -17483,7 +17484,8 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res,
|
||||
|
||||
static enum aarch64_parse_opt_result
|
||||
aarch64_parse_cpu (const char *to_parse, const struct processor **res,
|
||||
- uint64_t *isa_flags, std::string *invalid_extension)
|
||||
+ aarch64_feature_flags *isa_flags,
|
||||
+ std::string *invalid_extension)
|
||||
{
|
||||
const char *ext;
|
||||
const struct processor *cpu;
|
||||
@@ -17505,8 +17507,7 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res,
|
||||
{
|
||||
if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
|
||||
{
|
||||
- uint64_t isa_temp = cpu->flags;
|
||||
-
|
||||
+ auto isa_temp = cpu->flags;
|
||||
|
||||
if (ext != NULL)
|
||||
{
|
||||
@@ -18137,7 +18138,7 @@ aarch64_print_hint_for_extensions (const std::string &str)
|
||||
|
||||
static bool
|
||||
aarch64_validate_mcpu (const char *str, const struct processor **res,
|
||||
- uint64_t *isa_flags)
|
||||
+ aarch64_feature_flags *isa_flags)
|
||||
{
|
||||
std::string invalid_extension;
|
||||
enum aarch64_parse_opt_result parse_res
|
||||
@@ -18351,7 +18352,7 @@ aarch64_validate_mbranch_protection (const char *const_str)
|
||||
|
||||
static bool
|
||||
aarch64_validate_march (const char *str, const struct processor **res,
|
||||
- uint64_t *isa_flags)
|
||||
+ aarch64_feature_flags *isa_flags)
|
||||
{
|
||||
std::string invalid_extension;
|
||||
enum aarch64_parse_opt_result parse_res
|
||||
@@ -18441,8 +18442,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
|
||||
static void
|
||||
aarch64_override_options (void)
|
||||
{
|
||||
- uint64_t cpu_isa = 0;
|
||||
- uint64_t arch_isa = 0;
|
||||
+ aarch64_feature_flags cpu_isa = 0;
|
||||
+ aarch64_feature_flags arch_isa = 0;
|
||||
aarch64_isa_flags = 0;
|
||||
|
||||
const struct processor *cpu = NULL;
|
||||
@@ -18890,7 +18891,7 @@ static bool
|
||||
aarch64_handle_attr_isa_flags (char *str)
|
||||
{
|
||||
enum aarch64_parse_opt_result parse_res;
|
||||
- uint64_t isa_flags = aarch64_isa_flags;
|
||||
+ auto isa_flags = aarch64_isa_flags;
|
||||
|
||||
/* We allow "+nothing" in the beginning to clear out all architectural
|
||||
features if the user wants to handpick specific features. */
|
||||
@@ -19162,7 +19163,7 @@ aarch64_process_target_attr (tree args)
|
||||
{
|
||||
/* Check if token is possibly an arch extension without
|
||||
leading '+'. */
|
||||
- uint64_t isa_temp = 0;
|
||||
+ aarch64_feature_flags isa_temp = 0;
|
||||
auto with_plus = std::string ("+") + token;
|
||||
enum aarch64_parse_opt_result ext_res
|
||||
= aarch64_parse_extension (with_plus.c_str (), &isa_temp, nullptr);
|
||||
@@ -22771,7 +22772,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
|
||||
const struct processor *this_arch
|
||||
= aarch64_get_arch (targ_options->x_selected_arch);
|
||||
|
||||
- uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
|
||||
+ auto isa_flags = targ_options->x_aarch64_isa_flags;
|
||||
std::string extension
|
||||
= aarch64_get_extension_string_for_isa_flags (isa_flags,
|
||||
this_arch->flags);
|
||||
@@ -22901,7 +22902,7 @@ aarch64_start_file (void)
|
||||
|
||||
const struct processor *default_arch
|
||||
= aarch64_get_arch (default_options->x_selected_arch);
|
||||
- uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
|
||||
+ auto default_isa_flags = default_options->x_aarch64_isa_flags;
|
||||
std::string extension
|
||||
= aarch64_get_extension_string_for_isa_flags (default_isa_flags,
|
||||
default_arch->flags);
|
||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
index 836a3c784..47ec7824f 100644
|
||||
--- a/gcc/config/aarch64/aarch64.opt
|
||||
+++ b/gcc/config/aarch64/aarch64.opt
|
||||
@@ -28,7 +28,7 @@ TargetVariable
|
||||
enum aarch64_arch selected_arch = aarch64_no_arch
|
||||
|
||||
TargetVariable
|
||||
-uint64_t aarch64_isa_flags = 0
|
||||
+aarch64_feature_flags aarch64_isa_flags = 0
|
||||
|
||||
TargetVariable
|
||||
unsigned aarch64_enable_bti = 2
|
||||
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
|
||||
index ee9cb65a5..2ae47c020 100644
|
||||
--- a/gcc/config/aarch64/driver-aarch64.cc
|
||||
+++ b/gcc/config/aarch64/driver-aarch64.cc
|
||||
@@ -31,7 +31,7 @@
|
||||
struct aarch64_arch_extension
|
||||
{
|
||||
const char *ext;
|
||||
- uint64_t flag;
|
||||
+ aarch64_feature_flags flag;
|
||||
const char *feat_string;
|
||||
};
|
||||
|
||||
@@ -50,7 +50,7 @@ struct aarch64_core_data
|
||||
unsigned char implementer_id; /* Exactly 8 bits */
|
||||
unsigned int part_no; /* 12 bits + 12 bits */
|
||||
unsigned variant;
|
||||
- uint64_t flags;
|
||||
+ aarch64_feature_flags flags;
|
||||
};
|
||||
|
||||
#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
|
||||
@@ -75,7 +75,7 @@ struct aarch64_arch_driver_info
|
||||
{
|
||||
const char* id;
|
||||
const char* name;
|
||||
- uint64_t flags;
|
||||
+ aarch64_feature_flags flags;
|
||||
};
|
||||
|
||||
/* Skip the leading "V" in the architecture name. */
|
||||
@@ -261,8 +261,8 @@ host_detect_local_cpu (int argc, const char **argv)
|
||||
unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
|
||||
unsigned int n_variants = 0;
|
||||
bool processed_exts = false;
|
||||
- uint64_t extension_flags = 0;
|
||||
- uint64_t default_flags = 0;
|
||||
+ aarch64_feature_flags extension_flags = 0;
|
||||
+ aarch64_feature_flags default_flags = 0;
|
||||
std::string buf;
|
||||
size_t sep_pos = -1;
|
||||
char *fcpu_info;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,70 @@
|
||||
From eb92c185c1c71edcbd83b1c66fe4f9e7d52a98b3 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:56 +0100
|
||||
Subject: [PATCH 024/157] [Backport][SME] aarch64: Tweak contents of
|
||||
flags_on/off fields
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb7f43b62a58a0f0326fd3060f0bd43e6f3ef971
|
||||
|
||||
After previous changes, it's more convenient if the flags_on and
|
||||
flags_off fields of all_extensions include the feature flag itself.
|
||||
|
||||
gcc/
|
||||
* common/config/aarch64/aarch64-common.cc (all_extensions):
|
||||
Include the feature flag in flags_on and flags_off.
|
||||
(aarch64_parse_extension): Update accordingly.
|
||||
(aarch64_get_extension_string_for_isa_flags): Likewise.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 14 ++++++--------
|
||||
1 file changed, 6 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 3efa57b26..752ba5632 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -213,10 +213,8 @@ struct aarch64_option_extension
|
||||
static constexpr aarch64_option_extension all_extensions[] =
|
||||
{
|
||||
#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
|
||||
- {NAME, AARCH64_FL_##IDENT, \
|
||||
- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
|
||||
- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
|
||||
- & ~AARCH64_FL_##IDENT},
|
||||
+ {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
|
||||
+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
|
||||
#include "config/aarch64/aarch64-option-extensions.def"
|
||||
{NULL, 0, 0, 0}
|
||||
};
|
||||
@@ -304,9 +302,9 @@ aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
|
||||
{
|
||||
/* Add or remove the extension. */
|
||||
if (adding_ext)
|
||||
- *isa_flags |= (opt->flags_on | opt->flag_canonical);
|
||||
+ *isa_flags |= opt->flags_on;
|
||||
else
|
||||
- *isa_flags &= ~(opt->flags_off | opt->flag_canonical);
|
||||
+ *isa_flags &= ~opt->flags_off;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -380,7 +378,7 @@ aarch64_get_extension_string_for_isa_flags
|
||||
|
||||
if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
|
||||
{
|
||||
- current_flags |= opt.flag_canonical | opt.flags_on;
|
||||
+ current_flags |= opt.flags_on;
|
||||
added |= opt.flag_canonical;
|
||||
}
|
||||
}
|
||||
@@ -395,7 +393,7 @@ aarch64_get_extension_string_for_isa_flags
|
||||
for (auto &opt : all_extensions)
|
||||
if (opt.flag_canonical & current_flags & ~isa_flags)
|
||||
{
|
||||
- current_flags &= ~(opt.flag_canonical | opt.flags_off);
|
||||
+ current_flags &= ~opt.flags_off;
|
||||
outstr += "+no";
|
||||
outstr += opt.name;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
370
0117-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
Normal file
370
0117-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch
Normal file
@ -0,0 +1,370 @@
|
||||
From 91f7471cbc7dec42673b58a1896330d64eb6be2a Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:57 +0100
|
||||
Subject: [PATCH 025/157] [Backport][SME] aarch64: Tweak handling of
|
||||
-mgeneral-regs-only
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a269bda9e7b8f9353699d0c965e7e9246500aa0
|
||||
|
||||
-mgeneral-regs-only is effectively "+nofp for the compiler without
|
||||
changing the assembler's ISA flags". Currently that's implemented
|
||||
by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on
|
||||
!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP
|
||||
registers depend (directly or indirectly) on one of those three TARGET
|
||||
macros. The problem is that it's easy to forget to do the last bit.
|
||||
|
||||
This patch instead represents the distinction between "assembler
|
||||
ISA flags" and "compiler ISA flags" more directly, funnelling
|
||||
all updates through a new function that sets both sets of flags
|
||||
together.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable.
|
||||
* config/aarch64/aarch64.h (aarch64_asm_isa_flags)
|
||||
(aarch64_isa_flags): Redefine as read-only macros.
|
||||
(TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on
|
||||
!TARGET_GENERAL_REGS_ONLY.
|
||||
* common/config/aarch64/aarch64-common.cc
|
||||
(aarch64_set_asm_isa_flags): New function.
|
||||
(aarch64_handle_option): Call it when updating -mgeneral-regs.
|
||||
* config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace
|
||||
m_old_isa_flags with m_old_asm_isa_flags.
|
||||
(aarch64_set_asm_isa_flags): Declare.
|
||||
* config/aarch64/aarch64-builtins.cc
|
||||
(aarch64_simd_switcher::aarch64_simd_switcher)
|
||||
(aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore
|
||||
aarch64_asm_isa_flags instead of aarch64_isa_flags.
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(check_required_extensions): Use aarch64_asm_isa_flags instead
|
||||
of aarch64_isa_flags.
|
||||
* config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function.
|
||||
(aarch64_override_options, aarch64_handle_attr_arch)
|
||||
(aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use
|
||||
aarch64_set_asm_isa_flags to set the ISA flags.
|
||||
(aarch64_option_print, aarch64_declare_function_name)
|
||||
(aarch64_start_file): Use aarch64_asm_isa_flags instead
|
||||
of aarch64_isa_flags.
|
||||
(aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as
|
||||
aarch64_isa_flags.
|
||||
---
|
||||
gcc/common/config/aarch64/aarch64-common.cc | 12 ++++++
|
||||
gcc/config/aarch64/aarch64-builtins.cc | 6 +--
|
||||
gcc/config/aarch64/aarch64-protos.h | 5 ++-
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++-------
|
||||
gcc/config/aarch64/aarch64.h | 17 ++++++--
|
||||
gcc/config/aarch64/aarch64.opt | 3 ++
|
||||
7 files changed, 68 insertions(+), 22 deletions(-)
|
||||
|
||||
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
index 752ba5632..c64b4987e 100644
|
||||
--- a/gcc/common/config/aarch64/aarch64-common.cc
|
||||
+++ b/gcc/common/config/aarch64/aarch64-common.cc
|
||||
@@ -137,6 +137,17 @@ reset_tsv110_option ()
|
||||
}
|
||||
}
|
||||
|
||||
+/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update
|
||||
+ OPTS->x_aarch64_isa_flags accordingly. */
|
||||
+void
|
||||
+aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags)
|
||||
+{
|
||||
+ opts->x_aarch64_asm_isa_flags = flags;
|
||||
+ opts->x_aarch64_isa_flags = flags;
|
||||
+ if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
|
||||
+ opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
|
||||
+}
|
||||
+
|
||||
/* Implement TARGET_HANDLE_OPTION.
|
||||
This function handles the target specific options for CPU/target selection.
|
||||
|
||||
@@ -174,6 +185,7 @@ aarch64_handle_option (struct gcc_options *opts,
|
||||
|
||||
case OPT_mgeneral_regs_only:
|
||||
opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
|
||||
+ aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags);
|
||||
return true;
|
||||
|
||||
case OPT_mfix_cortex_a53_835769:
|
||||
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
|
||||
index 42276e7ca..015e9d975 100644
|
||||
--- a/gcc/config/aarch64/aarch64-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-builtins.cc
|
||||
@@ -1336,20 +1336,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t)
|
||||
/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
|
||||
set. */
|
||||
aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
|
||||
- : m_old_isa_flags (aarch64_isa_flags),
|
||||
+ : m_old_asm_isa_flags (aarch64_asm_isa_flags),
|
||||
m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
|
||||
{
|
||||
/* Changing the ISA flags should be enough here. We shouldn't need to
|
||||
pay the compile-time cost of a full target switch. */
|
||||
- aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
|
||||
global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
|
||||
+ aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
|
||||
}
|
||||
|
||||
aarch64_simd_switcher::~aarch64_simd_switcher ()
|
||||
{
|
||||
if (m_old_general_regs_only)
|
||||
global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
|
||||
- aarch64_isa_flags = m_old_isa_flags;
|
||||
+ aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
|
||||
}
|
||||
|
||||
/* Implement #pragma GCC aarch64 "arm_neon.h". */
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index ef84df731..86e444a60 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -747,7 +747,7 @@ public:
|
||||
~aarch64_simd_switcher ();
|
||||
|
||||
private:
|
||||
- unsigned long m_old_isa_flags;
|
||||
+ unsigned long m_old_asm_isa_flags;
|
||||
bool m_old_general_regs_only;
|
||||
};
|
||||
|
||||
@@ -1032,7 +1032,10 @@ extern bool aarch64_classify_address (struct aarch64_address_info *, rtx,
|
||||
machine_mode, bool,
|
||||
aarch64_addr_query_type = ADDR_QUERY_M);
|
||||
|
||||
+void aarch64_set_asm_isa_flags (aarch64_feature_flags);
|
||||
+
|
||||
/* Defined in common/config/aarch64-common.cc. */
|
||||
+void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags);
|
||||
bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
|
||||
const struct cl_decoded_option *, location_t);
|
||||
const char *aarch64_rewrite_selected_cpu (const char *name);
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index b927a886e..a70e3a6b4 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -696,7 +696,7 @@ static bool
|
||||
check_required_extensions (location_t location, tree fndecl,
|
||||
aarch64_feature_flags required_extensions)
|
||||
{
|
||||
- auto missing_extensions = required_extensions & ~aarch64_isa_flags;
|
||||
+ auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
|
||||
if (missing_extensions == 0)
|
||||
return check_required_registers (location, fndecl);
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 8cb820767..3e83e48ec 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -18432,10 +18432,19 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
|
||||
return (int) value / 64;
|
||||
}
|
||||
|
||||
+/* Set the global aarch64_asm_isa_flags to FLAGS and update
|
||||
+ aarch64_isa_flags accordingly. */
|
||||
+
|
||||
+void
|
||||
+aarch64_set_asm_isa_flags (aarch64_feature_flags flags)
|
||||
+{
|
||||
+ aarch64_set_asm_isa_flags (&global_options, flags);
|
||||
+}
|
||||
+
|
||||
/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
|
||||
and is used to parse the -m{cpu,tune,arch} strings and setup the initial
|
||||
tuning structs. In particular it must set selected_tune and
|
||||
- aarch64_isa_flags that define the available ISA features and tuning
|
||||
+ aarch64_asm_isa_flags that define the available ISA features and tuning
|
||||
decisions. It must also set selected_arch as this will be used to
|
||||
output the .arch asm tags for each function. */
|
||||
|
||||
@@ -18444,7 +18453,7 @@ aarch64_override_options (void)
|
||||
{
|
||||
aarch64_feature_flags cpu_isa = 0;
|
||||
aarch64_feature_flags arch_isa = 0;
|
||||
- aarch64_isa_flags = 0;
|
||||
+ aarch64_set_asm_isa_flags (0);
|
||||
|
||||
const struct processor *cpu = NULL;
|
||||
const struct processor *arch = NULL;
|
||||
@@ -18484,25 +18493,25 @@ aarch64_override_options (void)
|
||||
}
|
||||
|
||||
selected_arch = arch->arch;
|
||||
- aarch64_isa_flags = arch_isa;
|
||||
+ aarch64_set_asm_isa_flags (arch_isa);
|
||||
}
|
||||
else if (cpu)
|
||||
{
|
||||
selected_arch = cpu->arch;
|
||||
- aarch64_isa_flags = cpu_isa;
|
||||
+ aarch64_set_asm_isa_flags (cpu_isa);
|
||||
}
|
||||
else if (arch)
|
||||
{
|
||||
cpu = &all_cores[arch->ident];
|
||||
selected_arch = arch->arch;
|
||||
- aarch64_isa_flags = arch_isa;
|
||||
+ aarch64_set_asm_isa_flags (arch_isa);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No -mcpu or -march specified, so use the default CPU. */
|
||||
cpu = &all_cores[TARGET_CPU_DEFAULT];
|
||||
selected_arch = cpu->arch;
|
||||
- aarch64_isa_flags = cpu->flags;
|
||||
+ aarch64_set_asm_isa_flags (cpu->flags);
|
||||
}
|
||||
|
||||
selected_tune = tune ? tune->ident : cpu->ident;
|
||||
@@ -18644,7 +18653,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
|
||||
= aarch64_get_tune_cpu (ptr->x_selected_tune);
|
||||
const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch);
|
||||
std::string extension
|
||||
- = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags,
|
||||
+ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_asm_isa_flags,
|
||||
arch->flags);
|
||||
|
||||
fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
|
||||
@@ -18752,13 +18761,15 @@ aarch64_handle_attr_arch (const char *str)
|
||||
{
|
||||
const struct processor *tmp_arch = NULL;
|
||||
std::string invalid_extension;
|
||||
+ aarch64_feature_flags tmp_flags;
|
||||
enum aarch64_parse_opt_result parse_res
|
||||
- = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
|
||||
+ = aarch64_parse_arch (str, &tmp_arch, &tmp_flags, &invalid_extension);
|
||||
|
||||
if (parse_res == AARCH64_PARSE_OK)
|
||||
{
|
||||
gcc_assert (tmp_arch);
|
||||
selected_arch = tmp_arch->arch;
|
||||
+ aarch64_set_asm_isa_flags (tmp_flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -18790,14 +18801,16 @@ aarch64_handle_attr_cpu (const char *str)
|
||||
{
|
||||
const struct processor *tmp_cpu = NULL;
|
||||
std::string invalid_extension;
|
||||
+ aarch64_feature_flags tmp_flags;
|
||||
enum aarch64_parse_opt_result parse_res
|
||||
- = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
|
||||
+ = aarch64_parse_cpu (str, &tmp_cpu, &tmp_flags, &invalid_extension);
|
||||
|
||||
if (parse_res == AARCH64_PARSE_OK)
|
||||
{
|
||||
gcc_assert (tmp_cpu);
|
||||
selected_tune = tmp_cpu->ident;
|
||||
selected_arch = tmp_cpu->arch;
|
||||
+ aarch64_set_asm_isa_flags (tmp_flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -18891,7 +18904,7 @@ static bool
|
||||
aarch64_handle_attr_isa_flags (char *str)
|
||||
{
|
||||
enum aarch64_parse_opt_result parse_res;
|
||||
- auto isa_flags = aarch64_isa_flags;
|
||||
+ auto isa_flags = aarch64_asm_isa_flags;
|
||||
|
||||
/* We allow "+nothing" in the beginning to clear out all architectural
|
||||
features if the user wants to handpick specific features. */
|
||||
@@ -18906,7 +18919,7 @@ aarch64_handle_attr_isa_flags (char *str)
|
||||
|
||||
if (parse_res == AARCH64_PARSE_OK)
|
||||
{
|
||||
- aarch64_isa_flags = isa_flags;
|
||||
+ aarch64_set_asm_isa_flags (isa_flags);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -19328,8 +19341,12 @@ aarch64_can_inline_p (tree caller, tree callee)
|
||||
: target_option_default_node);
|
||||
|
||||
/* Callee's ISA flags should be a subset of the caller's. */
|
||||
+ if ((caller_opts->x_aarch64_asm_isa_flags
|
||||
+ & callee_opts->x_aarch64_asm_isa_flags)
|
||||
+ != callee_opts->x_aarch64_asm_isa_flags)
|
||||
+ return false;
|
||||
if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
|
||||
- != callee_opts->x_aarch64_isa_flags)
|
||||
+ != callee_opts->x_aarch64_isa_flags)
|
||||
return false;
|
||||
|
||||
/* Allow non-strict aligned functions inlining into strict
|
||||
@@ -22772,7 +22789,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
|
||||
const struct processor *this_arch
|
||||
= aarch64_get_arch (targ_options->x_selected_arch);
|
||||
|
||||
- auto isa_flags = targ_options->x_aarch64_isa_flags;
|
||||
+ auto isa_flags = targ_options->x_aarch64_asm_isa_flags;
|
||||
std::string extension
|
||||
= aarch64_get_extension_string_for_isa_flags (isa_flags,
|
||||
this_arch->flags);
|
||||
@@ -22902,7 +22919,7 @@ aarch64_start_file (void)
|
||||
|
||||
const struct processor *default_arch
|
||||
= aarch64_get_arch (default_options->x_selected_arch);
|
||||
- auto default_isa_flags = default_options->x_aarch64_isa_flags;
|
||||
+ auto default_isa_flags = default_options->x_aarch64_asm_isa_flags;
|
||||
std::string extension
|
||||
= aarch64_get_extension_string_for_isa_flags (default_isa_flags,
|
||||
default_arch->flags);
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 50a2ef444..521031efe 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -22,6 +22,17 @@
|
||||
#ifndef GCC_AARCH64_H
|
||||
#define GCC_AARCH64_H
|
||||
|
||||
+/* Make these flags read-only so that all uses go via
|
||||
+ aarch64_set_asm_isa_flags. */
|
||||
+#ifndef GENERATOR_FILE
|
||||
+#undef aarch64_asm_isa_flags
|
||||
+#define aarch64_asm_isa_flags \
|
||||
+ ((aarch64_feature_flags) global_options.x_aarch64_asm_isa_flags)
|
||||
+#undef aarch64_isa_flags
|
||||
+#define aarch64_isa_flags \
|
||||
+ ((aarch64_feature_flags) global_options.x_aarch64_isa_flags)
|
||||
+#endif
|
||||
+
|
||||
/* Target CPU builtins. */
|
||||
#define TARGET_CPU_CPP_BUILTINS() \
|
||||
aarch64_cpu_cpp_builtins (pfile)
|
||||
@@ -51,8 +62,8 @@
|
||||
|
||||
/* AdvSIMD is supported in the default configuration, unless disabled by
|
||||
-mgeneral-regs-only or by the +nosimd extension. */
|
||||
-#define TARGET_SIMD (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SIMD)
|
||||
-#define TARGET_FLOAT (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_FP)
|
||||
+#define TARGET_SIMD (AARCH64_ISA_SIMD)
|
||||
+#define TARGET_FLOAT (AARCH64_ISA_FP)
|
||||
|
||||
#define UNITS_PER_WORD 8
|
||||
|
||||
@@ -242,7 +253,7 @@ enum class aarch64_feature : unsigned char {
|
||||
#define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD)
|
||||
|
||||
/* SVE instructions, enabled through +sve. */
|
||||
-#define TARGET_SVE (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SVE)
|
||||
+#define TARGET_SVE (AARCH64_ISA_SVE)
|
||||
|
||||
/* SVE2 instructions, enabled through +sve2. */
|
||||
#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2)
|
||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
index 47ec7824f..5f507abd4 100644
|
||||
--- a/gcc/config/aarch64/aarch64.opt
|
||||
+++ b/gcc/config/aarch64/aarch64.opt
|
||||
@@ -27,6 +27,9 @@ enum aarch64_processor selected_tune = aarch64_none
|
||||
TargetVariable
|
||||
enum aarch64_arch selected_arch = aarch64_no_arch
|
||||
|
||||
+TargetVariable
|
||||
+aarch64_feature_flags aarch64_asm_isa_flags = 0
|
||||
+
|
||||
TargetVariable
|
||||
aarch64_feature_flags aarch64_isa_flags = 0
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
453
0118-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
Normal file
453
0118-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch
Normal file
@ -0,0 +1,453 @@
|
||||
From 77a86d955dd1c9cd8c7fc35e6caf0cb707799129 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 29 Sep 2022 11:32:57 +0100
|
||||
Subject: [PATCH 026/157] [Backport][SME] aarch64: Remove redundant TARGET_*
|
||||
checks
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a31641840af2c40cf36036fa472df34d4a4402c3
|
||||
|
||||
After previous patches, it's possible to remove TARGET_*
|
||||
options that are redundant due to (IMO) obvious dependencies.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4)
|
||||
(TARGET_DOTPROD): Don't depend on TARGET_SIMD.
|
||||
(TARGET_AES, TARGET_SHA2): Likewise. Remove TARGET_CRYPTO test.
|
||||
(TARGET_FP_F16INST): Don't depend on TARGET_FLOAT.
|
||||
(TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend
|
||||
on TARGET_SVE.
|
||||
(TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3)
|
||||
(TARGET_SVE2_SM4): Don't depend on TARGET_SVE2.
|
||||
(TARGET_F32MM, TARGET_F64MM): Delete.
|
||||
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard
|
||||
float macros with just TARGET_FLOAT rather than TARGET_FLOAT
|
||||
|| TARGET_SIMD.
|
||||
* config/aarch64/aarch64-simd.md (copysign<mode>3): Depend
|
||||
only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD.
|
||||
(aarch64_crypto_aes<aes_op>v16qi): Depend only on TARGET_AES,
|
||||
rather than TARGET_SIMD && TARGET_AES.
|
||||
(aarch64_crypto_aes<aesmc_op>v16qi): Likewise.
|
||||
(*aarch64_crypto_aese_fused): Likewise.
|
||||
(*aarch64_crypto_aesd_fused): Likewise.
|
||||
(aarch64_crypto_pmulldi): Likewise.
|
||||
(aarch64_crypto_pmullv2di): Likewise.
|
||||
(aarch64_crypto_sha1hsi): Likewise TARGET_SHA2.
|
||||
(aarch64_crypto_sha1hv4si): Likewise.
|
||||
(aarch64_be_crypto_sha1hv4si): Likewise.
|
||||
(aarch64_crypto_sha1su1v4si): Likewise.
|
||||
(aarch64_crypto_sha1<sha1_op>v4si): Likewise.
|
||||
(aarch64_crypto_sha1su0v4si): Likewise.
|
||||
(aarch64_crypto_sha256h<sha256_op>v4si): Likewise.
|
||||
(aarch64_crypto_sha256su0v4si): Likewise.
|
||||
(aarch64_crypto_sha256su1v4si): Likewise.
|
||||
(aarch64_crypto_sha512h<sha512_op>qv2di): Likewise TARGET_SHA3.
|
||||
(aarch64_crypto_sha512su0qv2di): Likewise.
|
||||
(aarch64_crypto_sha512su1qv2di, eor3q<mode>4): Likewise.
|
||||
(aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq<mode>4): Likewise.
|
||||
(aarch64_sm3ss1qv4si): Likewise TARGET_SM4.
|
||||
(aarch64_sm3tt<sm3tt_op>qv4si): Likewise.
|
||||
(aarch64_sm3partw<sm3part_op>qv4si): Likewise.
|
||||
(aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise.
|
||||
* config/aarch64/aarch64.md (<FLOATUORS:optab>dihf2)
|
||||
(copysign<GPF:mode>3, copysign<GPF:mode>3_insn)
|
||||
(xorsign<mode>3): Remove redundant TARGET_FLOAT condition.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-c.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-simd.md | 56 +++++++++++++++---------------
|
||||
gcc/config/aarch64/aarch64.h | 30 ++++++++--------
|
||||
gcc/config/aarch64/aarch64.md | 8 ++---
|
||||
4 files changed, 47 insertions(+), 49 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
|
||||
index 18c9b975b..2dfe2b8f8 100644
|
||||
--- a/gcc/config/aarch64/aarch64-c.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-c.cc
|
||||
@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
|
||||
|
||||
aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile);
|
||||
|
||||
- if (TARGET_FLOAT || TARGET_SIMD)
|
||||
+ if (TARGET_FLOAT)
|
||||
{
|
||||
builtin_define_with_int_value ("__ARM_FP", 0x0E);
|
||||
builtin_define ("__ARM_FP16_FORMAT_IEEE");
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index de92802f5..a47b39281 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -693,7 +693,7 @@
|
||||
[(match_operand:VHSDF 0 "register_operand")
|
||||
(match_operand:VHSDF 1 "register_operand")
|
||||
(match_operand:VHSDF 2 "register_operand")]
|
||||
- "TARGET_FLOAT && TARGET_SIMD"
|
||||
+ "TARGET_SIMD"
|
||||
{
|
||||
rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
|
||||
int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
|
||||
@@ -8352,7 +8352,7 @@
|
||||
(match_operand:V16QI 1 "register_operand" "%0")
|
||||
(match_operand:V16QI 2 "register_operand" "w"))]
|
||||
CRYPTO_AES))]
|
||||
- "TARGET_SIMD && TARGET_AES"
|
||||
+ "TARGET_AES"
|
||||
"aes<aes_op>\\t%0.16b, %2.16b"
|
||||
[(set_attr "type" "crypto_aese")]
|
||||
)
|
||||
@@ -8361,7 +8361,7 @@
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=w")
|
||||
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
|
||||
CRYPTO_AESMC))]
|
||||
- "TARGET_SIMD && TARGET_AES"
|
||||
+ "TARGET_AES"
|
||||
"aes<aesmc_op>\\t%0.16b, %1.16b"
|
||||
[(set_attr "type" "crypto_aesmc")]
|
||||
)
|
||||
@@ -8380,7 +8380,7 @@
|
||||
(match_operand:V16QI 2 "register_operand" "w"))]
|
||||
UNSPEC_AESE)]
|
||||
UNSPEC_AESMC))]
|
||||
- "TARGET_SIMD && TARGET_AES
|
||||
+ "TARGET_AES
|
||||
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
|
||||
"aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
|
||||
[(set_attr "type" "crypto_aese")
|
||||
@@ -8401,7 +8401,7 @@
|
||||
(match_operand:V16QI 2 "register_operand" "w"))]
|
||||
UNSPEC_AESD)]
|
||||
UNSPEC_AESIMC))]
|
||||
- "TARGET_SIMD && TARGET_AES
|
||||
+ "TARGET_AES
|
||||
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
|
||||
"aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
|
||||
[(set_attr "type" "crypto_aese")
|
||||
@@ -8415,7 +8415,7 @@
|
||||
(unspec:SI [(match_operand:SI 1
|
||||
"register_operand" "w")]
|
||||
UNSPEC_SHA1H))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha1h\\t%s0, %s1"
|
||||
[(set_attr "type" "crypto_sha1_fast")]
|
||||
)
|
||||
@@ -8425,7 +8425,7 @@
|
||||
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
|
||||
(parallel [(const_int 0)]))]
|
||||
UNSPEC_SHA1H))]
|
||||
- "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
|
||||
+ "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
|
||||
"sha1h\\t%s0, %s1"
|
||||
[(set_attr "type" "crypto_sha1_fast")]
|
||||
)
|
||||
@@ -8435,7 +8435,7 @@
|
||||
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
|
||||
(parallel [(const_int 3)]))]
|
||||
UNSPEC_SHA1H))]
|
||||
- "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
|
||||
+ "TARGET_SHA2 && BYTES_BIG_ENDIAN"
|
||||
"sha1h\\t%s0, %s1"
|
||||
[(set_attr "type" "crypto_sha1_fast")]
|
||||
)
|
||||
@@ -8445,7 +8445,7 @@
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
||||
(match_operand:V4SI 2 "register_operand" "w")]
|
||||
UNSPEC_SHA1SU1))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha1su1\\t%0.4s, %2.4s"
|
||||
[(set_attr "type" "crypto_sha1_fast")]
|
||||
)
|
||||
@@ -8456,7 +8456,7 @@
|
||||
(match_operand:SI 2 "register_operand" "w")
|
||||
(match_operand:V4SI 3 "register_operand" "w")]
|
||||
CRYPTO_SHA1))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha1<sha1_op>\\t%q0, %s2, %3.4s"
|
||||
[(set_attr "type" "crypto_sha1_slow")]
|
||||
)
|
||||
@@ -8467,7 +8467,7 @@
|
||||
(match_operand:V4SI 2 "register_operand" "w")
|
||||
(match_operand:V4SI 3 "register_operand" "w")]
|
||||
UNSPEC_SHA1SU0))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha1su0\\t%0.4s, %2.4s, %3.4s"
|
||||
[(set_attr "type" "crypto_sha1_xor")]
|
||||
)
|
||||
@@ -8480,7 +8480,7 @@
|
||||
(match_operand:V4SI 2 "register_operand" "w")
|
||||
(match_operand:V4SI 3 "register_operand" "w")]
|
||||
CRYPTO_SHA256))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha256h<sha256_op>\\t%q0, %q2, %3.4s"
|
||||
[(set_attr "type" "crypto_sha256_slow")]
|
||||
)
|
||||
@@ -8490,7 +8490,7 @@
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
||||
(match_operand:V4SI 2 "register_operand" "w")]
|
||||
UNSPEC_SHA256SU0))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha256su0\\t%0.4s, %2.4s"
|
||||
[(set_attr "type" "crypto_sha256_fast")]
|
||||
)
|
||||
@@ -8501,7 +8501,7 @@
|
||||
(match_operand:V4SI 2 "register_operand" "w")
|
||||
(match_operand:V4SI 3 "register_operand" "w")]
|
||||
UNSPEC_SHA256SU1))]
|
||||
- "TARGET_SIMD && TARGET_SHA2"
|
||||
+ "TARGET_SHA2"
|
||||
"sha256su1\\t%0.4s, %2.4s, %3.4s"
|
||||
[(set_attr "type" "crypto_sha256_slow")]
|
||||
)
|
||||
@@ -8514,7 +8514,7 @@
|
||||
(match_operand:V2DI 2 "register_operand" "w")
|
||||
(match_operand:V2DI 3 "register_operand" "w")]
|
||||
CRYPTO_SHA512))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"sha512h<sha512_op>\\t%q0, %q2, %3.2d"
|
||||
[(set_attr "type" "crypto_sha512")]
|
||||
)
|
||||
@@ -8524,7 +8524,7 @@
|
||||
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:V2DI 2 "register_operand" "w")]
|
||||
UNSPEC_SHA512SU0))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"sha512su0\\t%0.2d, %2.2d"
|
||||
[(set_attr "type" "crypto_sha512")]
|
||||
)
|
||||
@@ -8535,7 +8535,7 @@
|
||||
(match_operand:V2DI 2 "register_operand" "w")
|
||||
(match_operand:V2DI 3 "register_operand" "w")]
|
||||
UNSPEC_SHA512SU1))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"sha512su1\\t%0.2d, %2.2d, %3.2d"
|
||||
[(set_attr "type" "crypto_sha512")]
|
||||
)
|
||||
@@ -8549,7 +8549,7 @@
|
||||
(match_operand:VQ_I 2 "register_operand" "w")
|
||||
(match_operand:VQ_I 3 "register_operand" "w"))
|
||||
(match_operand:VQ_I 1 "register_operand" "w")))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
|
||||
[(set_attr "type" "crypto_sha3")]
|
||||
)
|
||||
@@ -8561,7 +8561,7 @@
|
||||
(match_operand:V2DI 2 "register_operand" "w")
|
||||
(const_int 1))
|
||||
(match_operand:V2DI 1 "register_operand" "w")))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"rax1\\t%0.2d, %1.2d, %2.2d"
|
||||
[(set_attr "type" "crypto_sha3")]
|
||||
)
|
||||
@@ -8573,7 +8573,7 @@
|
||||
(match_operand:V2DI 1 "register_operand" "%w")
|
||||
(match_operand:V2DI 2 "register_operand" "w"))
|
||||
(match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"xar\\t%0.2d, %1.2d, %2.2d, %3"
|
||||
[(set_attr "type" "crypto_sha3")]
|
||||
)
|
||||
@@ -8585,7 +8585,7 @@
|
||||
(not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
|
||||
(match_operand:VQ_I 2 "register_operand" "w"))
|
||||
(match_operand:VQ_I 1 "register_operand" "w")))]
|
||||
- "TARGET_SIMD && TARGET_SHA3"
|
||||
+ "TARGET_SHA3"
|
||||
"bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
|
||||
[(set_attr "type" "crypto_sha3")]
|
||||
)
|
||||
@@ -8598,7 +8598,7 @@
|
||||
(match_operand:V4SI 2 "register_operand" "w")
|
||||
(match_operand:V4SI 3 "register_operand" "w")]
|
||||
UNSPEC_SM3SS1))]
|
||||
- "TARGET_SIMD && TARGET_SM4"
|
||||
+ "TARGET_SM4"
|
||||
"sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
|
||||
[(set_attr "type" "crypto_sm3")]
|
||||
)
|
||||
@@ -8611,7 +8611,7 @@
|
||||
(match_operand:V4SI 3 "register_operand" "w")
|
||||
(match_operand:SI 4 "aarch64_imm2" "Ui2")]
|
||||
CRYPTO_SM3TT))]
|
||||
- "TARGET_SIMD && TARGET_SM4"
|
||||
+ "TARGET_SM4"
|
||||
"sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
|
||||
[(set_attr "type" "crypto_sm3")]
|
||||
)
|
||||
@@ -8622,7 +8622,7 @@
|
||||
(match_operand:V4SI 2 "register_operand" "w")
|
||||
(match_operand:V4SI 3 "register_operand" "w")]
|
||||
CRYPTO_SM3PART))]
|
||||
- "TARGET_SIMD && TARGET_SM4"
|
||||
+ "TARGET_SM4"
|
||||
"sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
|
||||
[(set_attr "type" "crypto_sm3")]
|
||||
)
|
||||
@@ -8634,7 +8634,7 @@
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
|
||||
(match_operand:V4SI 2 "register_operand" "w")]
|
||||
UNSPEC_SM4E))]
|
||||
- "TARGET_SIMD && TARGET_SM4"
|
||||
+ "TARGET_SM4"
|
||||
"sm4e\\t%0.4s, %2.4s"
|
||||
[(set_attr "type" "crypto_sm4")]
|
||||
)
|
||||
@@ -8644,7 +8644,7 @@
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
|
||||
(match_operand:V4SI 2 "register_operand" "w")]
|
||||
UNSPEC_SM4EKEY))]
|
||||
- "TARGET_SIMD && TARGET_SM4"
|
||||
+ "TARGET_SM4"
|
||||
"sm4ekey\\t%0.4s, %1.4s, %2.4s"
|
||||
[(set_attr "type" "crypto_sm4")]
|
||||
)
|
||||
@@ -9230,7 +9230,7 @@
|
||||
(unspec:TI [(match_operand:DI 1 "register_operand" "w")
|
||||
(match_operand:DI 2 "register_operand" "w")]
|
||||
UNSPEC_PMULL))]
|
||||
- "TARGET_SIMD && TARGET_AES"
|
||||
+ "TARGET_AES"
|
||||
"pmull\\t%0.1q, %1.1d, %2.1d"
|
||||
[(set_attr "type" "crypto_pmull")]
|
||||
)
|
||||
@@ -9240,7 +9240,7 @@
|
||||
(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
|
||||
(match_operand:V2DI 2 "register_operand" "w")]
|
||||
UNSPEC_PMULL2))]
|
||||
- "TARGET_SIMD && TARGET_AES"
|
||||
+ "TARGET_AES"
|
||||
"pmull2\\t%0.1q, %1.2d, %2.2d"
|
||||
[(set_attr "type" "crypto_pmull")]
|
||||
)
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 521031efe..2a9d2d031 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -222,19 +222,19 @@ enum class aarch64_feature : unsigned char {
|
||||
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
|
||||
|
||||
/* Crypto is an optional extension to AdvSIMD. */
|
||||
-#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
|
||||
+#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
|
||||
|
||||
/* SHA2 is an optional extension to AdvSIMD. */
|
||||
-#define TARGET_SHA2 ((TARGET_SIMD && AARCH64_ISA_SHA2) || TARGET_CRYPTO)
|
||||
+#define TARGET_SHA2 (AARCH64_ISA_SHA2)
|
||||
|
||||
/* SHA3 is an optional extension to AdvSIMD. */
|
||||
-#define TARGET_SHA3 (TARGET_SIMD && AARCH64_ISA_SHA3)
|
||||
+#define TARGET_SHA3 (AARCH64_ISA_SHA3)
|
||||
|
||||
/* AES is an optional extension to AdvSIMD. */
|
||||
-#define TARGET_AES ((TARGET_SIMD && AARCH64_ISA_AES) || TARGET_CRYPTO)
|
||||
+#define TARGET_AES (AARCH64_ISA_AES)
|
||||
|
||||
/* SM is an optional extension to AdvSIMD. */
|
||||
-#define TARGET_SM4 (TARGET_SIMD && AARCH64_ISA_SM4)
|
||||
+#define TARGET_SM4 (AARCH64_ISA_SM4)
|
||||
|
||||
/* FP16FML is an optional extension to AdvSIMD. */
|
||||
#define TARGET_F16FML (TARGET_SIMD && AARCH64_ISA_F16FML && TARGET_FP_F16INST)
|
||||
@@ -246,29 +246,29 @@ enum class aarch64_feature : unsigned char {
|
||||
#define TARGET_LSE (AARCH64_ISA_LSE)
|
||||
|
||||
/* ARMv8.2-A FP16 support that can be enabled through the +fp16 extension. */
|
||||
-#define TARGET_FP_F16INST (TARGET_FLOAT && AARCH64_ISA_F16)
|
||||
+#define TARGET_FP_F16INST (AARCH64_ISA_F16)
|
||||
#define TARGET_SIMD_F16INST (TARGET_SIMD && AARCH64_ISA_F16)
|
||||
|
||||
/* Dot Product is an optional extension to AdvSIMD enabled through +dotprod. */
|
||||
-#define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD)
|
||||
+#define TARGET_DOTPROD (AARCH64_ISA_DOTPROD)
|
||||
|
||||
/* SVE instructions, enabled through +sve. */
|
||||
#define TARGET_SVE (AARCH64_ISA_SVE)
|
||||
|
||||
/* SVE2 instructions, enabled through +sve2. */
|
||||
-#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2)
|
||||
+#define TARGET_SVE2 (AARCH64_ISA_SVE2)
|
||||
|
||||
/* SVE2 AES instructions, enabled through +sve2-aes. */
|
||||
-#define TARGET_SVE2_AES (TARGET_SVE2 && AARCH64_ISA_SVE2_AES)
|
||||
+#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES)
|
||||
|
||||
/* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */
|
||||
-#define TARGET_SVE2_BITPERM (TARGET_SVE2 && AARCH64_ISA_SVE2_BITPERM)
|
||||
+#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM)
|
||||
|
||||
/* SVE2 SHA3 instructions, enabled through +sve2-sha3. */
|
||||
-#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3)
|
||||
+#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3)
|
||||
|
||||
/* SVE2 SM4 instructions, enabled through +sve2-sm4. */
|
||||
-#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4)
|
||||
+#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4)
|
||||
|
||||
/* ARMv8.3-A features. */
|
||||
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
|
||||
@@ -296,12 +296,10 @@ enum class aarch64_feature : unsigned char {
|
||||
#define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM)
|
||||
|
||||
/* F32MM instructions are enabled through +f32mm. */
|
||||
-#define TARGET_F32MM (AARCH64_ISA_F32MM)
|
||||
-#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM)
|
||||
+#define TARGET_SVE_F32MM (AARCH64_ISA_F32MM)
|
||||
|
||||
/* F64MM instructions are enabled through +f64mm. */
|
||||
-#define TARGET_F64MM (AARCH64_ISA_F64MM)
|
||||
-#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM)
|
||||
+#define TARGET_SVE_F64MM (AARCH64_ISA_F64MM)
|
||||
|
||||
/* BF16 instructions are enabled through +bf16. */
|
||||
#define TARGET_BF16_FP (AARCH64_ISA_BF16)
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index c0c64a798..7ee26284d 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -6417,7 +6417,7 @@
|
||||
(define_expand "<optab>dihf2"
|
||||
[(set (match_operand:HF 0 "register_operand")
|
||||
(FLOATUORS:HF (match_operand:DI 1 "register_operand")))]
|
||||
- "TARGET_FLOAT && (TARGET_FP_F16INST || TARGET_SIMD)"
|
||||
+ "TARGET_FP_F16INST || TARGET_SIMD"
|
||||
{
|
||||
if (TARGET_FP_F16INST)
|
||||
emit_insn (gen_aarch64_fp16_<optab>dihf2 (operands[0], operands[1]));
|
||||
@@ -6676,7 +6676,7 @@
|
||||
[(match_operand:GPF 0 "register_operand")
|
||||
(match_operand:GPF 1 "register_operand")
|
||||
(match_operand:GPF 2 "register_operand")]
|
||||
- "TARGET_FLOAT && TARGET_SIMD"
|
||||
+ "TARGET_SIMD"
|
||||
{
|
||||
rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
|
||||
emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
|
||||
@@ -6693,7 +6693,7 @@
|
||||
(match_operand:GPF 2 "register_operand" "w,w,0,0")
|
||||
(match_operand:<V_INT_EQUIV> 3 "register_operand" "0,w,w,X")]
|
||||
UNSPEC_COPYSIGN))]
|
||||
- "TARGET_FLOAT && TARGET_SIMD"
|
||||
+ "TARGET_SIMD"
|
||||
"@
|
||||
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
|
||||
bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
|
||||
@@ -6714,7 +6714,7 @@
|
||||
[(match_operand:GPF 0 "register_operand")
|
||||
(match_operand:GPF 1 "register_operand")
|
||||
(match_operand:GPF 2 "register_operand")]
|
||||
- "TARGET_FLOAT && TARGET_SIMD"
|
||||
+ "TARGET_SIMD"
|
||||
{
|
||||
|
||||
machine_mode imode = <V_INT_EQUIV>mode;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
132
0119-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
Normal file
132
0119-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch
Normal file
@ -0,0 +1,132 @@
|
||||
From 53a858c0c371cbea27ed4170a94fb3918b9fcdcf Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 4 Oct 2022 16:39:18 +0100
|
||||
Subject: [PATCH 027/157] [Backport][SME] aarch64: Define __ARM_FEATURE_RCPC
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1b0a767f04a8ccbaff2a7b71d5c817cdb469630
|
||||
|
||||
https://github.com/ARM-software/acle/pull/199 adds a new feature
|
||||
macro for RCPC, for use in things like inline assembly. This patch
|
||||
adds the associated support to GCC.
|
||||
|
||||
Also, RCPC is required for Armv8.3-A and later, but the armv8.3-a
|
||||
entry didn't include it. This was probably harmless in practice
|
||||
since GCC simply ignored the extension until now. (The GAS
|
||||
definition is OK.)
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New macro.
|
||||
* config/aarch64/aarch64-arches.def (armv8.3-a): Include RCPC.
|
||||
* config/aarch64/aarch64-cores.def (thunderx3t110, zeus, neoverse-v1)
|
||||
(neoverse-512tvb, saphira): Remove RCPC from these Armv8.3-A+ cores.
|
||||
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
|
||||
__ARM_FEATURE_RCPC when appropriate.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/pragma_cpp_predefs_1.c: Add RCPC tests.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-arches.def | 2 +-
|
||||
gcc/config/aarch64/aarch64-c.cc | 1 +
|
||||
gcc/config/aarch64/aarch64-cores.def | 10 +++++-----
|
||||
gcc/config/aarch64/aarch64.h | 1 +
|
||||
.../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 +++++++++++++++++++
|
||||
5 files changed, 28 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
|
||||
index 9f8246618..5a9eff336 100644
|
||||
--- a/gcc/config/aarch64/aarch64-arches.def
|
||||
+++ b/gcc/config/aarch64/aarch64-arches.def
|
||||
@@ -33,7 +33,7 @@
|
||||
AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD))
|
||||
AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA))
|
||||
AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A))
|
||||
-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH))
|
||||
+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC))
|
||||
AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM))
|
||||
AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES))
|
||||
AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16))
|
||||
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
|
||||
index 2dfe2b8f8..4085ad840 100644
|
||||
--- a/gcc/config/aarch64/aarch64-c.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-c.cc
|
||||
@@ -202,6 +202,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
|
||||
"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
|
||||
aarch64_def_or_undef (TARGET_LS64,
|
||||
"__ARM_FEATURE_LS64", pfile);
|
||||
+ aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
|
||||
|
||||
/* Not for ACLE, but required to keep "float.h" correct if we switch
|
||||
target between implementations that do or do not support ARMv8.2-A
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index 60299160b..b50628d6b 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -133,17 +133,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
/* Marvell cores (TX3). */
|
||||
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a)
|
||||
|
||||
/* ARMv8.4-A Architecture Processors. */
|
||||
|
||||
/* Arm ('A') cores. */
|
||||
-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
|
||||
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
|
||||
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
|
||||
|
||||
/* Qualcomm ('Q') cores. */
|
||||
-AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1)
|
||||
+AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO), saphira, 0x51, 0xC01, -1)
|
||||
|
||||
/* ARMv8-A big.LITTLE implementations. */
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 2a9d2d031..19b82b4f3 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -201,6 +201,7 @@ enum class aarch64_feature : unsigned char {
|
||||
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
|
||||
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
|
||||
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
|
||||
+#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC)
|
||||
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A)
|
||||
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
|
||||
#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
|
||||
index bfb044f5d..307fa3d67 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
|
||||
@@ -248,6 +248,26 @@
|
||||
#error "__ARM_FEATURE_CRC32 is not defined but should be!"
|
||||
#endif
|
||||
|
||||
+#pragma GCC target ("arch=armv8.2-a")
|
||||
+#ifdef __ARM_FEATURE_RCPC
|
||||
+#error "__ARM_FEATURE_RCPC is defined but should not be!"
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target ("arch=armv8.2-a+rcpc")
|
||||
+#ifndef __ARM_FEATURE_RCPC
|
||||
+#error "__ARM_FEATURE_RCPC is not defined but should be!"
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target ("+norcpc")
|
||||
+#ifdef __ARM_FEATURE_RCPC
|
||||
+#error "__ARM_FEATURE_RCPC is defined but should not be!"
|
||||
+#endif
|
||||
+
|
||||
+#pragma GCC target ("arch=armv8.3-a")
|
||||
+#ifndef __ARM_FEATURE_RCPC
|
||||
+#error "__ARM_FEATURE_RCPC is not defined but should be!"
|
||||
+#endif
|
||||
+
|
||||
int
|
||||
foo (int a)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
From f6b2917888292c694bae1debe8abb0d6c2c6f59e Mon Sep 17 00:00:00 2001
|
||||
From: xiezhiheng <xiezhiheng@huawei.com>
|
||||
Date: Tue, 20 Feb 2024 11:03:47 +0800
|
||||
Subject: [PATCH 028/157] [Backport][SME] Add Ampere-1 and Ampere-1A core
|
||||
definition in aarch64-cores.def
|
||||
|
||||
From commit db2f5d661239737157cf131de7d4df1c17d8d88d and
|
||||
590a06afbf0e96813b5879742f38f3665512c854
|
||||
---
|
||||
gcc/config/aarch64/aarch64-cores.def | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index b50628d6b..f069c81cf 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -69,7 +69,8 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thu
|
||||
AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1)
|
||||
|
||||
/* Ampere Computing ('\xC0') cores. */
|
||||
-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1)
|
||||
+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1)
|
||||
+AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1)
|
||||
/* Do not swap around "emag" and "xgene1",
|
||||
this order is required to handle variant correctly. */
|
||||
AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
968
0121-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
Normal file
968
0121-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch
Normal file
@ -0,0 +1,968 @@
|
||||
From 81a4b464d01cf00f8b355115588e67bf2c021acd Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Wed, 7 Sep 2022 10:52:04 +0100
|
||||
Subject: [PATCH 029/157] [Backport][SME] aarch64: Fix +nosimd handling of FPR
|
||||
moves
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d6106132907f6bd01109f2616d20a87edecc6fc6
|
||||
|
||||
8-bit and 16-bit FPR moves would ICE for +nosimd+fp, and some other
|
||||
moves would handle FPR<-zero inefficiently. This is very much a
|
||||
niche case at the moment, but something like it becomes more
|
||||
important with SME streaming mode.
|
||||
|
||||
The si, di and vector tests already passed, they're just included for
|
||||
completeness.
|
||||
|
||||
We're a bit inconsistent about whether alternatives involving FPRs
|
||||
are marked with arch==fp or arch=* (i.e. default). E.g. FPR loads
|
||||
and stores are sometimes * and sometimes fp.
|
||||
|
||||
IMO * makes more sense. FPRs should not be used at all without
|
||||
TARGET_FLOAT, so TARGET_FLOAT represents the base architecture
|
||||
when FPRs are enabled. I think it's more useful if non-default
|
||||
arches represent a genuine restriction.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Extend
|
||||
w<-w, r<-w and w<-r alternatives to !simd, using 32-bit moves
|
||||
in that case. Extend w<-r to w<-Z.
|
||||
(*mov<HFBF:mode>_aarch64): Likewise, but with Y instead of Z.
|
||||
(*movti_aarch64): Use an FMOV from XZR for w<-Z if MOVI is not
|
||||
available.
|
||||
(define_split): Do not apply the floating-point immediate-to-register
|
||||
split to zeros, even if MOVI is not available.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/movqi_1.c: New test.
|
||||
* gcc.target/aarch64/movhi_1.c: Likewise.
|
||||
* gcc.target/aarch64/movsi_1.c: Likewise.
|
||||
* gcc.target/aarch64/movdi_2.c: Likewise.
|
||||
* gcc.target/aarch64/movti_2.c: Likewise.
|
||||
* gcc.target/aarch64/movhf_1.c: Likewise.
|
||||
* gcc.target/aarch64/movsf_1.c: Likewise.
|
||||
* gcc.target/aarch64/movdf_1.c: Likewise.
|
||||
* gcc.target/aarch64/movtf_2.c: Likewise.
|
||||
* gcc.target/aarch64/movv8qi_1.c: Likewise.
|
||||
* gcc.target/aarch64/movv16qi_1.c: Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.md | 38 ++++----
|
||||
gcc/testsuite/gcc.target/aarch64/movdf_1.c | 53 ++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movdi_2.c | 61 +++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movhf_1.c | 53 ++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movhi_1.c | 61 +++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movqi_1.c | 61 +++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movsf_1.c | 53 ++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movsi_1.c | 61 +++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movtf_2.c | 81 +++++++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movti_2.c | 86 +++++++++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 82 ++++++++++++++++++
|
||||
gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 55 ++++++++++++
|
||||
12 files changed, 729 insertions(+), 16 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_2.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 7ee26284d..7267a74d6 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -1201,7 +1201,7 @@
|
||||
|
||||
(define_insn "*mov<mode>_aarch64"
|
||||
[(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w")
|
||||
- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,r,w"))]
|
||||
+ (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
|
||||
"(register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
|
||||
{
|
||||
@@ -1225,11 +1225,11 @@
|
||||
case 7:
|
||||
return "str\t%<size>1, %0";
|
||||
case 8:
|
||||
- return "umov\t%w0, %1.<v>[0]";
|
||||
+ return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
|
||||
case 9:
|
||||
- return "dup\t%0.<Vallxd>, %w1";
|
||||
+ return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
|
||||
case 10:
|
||||
- return "dup\t%<Vetype>0, %1.<v>[0]";
|
||||
+ return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -1237,7 +1237,7 @@
|
||||
;; The "mov_imm" type for CNT is just a placeholder.
|
||||
[(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
|
||||
store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
|
||||
- (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd")]
|
||||
+ (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
|
||||
)
|
||||
|
||||
(define_expand "mov<mode>"
|
||||
@@ -1399,14 +1399,15 @@
|
||||
|
||||
(define_insn "*movti_aarch64"
|
||||
[(set (match_operand:TI 0
|
||||
- "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m")
|
||||
+ "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m")
|
||||
(match_operand:TI 1
|
||||
- "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
|
||||
+ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))]
|
||||
"(register_operand (operands[0], TImode)
|
||||
|| aarch64_reg_or_zero (operands[1], TImode))"
|
||||
"@
|
||||
#
|
||||
movi\\t%0.2d, #0
|
||||
+ fmov\t%d0, xzr
|
||||
#
|
||||
#
|
||||
mov\\t%0.16b, %1.16b
|
||||
@@ -1415,11 +1416,11 @@
|
||||
stp\\txzr, xzr, %0
|
||||
ldr\\t%q0, %1
|
||||
str\\t%q1, %0"
|
||||
- [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
|
||||
+ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \
|
||||
load_16,store_16,store_16,\
|
||||
load_16,store_16")
|
||||
- (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
|
||||
- (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
|
||||
+ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4")
|
||||
+ (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")]
|
||||
)
|
||||
|
||||
;; Split a TImode register-register or register-immediate move into
|
||||
@@ -1458,16 +1459,19 @@
|
||||
)
|
||||
|
||||
(define_insn "*mov<mode>_aarch64"
|
||||
- [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r")
|
||||
- (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
|
||||
+ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r")
|
||||
+ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r"))]
|
||||
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||||
"@
|
||||
movi\\t%0.4h, #0
|
||||
fmov\\t%h0, %w1
|
||||
dup\\t%w0.4h, %w1
|
||||
+ fmov\\t%s0, %w1
|
||||
umov\\t%w0, %1.h[0]
|
||||
+ fmov\\t%w0, %s1
|
||||
mov\\t%0.h[0], %1.h[0]
|
||||
+ fmov\\t%s0, %s1
|
||||
fmov\\t%h0, %1
|
||||
* return aarch64_output_scalar_simd_mov_immediate (operands[1], HImode);
|
||||
ldr\\t%h0, %1
|
||||
@@ -1475,9 +1479,10 @@
|
||||
ldrh\\t%w0, %1
|
||||
strh\\t%w1, %0
|
||||
mov\\t%w0, %w1"
|
||||
- [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \
|
||||
- neon_move,f_loads,f_stores,load_4,store_4,mov_reg")
|
||||
- (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")]
|
||||
+ [(set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc,
|
||||
+ neon_move,fmov,fconsts,neon_move,f_loads,f_stores,
|
||||
+ load_4,store_4,mov_reg")
|
||||
+ (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
|
||||
)
|
||||
|
||||
(define_insn "*movsf_aarch64"
|
||||
@@ -1530,10 +1535,11 @@
|
||||
|
||||
(define_split
|
||||
[(set (match_operand:GPF_HF 0 "nonimmediate_operand")
|
||||
- (match_operand:GPF_HF 1 "general_operand"))]
|
||||
+ (match_operand:GPF_HF 1 "const_double_operand"))]
|
||||
"can_create_pseudo_p ()
|
||||
&& !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
|
||||
&& !aarch64_float_const_representable_p (operands[1])
|
||||
+ && !aarch64_float_const_zero_rtx_p (operands[1])
|
||||
&& aarch64_float_const_rtx_p (operands[1])"
|
||||
[(const_int 0)]
|
||||
{
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_1.c b/gcc/testsuite/gcc.target/aarch64/movdf_1.c
|
||||
new file mode 100644
|
||||
index 000000000..a51ded1d6
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movdf_1.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov d0, d1
|
||||
+** ret
|
||||
+*/
|
||||
+double
|
||||
+fpr_to_fpr (double q0, double q1)
|
||||
+{
|
||||
+ return q1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov d0, x0
|
||||
+** ret
|
||||
+*/
|
||||
+double
|
||||
+gpr_to_fpr ()
|
||||
+{
|
||||
+ register double x0 asm ("x0");
|
||||
+ asm volatile ("" : "=r" (x0));
|
||||
+ return x0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov d0, xzr
|
||||
+** ret
|
||||
+*/
|
||||
+double
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov x0, d0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_gpr (double q0)
|
||||
+{
|
||||
+ register double x0 asm ("x0");
|
||||
+ x0 = q0;
|
||||
+ asm volatile ("" :: "r" (x0));
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_2.c b/gcc/testsuite/gcc.target/aarch64/movdi_2.c
|
||||
new file mode 100644
|
||||
index 000000000..dd3fc3e8a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movdi_2.c
|
||||
@@ -0,0 +1,61 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov d0, d1
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_fpr (void)
|
||||
+{
|
||||
+ register uint64_t q0 asm ("q0");
|
||||
+ register uint64_t q1 asm ("q1");
|
||||
+ asm volatile ("" : "=w" (q1));
|
||||
+ q0 = q1;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov d0, x0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+gpr_to_fpr (uint64_t x0)
|
||||
+{
|
||||
+ register uint64_t q0 asm ("q0");
|
||||
+ q0 = x0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov d0, xzr
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ register uint64_t q0 asm ("q0");
|
||||
+ q0 = 0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov x0, d0
|
||||
+** ret
|
||||
+*/
|
||||
+uint64_t
|
||||
+fpr_to_gpr ()
|
||||
+{
|
||||
+ register uint64_t q0 asm ("q0");
|
||||
+ asm volatile ("" : "=w" (q0));
|
||||
+ return q0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_1.c b/gcc/testsuite/gcc.target/aarch64/movhf_1.c
|
||||
new file mode 100644
|
||||
index 000000000..cae25d4e5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movhf_1.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov s0, s1
|
||||
+** ret
|
||||
+*/
|
||||
+_Float16
|
||||
+fpr_to_fpr (_Float16 q0, _Float16 q1)
|
||||
+{
|
||||
+ return q1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov s0, w0
|
||||
+** ret
|
||||
+*/
|
||||
+_Float16
|
||||
+gpr_to_fpr ()
|
||||
+{
|
||||
+ register _Float16 w0 asm ("w0");
|
||||
+ asm volatile ("" : "=r" (w0));
|
||||
+ return w0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov s0, wzr
|
||||
+** ret
|
||||
+*/
|
||||
+_Float16
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov w0, s0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_gpr (_Float16 q0)
|
||||
+{
|
||||
+ register _Float16 w0 asm ("w0");
|
||||
+ w0 = q0;
|
||||
+ asm volatile ("" :: "r" (w0));
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_1.c b/gcc/testsuite/gcc.target/aarch64/movhi_1.c
|
||||
new file mode 100644
|
||||
index 000000000..8017abc5f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movhi_1.c
|
||||
@@ -0,0 +1,61 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov s0, s1
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_fpr (void)
|
||||
+{
|
||||
+ register uint16_t q0 asm ("q0");
|
||||
+ register uint16_t q1 asm ("q1");
|
||||
+ asm volatile ("" : "=w" (q1));
|
||||
+ q0 = q1;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov s0, w0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+gpr_to_fpr (uint16_t w0)
|
||||
+{
|
||||
+ register uint16_t q0 asm ("q0");
|
||||
+ q0 = w0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov s0, wzr
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ register uint16_t q0 asm ("q0");
|
||||
+ q0 = 0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov w0, s0
|
||||
+** ret
|
||||
+*/
|
||||
+uint16_t
|
||||
+fpr_to_gpr ()
|
||||
+{
|
||||
+ register uint16_t q0 asm ("q0");
|
||||
+ asm volatile ("" : "=w" (q0));
|
||||
+ return q0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_1.c b/gcc/testsuite/gcc.target/aarch64/movqi_1.c
|
||||
new file mode 100644
|
||||
index 000000000..401a79630
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movqi_1.c
|
||||
@@ -0,0 +1,61 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov s0, s1
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_fpr (void)
|
||||
+{
|
||||
+ register uint8_t q0 asm ("q0");
|
||||
+ register uint8_t q1 asm ("q1");
|
||||
+ asm volatile ("" : "=w" (q1));
|
||||
+ q0 = q1;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov s0, w0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+gpr_to_fpr (uint8_t w0)
|
||||
+{
|
||||
+ register uint8_t q0 asm ("q0");
|
||||
+ q0 = w0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov s0, wzr
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ register uint8_t q0 asm ("q0");
|
||||
+ q0 = 0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov w0, s0
|
||||
+** ret
|
||||
+*/
|
||||
+uint8_t
|
||||
+fpr_to_gpr ()
|
||||
+{
|
||||
+ register uint8_t q0 asm ("q0");
|
||||
+ asm volatile ("" : "=w" (q0));
|
||||
+ return q0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_1.c b/gcc/testsuite/gcc.target/aarch64/movsf_1.c
|
||||
new file mode 100644
|
||||
index 000000000..09715aa4f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movsf_1.c
|
||||
@@ -0,0 +1,53 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov s0, s1
|
||||
+** ret
|
||||
+*/
|
||||
+float
|
||||
+fpr_to_fpr (float q0, float q1)
|
||||
+{
|
||||
+ return q1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov s0, w0
|
||||
+** ret
|
||||
+*/
|
||||
+float
|
||||
+gpr_to_fpr ()
|
||||
+{
|
||||
+ register float w0 asm ("w0");
|
||||
+ asm volatile ("" : "=r" (w0));
|
||||
+ return w0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov s0, wzr
|
||||
+** ret
|
||||
+*/
|
||||
+float
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov w0, s0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_gpr (float q0)
|
||||
+{
|
||||
+ register float w0 asm ("w0");
|
||||
+ w0 = q0;
|
||||
+ asm volatile ("" :: "r" (w0));
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_1.c b/gcc/testsuite/gcc.target/aarch64/movsi_1.c
|
||||
new file mode 100644
|
||||
index 000000000..5314139aa
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movsi_1.c
|
||||
@@ -0,0 +1,61 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov s0, s1
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_fpr (void)
|
||||
+{
|
||||
+ register uint32_t q0 asm ("q0");
|
||||
+ register uint32_t q1 asm ("q1");
|
||||
+ asm volatile ("" : "=w" (q1));
|
||||
+ q0 = q1;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov s0, w0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+gpr_to_fpr (uint32_t w0)
|
||||
+{
|
||||
+ register uint32_t q0 asm ("q0");
|
||||
+ q0 = w0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov s0, wzr
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ register uint32_t q0 asm ("q0");
|
||||
+ q0 = 0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov w0, s0
|
||||
+** ret
|
||||
+*/
|
||||
+uint32_t
|
||||
+fpr_to_gpr ()
|
||||
+{
|
||||
+ register uint32_t q0 asm ("q0");
|
||||
+ asm volatile ("" : "=w" (q0));
|
||||
+ return q0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_2.c b/gcc/testsuite/gcc.target/aarch64/movtf_2.c
|
||||
new file mode 100644
|
||||
index 000000000..38b16358d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movtf_2.c
|
||||
@@ -0,0 +1,81 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-require-effective-target large_long_double } */
|
||||
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** sub sp, sp, #16
|
||||
+** str q1, \[sp\]
|
||||
+** ldr q0, \[sp\]
|
||||
+** add sp, sp, #?16
|
||||
+** ret
|
||||
+*/
|
||||
+long double
|
||||
+fpr_to_fpr (long double q0, long double q1)
|
||||
+{
|
||||
+ return q1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr: { target aarch64_little_endian }
|
||||
+** fmov d0, x0
|
||||
+** fmov v0.d\[1\], x1
|
||||
+** ret
|
||||
+*/
|
||||
+/*
|
||||
+** gpr_to_fpr: { target aarch64_big_endian }
|
||||
+** fmov d0, x1
|
||||
+** fmov v0.d\[1\], x0
|
||||
+** ret
|
||||
+*/
|
||||
+long double
|
||||
+gpr_to_fpr ()
|
||||
+{
|
||||
+ register long double x0 asm ("x0");
|
||||
+ asm volatile ("" : "=r" (x0));
|
||||
+ return x0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov s0, wzr
|
||||
+** ret
|
||||
+*/
|
||||
+long double
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr: { target aarch64_little_endian }
|
||||
+** (
|
||||
+** fmov x0, d0
|
||||
+** fmov x1, v0.d\[1\]
|
||||
+** |
|
||||
+** fmov x1, v0.d\[1\]
|
||||
+** fmov x0, d0
|
||||
+** )
|
||||
+** ret
|
||||
+*/
|
||||
+/*
|
||||
+** fpr_to_gpr: { target aarch64_big_endian }
|
||||
+** (
|
||||
+** fmov x1, d0
|
||||
+** fmov x0, v0.d\[1\]
|
||||
+** |
|
||||
+** fmov x0, v0.d\[1\]
|
||||
+** fmov x1, d0
|
||||
+** )
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_gpr (long double q0)
|
||||
+{
|
||||
+ register long double x0 asm ("x0");
|
||||
+ x0 = q0;
|
||||
+ asm volatile ("" :: "r" (x0));
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movti_2.c b/gcc/testsuite/gcc.target/aarch64/movti_2.c
|
||||
new file mode 100644
|
||||
index 000000000..c393b1220
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movti_2.c
|
||||
@@ -0,0 +1,86 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** sub sp, sp, #16
|
||||
+** str q1, \[sp\]
|
||||
+** ldr q0, \[sp\]
|
||||
+** add sp, sp, #?16
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_fpr (void)
|
||||
+{
|
||||
+ register __int128_t q0 asm ("q0");
|
||||
+ register __int128_t q1 asm ("q1");
|
||||
+ asm volatile ("" : "=w" (q1));
|
||||
+ q0 = q1;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr: { target aarch64_little_endian }
|
||||
+** fmov d0, x0
|
||||
+** fmov v0.d\[1\], x1
|
||||
+** ret
|
||||
+*/
|
||||
+/*
|
||||
+** gpr_to_fpr: { target aarch64_big_endian }
|
||||
+** fmov d0, x1
|
||||
+** fmov v0.d\[1\], x0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+gpr_to_fpr (__int128_t x0)
|
||||
+{
|
||||
+ register __int128_t q0 asm ("q0");
|
||||
+ q0 = x0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov d0, xzr
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ register __int128_t q0 asm ("q0");
|
||||
+ q0 = 0;
|
||||
+ asm volatile ("" :: "w" (q0));
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr: { target aarch64_little_endian }
|
||||
+** (
|
||||
+** fmov x0, d0
|
||||
+** fmov x1, v0.d\[1\]
|
||||
+** |
|
||||
+** fmov x1, v0.d\[1\]
|
||||
+** fmov x0, d0
|
||||
+** )
|
||||
+** ret
|
||||
+*/
|
||||
+/*
|
||||
+** fpr_to_gpr: { target aarch64_big_endian }
|
||||
+** (
|
||||
+** fmov x1, d0
|
||||
+** fmov x0, v0.d\[1\]
|
||||
+** |
|
||||
+** fmov x0, v0.d\[1\]
|
||||
+** fmov x1, d0
|
||||
+** )
|
||||
+** ret
|
||||
+*/
|
||||
+__int128_t
|
||||
+fpr_to_gpr ()
|
||||
+{
|
||||
+ register __int128_t q0 asm ("q0");
|
||||
+ asm volatile ("" : "=w" (q0));
|
||||
+ return q0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
|
||||
new file mode 100644
|
||||
index 000000000..8a6afb13b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
|
||||
@@ -0,0 +1,82 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+typedef unsigned char v16qi __attribute__((vector_size(16)));
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** sub sp, sp, #16
|
||||
+** str q1, \[sp\]
|
||||
+** ldr q0, \[sp\]
|
||||
+** add sp, sp, #?16
|
||||
+** ret
|
||||
+*/
|
||||
+v16qi
|
||||
+fpr_to_fpr (v16qi q0, v16qi q1)
|
||||
+{
|
||||
+ return q1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr: { target aarch64_little_endian }
|
||||
+** fmov d0, x0
|
||||
+** fmov v0.d\[1\], x1
|
||||
+** ret
|
||||
+*/
|
||||
+/*
|
||||
+** gpr_to_fpr: { target aarch64_big_endian }
|
||||
+** fmov d0, x1
|
||||
+** fmov v0.d\[1\], x0
|
||||
+** ret
|
||||
+*/
|
||||
+v16qi
|
||||
+gpr_to_fpr ()
|
||||
+{
|
||||
+ register v16qi x0 asm ("x0");
|
||||
+ asm volatile ("" : "=r" (x0));
|
||||
+ return x0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov d0, xzr
|
||||
+** ret
|
||||
+*/
|
||||
+v16qi
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ return (v16qi) {};
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr: { target aarch64_little_endian }
|
||||
+** (
|
||||
+** fmov x0, d0
|
||||
+** fmov x1, v0.d\[1\]
|
||||
+** |
|
||||
+** fmov x1, v0.d\[1\]
|
||||
+** fmov x0, d0
|
||||
+** )
|
||||
+** ret
|
||||
+*/
|
||||
+/*
|
||||
+** fpr_to_gpr: { target aarch64_big_endian }
|
||||
+** (
|
||||
+** fmov x1, d0
|
||||
+** fmov x0, v0.d\[1\]
|
||||
+** |
|
||||
+** fmov x0, v0.d\[1\]
|
||||
+** fmov x1, d0
|
||||
+** )
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_gpr (v16qi q0)
|
||||
+{
|
||||
+ register v16qi x0 asm ("x0");
|
||||
+ x0 = q0;
|
||||
+ asm volatile ("" :: "r" (x0));
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
|
||||
new file mode 100644
|
||||
index 000000000..4c97e6fbc
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
|
||||
@@ -0,0 +1,55 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+#pragma GCC target "+nothing+nosimd+fp"
|
||||
+
|
||||
+typedef unsigned char v8qi __attribute__((vector_size(8)));
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_fpr:
|
||||
+** fmov d0, d1
|
||||
+** ret
|
||||
+*/
|
||||
+v8qi
|
||||
+fpr_to_fpr (v8qi q0, v8qi q1)
|
||||
+{
|
||||
+ return q1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** gpr_to_fpr:
|
||||
+** fmov d0, x0
|
||||
+** ret
|
||||
+*/
|
||||
+v8qi
|
||||
+gpr_to_fpr ()
|
||||
+{
|
||||
+ register v8qi x0 asm ("x0");
|
||||
+ asm volatile ("" : "=r" (x0));
|
||||
+ return x0;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** zero_to_fpr:
|
||||
+** fmov d0, xzr
|
||||
+** ret
|
||||
+*/
|
||||
+v8qi
|
||||
+zero_to_fpr ()
|
||||
+{
|
||||
+ return (v8qi) {};
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** fpr_to_gpr:
|
||||
+** fmov x0, d0
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+fpr_to_gpr (v8qi q0)
|
||||
+{
|
||||
+ register v8qi x0 asm ("x0");
|
||||
+ x0 = q0;
|
||||
+ asm volatile ("" :: "r" (x0));
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
83
0122-Backport-SME-aarch64-Commonise-some-folding-code.patch
Normal file
83
0122-Backport-SME-aarch64-Commonise-some-folding-code.patch
Normal file
@ -0,0 +1,83 @@
|
||||
From 805a7aec3ddab49b92bf2d5c1a3e288860cc14bf Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Thu, 20 Oct 2022 10:37:35 +0100
|
||||
Subject: [PATCH 030/157] [Backport][SME] aarch64: Commonise some folding code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df99e9e42094dee0833ac38f53e7fae09b4d133c
|
||||
|
||||
Add an aarch64_sve::gimple_folder helper for folding calls
|
||||
to integer constants. SME will make more use of this.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.h
|
||||
(gimple_folder::fold_to_cstu): New member function.
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(gimple_folder::fold_to_cstu): Define.
|
||||
* config/aarch64/aarch64-sve-builtins-base.cc
|
||||
(svcnt_bhwd_impl::fold): Use it.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++-------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +++++++
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 1 +
|
||||
3 files changed, 10 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
|
||||
index c24c05487..56c9d75e7 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
|
||||
@@ -516,9 +516,7 @@ public:
|
||||
gimple *
|
||||
fold (gimple_folder &f) const OVERRIDE
|
||||
{
|
||||
- tree count = build_int_cstu (TREE_TYPE (f.lhs),
|
||||
- GET_MODE_NUNITS (m_ref_mode));
|
||||
- return gimple_build_assign (f.lhs, count);
|
||||
+ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
|
||||
}
|
||||
|
||||
rtx
|
||||
@@ -553,10 +551,7 @@ public:
|
||||
unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
|
||||
HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
|
||||
if (value >= 0)
|
||||
- {
|
||||
- tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
|
||||
- return gimple_build_assign (f.lhs, count);
|
||||
- }
|
||||
+ return f.fold_to_cstu (value);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index a70e3a6b4..e168c8334 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -2615,6 +2615,13 @@ gimple_folder::redirect_call (const function_instance &instance)
|
||||
return call;
|
||||
}
|
||||
|
||||
+/* Fold the call to constant VAL. */
|
||||
+gimple *
|
||||
+gimple_folder::fold_to_cstu (poly_uint64 val)
|
||||
+{
|
||||
+ return gimple_build_assign (lhs, build_int_cstu (TREE_TYPE (lhs), val));
|
||||
+}
|
||||
+
|
||||
/* Fold the call to a PTRUE, taking the element size from type suffix 0. */
|
||||
gimple *
|
||||
gimple_folder::fold_to_ptrue ()
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 63d1db776..0d130b871 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -500,6 +500,7 @@ public:
|
||||
tree load_store_cookie (tree);
|
||||
|
||||
gimple *redirect_call (const function_instance &);
|
||||
+ gimple *fold_to_cstu (poly_uint64);
|
||||
gimple *fold_to_pfalse ();
|
||||
gimple *fold_to_ptrue ();
|
||||
gimple *fold_to_vl_pred (unsigned int);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
From 8dc1eee26c61bea8aab62080bd961825142685f9 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 7 Nov 2023 15:22:57 +0000
|
||||
Subject: [PATCH 031/157] [Backport][SME] aarch64: Add a %Z operand modifier
|
||||
for SVE registers
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f40eac535bd55192cf93daca16235efbcd91157a
|
||||
|
||||
This patch adds a %Z operand modifier that prints registers as SVE z
|
||||
registers. The SME patches need this, but so do Tamar's patches.
|
||||
I'm separating this out to unblock those.
|
||||
|
||||
We should probably document the [wxbhsdqZ] modifiers as
|
||||
user-facing, but doing that for all of them is a separate patch.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_print_operand): Add a %Z
|
||||
modifier for SVE registers.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 6 ++++++
|
||||
1 file changed, 6 insertions(+)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 3e83e48ec..fd1114b52 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -11901,6 +11901,10 @@ sizetochar (int size)
|
||||
'N': Take the duplicated element in a vector constant
|
||||
and print the negative of it in decimal.
|
||||
'b/h/s/d/q': Print a scalar FP/SIMD register name.
|
||||
+ 'Z': Same for SVE registers. ('z' was already taken.)
|
||||
+ Note that it is not necessary to use %Z for operands
|
||||
+ that have SVE modes. The convention is to use %Z
|
||||
+ only for non-SVE (or potentially non-SVE) modes.
|
||||
'S/T/U/V': Print a FP/SIMD register name for a register list.
|
||||
The register printed is the FP/SIMD register name
|
||||
of X + 0/1/2/3 for S/T/U/V.
|
||||
@@ -12073,6 +12077,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
|
||||
case 's':
|
||||
case 'd':
|
||||
case 'q':
|
||||
+ case 'Z':
|
||||
+ code = TOLOWER (code);
|
||||
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
|
||||
{
|
||||
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
104
0124-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
Normal file
104
0124-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch
Normal file
@ -0,0 +1,104 @@
|
||||
From 8a43bd7885ce479cadb0643fbb0fc22d2b0ffced Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sun, 5 Nov 2023 18:28:46 +0000
|
||||
Subject: [PATCH 032/157] [Backport][SME] mode-switching: Remove unused bbnum
|
||||
field
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2d55ed2b8a754d7279cd002941f7cb481f0fd133
|
||||
|
||||
seginfo had an unused bbnum field, presumably dating from before
|
||||
BB information was attached directly to insns.
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc: Remove unused forward references.
|
||||
(seginfo): Remove bbnum.
|
||||
(new_seginfo): Remove associated argument.
|
||||
(optimize_mode_switching): Update calls accordingly.
|
||||
---
|
||||
gcc/mode-switching.cc | 18 +++++-------------
|
||||
1 file changed, 5 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 6e3f1dc65..4cf8f03a0 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -65,13 +65,11 @@ along with GCC; see the file COPYING3. If not see
|
||||
MODE is the mode this insn must be executed in.
|
||||
INSN_PTR is the insn to be executed (may be the note that marks the
|
||||
beginning of a basic block).
|
||||
- BBNUM is the flow graph basic block this insn occurs in.
|
||||
NEXT is the next insn in the same basic block. */
|
||||
struct seginfo
|
||||
{
|
||||
int mode;
|
||||
rtx_insn *insn_ptr;
|
||||
- int bbnum;
|
||||
struct seginfo *next;
|
||||
HARD_REG_SET regs_live;
|
||||
};
|
||||
@@ -84,11 +82,6 @@ struct bb_info
|
||||
int mode_in;
|
||||
};
|
||||
|
||||
-static struct seginfo * new_seginfo (int, rtx_insn *, int, HARD_REG_SET);
|
||||
-static void add_seginfo (struct bb_info *, struct seginfo *);
|
||||
-static void reg_dies (rtx, HARD_REG_SET *);
|
||||
-static void reg_becomes_live (rtx, const_rtx, void *);
|
||||
-
|
||||
/* Clear ode I from entity J in bitmap B. */
|
||||
#define clear_mode_bit(b, j, i) \
|
||||
bitmap_clear_bit (b, (j * max_num_modes) + i)
|
||||
@@ -148,13 +141,13 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
|
||||
}
|
||||
|
||||
/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
|
||||
- and basic block BB parameters.
|
||||
+ and REGS_LIVE parameters.
|
||||
INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
|
||||
basic block; that allows us later to insert instructions in a FIFO-like
|
||||
manner. */
|
||||
|
||||
static struct seginfo *
|
||||
-new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live)
|
||||
+new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
|
||||
{
|
||||
struct seginfo *ptr;
|
||||
|
||||
@@ -163,7 +156,6 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live)
|
||||
ptr = XNEW (struct seginfo);
|
||||
ptr->mode = mode;
|
||||
ptr->insn_ptr = insn;
|
||||
- ptr->bbnum = bb;
|
||||
ptr->next = NULL;
|
||||
ptr->regs_live = regs_live;
|
||||
return ptr;
|
||||
@@ -604,7 +596,7 @@ optimize_mode_switching (void)
|
||||
gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
|
||||
if (ins_pos != BB_END (bb))
|
||||
ins_pos = NEXT_INSN (ins_pos);
|
||||
- ptr = new_seginfo (no_mode, ins_pos, bb->index, live_now);
|
||||
+ ptr = new_seginfo (no_mode, ins_pos, live_now);
|
||||
add_seginfo (info + bb->index, ptr);
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
@@ -622,7 +614,7 @@ optimize_mode_switching (void)
|
||||
{
|
||||
any_set_required = true;
|
||||
last_mode = mode;
|
||||
- ptr = new_seginfo (mode, insn, bb->index, live_now);
|
||||
+ ptr = new_seginfo (mode, insn, live_now);
|
||||
add_seginfo (info + bb->index, ptr);
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
@@ -651,7 +643,7 @@ optimize_mode_switching (void)
|
||||
mark the block as nontransparent. */
|
||||
if (!any_set_required)
|
||||
{
|
||||
- ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now);
|
||||
+ ptr = new_seginfo (no_mode, BB_END (bb), live_now);
|
||||
add_seginfo (info + bb->index, ptr);
|
||||
if (last_mode != no_mode)
|
||||
for (i = 0; i < no_mode; i++)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
311
0125-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
Normal file
311
0125-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch
Normal file
@ -0,0 +1,311 @@
|
||||
From c980e40d2c27ac3ee33c9b6aea6d2b0d4080852e Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:54 +0000
|
||||
Subject: [PATCH 033/157] [Backport][SME] mode-switching: Tweak the macro/hook
|
||||
documentation
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8479a3759025961f80cf0cd6bb3f127e09d0510d
|
||||
|
||||
I found the documentation for the mode-switching macros/hooks
|
||||
a bit hard to follow at first. This patch tries to add the
|
||||
information that I think would have made it easier to understand.
|
||||
|
||||
Of course, documentation preferences are personal, and so I could
|
||||
be changing something that others understood to something that
|
||||
seems impenetrable.
|
||||
|
||||
Some notes on specific changes:
|
||||
|
||||
- "in an optimizing compilation" didn't seem accurate; the pass
|
||||
is run even at -O0, and often needs to be for correctness.
|
||||
|
||||
- "at run time" meant when the compiler was run, rather than when
|
||||
the compiled code was run.
|
||||
|
||||
- Removing the list of optional macros isn't a clarification,
|
||||
but it means that upcoming patches don't create an absurdly
|
||||
long list.
|
||||
|
||||
- I don't really understand the purpose of TARGET_MODE_PRIORITY,
|
||||
so I mostly left that alone.
|
||||
|
||||
gcc/
|
||||
* target.def: Tweak documentation of mode-switching hooks.
|
||||
* doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation.
|
||||
(NUM_MODES_FOR_MODE_SWITCHING): Likewise.
|
||||
* doc/tm.texi: Regenerate.
|
||||
---
|
||||
gcc/doc/tm.texi | 69 ++++++++++++++++++++++++++++------------------
|
||||
gcc/doc/tm.texi.in | 26 +++++++++--------
|
||||
gcc/target.def | 43 ++++++++++++++++++-----------
|
||||
3 files changed, 84 insertions(+), 54 deletions(-)
|
||||
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 851d31c18..553aa4cf2 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -10234,7 +10234,7 @@ The following macros control mode switching optimizations:
|
||||
|
||||
@defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
|
||||
Define this macro if the port needs extra instructions inserted for mode
|
||||
-switching in an optimizing compilation.
|
||||
+switching.
|
||||
|
||||
For an example, the SH4 can perform both single and double precision
|
||||
floating point operations, but to perform a single precision operation,
|
||||
@@ -10244,73 +10244,88 @@ purpose register as a scratch register, hence these FPSCR sets have to
|
||||
be inserted before reload, i.e.@: you cannot put this into instruction emitting
|
||||
or @code{TARGET_MACHINE_DEPENDENT_REORG}.
|
||||
|
||||
-You can have multiple entities that are mode-switched, and select at run time
|
||||
-which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should
|
||||
-return nonzero for any @var{entity} that needs mode-switching.
|
||||
+You can have multiple entities that are mode-switched, some of which might
|
||||
+only be needed conditionally. The entities are identified by their index
|
||||
+into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
|
||||
+of the initializer determining the number of entities.
|
||||
+
|
||||
+@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
|
||||
+that needs mode-switching.
|
||||
+
|
||||
If you define this macro, you also have to define
|
||||
@code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED},
|
||||
@code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}.
|
||||
-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT}
|
||||
-are optional.
|
||||
+The other macros in this section are optional.
|
||||
@end defmac
|
||||
|
||||
@defmac NUM_MODES_FOR_MODE_SWITCHING
|
||||
If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as
|
||||
initializer for an array of integers. Each initializer element
|
||||
N refers to an entity that needs mode switching, and specifies the number
|
||||
-of different modes that might need to be set for this entity.
|
||||
-The position of the initializer in the initializer---starting counting at
|
||||
+of different modes that are defined for that entity.
|
||||
+The position of the element in the initializer---starting counting at
|
||||
zero---determines the integer that is used to refer to the mode-switched
|
||||
entity in question.
|
||||
-In macros that take mode arguments / yield a mode result, modes are
|
||||
-represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode
|
||||
-switch is needed / supplied.
|
||||
+Modes are represented as numbers 0 @dots{} N @minus{} 1.
|
||||
+In mode arguments and return values, N either represents an unknown
|
||||
+mode or ``no mode'', depending on context.
|
||||
@end defmac
|
||||
|
||||
@deftypefn {Target Hook} void TARGET_MODE_EMIT (int @var{entity}, int @var{mode}, int @var{prev_mode}, HARD_REG_SET @var{regs_live})
|
||||
Generate one or more insns to set @var{entity} to @var{mode}.
|
||||
@var{hard_reg_live} is the set of hard registers live at the point where
|
||||
the insn(s) are to be inserted. @var{prev_moxde} indicates the mode
|
||||
-to switch from. Sets of a lower numbered entity will be emitted before
|
||||
+to switch from, or is the number of modes if the previous mode is not
|
||||
+known. Sets of a lower numbered entity will be emitted before
|
||||
sets of a higher numbered entity to a mode of the same or lower priority.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn})
|
||||
@var{entity} is an integer specifying a mode-switched entity.
|
||||
-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro
|
||||
-to return an integer value not larger than the corresponding element
|
||||
-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}
|
||||
-must be switched into prior to the execution of @var{insn}.
|
||||
+If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook
|
||||
+to return the mode that @var{entity} must be switched into prior to the
|
||||
+execution of @var{insn}, or the number of modes if @var{insn} has no
|
||||
+such requirement.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
|
||||
@var{entity} is an integer specifying a mode-switched entity.
|
||||
-If this macro is defined, it is evaluated for every @var{insn} during mode
|
||||
-switching. It determines the mode that an insn results
|
||||
-in (if different from the incoming mode).
|
||||
+If this hook is defined, it is evaluated for every @var{insn} during mode
|
||||
+switching. It returns the mode that @var{entity} is in after @var{insn}
|
||||
+has been executed. @var{mode} is the mode that @var{entity} was in
|
||||
+before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.
|
||||
+
|
||||
+@var{mode} is equal to the number of modes defined for @var{entity}
|
||||
+if the mode before @var{insn} is unknown. The hook should likewise return
|
||||
+the number of modes if it does not know what mode @var{entity} has after
|
||||
+@var{insn}.
|
||||
+
|
||||
+Not defining the hook is equivalent to returning @var{mode}.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
|
||||
-If this macro is defined, it is evaluated for every @var{entity} that
|
||||
-needs mode switching. It should evaluate to an integer, which is a mode
|
||||
-that @var{entity} is assumed to be switched to at function entry.
|
||||
+If this hook is defined, it is evaluated for every @var{entity} that
|
||||
+needs mode switching. It should return the mode that @var{entity} is
|
||||
+guaranteed to be in on entry to the function, or the number of modes
|
||||
+if there is no such guarantee.
|
||||
If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}
|
||||
must be defined.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_MODE_EXIT (int @var{entity})
|
||||
-If this macro is defined, it is evaluated for every @var{entity} that
|
||||
-needs mode switching. It should evaluate to an integer, which is a mode
|
||||
-that @var{entity} is assumed to be switched to at function exit.
|
||||
+If this hook is defined, it is evaluated for every @var{entity} that
|
||||
+needs mode switching. It should return the mode that @var{entity} must
|
||||
+be in on return from the function, or the number of modes if there is no
|
||||
+such requirement.
|
||||
If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
|
||||
must be defined.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
|
||||
-This macro specifies the order in which modes for @var{entity}
|
||||
+This hook specifies the order in which modes for @var{entity}
|
||||
are processed. 0 is the highest priority,
|
||||
@code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.
|
||||
-The value of the macro should be an integer designating a mode
|
||||
+The hook returns an integer designating a mode
|
||||
for @var{entity}. For any fixed @var{entity}, @code{mode_priority}
|
||||
(@var{entity}, @var{n}) shall be a bijection in 0 @dots{}
|
||||
@code{num_modes_for_mode_switching[@var{entity}] - 1}.
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index ac95cdf7a..9ec11b15c 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -6879,7 +6879,7 @@ The following macros control mode switching optimizations:
|
||||
|
||||
@defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
|
||||
Define this macro if the port needs extra instructions inserted for mode
|
||||
-switching in an optimizing compilation.
|
||||
+switching.
|
||||
|
||||
For an example, the SH4 can perform both single and double precision
|
||||
floating point operations, but to perform a single precision operation,
|
||||
@@ -6889,27 +6889,31 @@ purpose register as a scratch register, hence these FPSCR sets have to
|
||||
be inserted before reload, i.e.@: you cannot put this into instruction emitting
|
||||
or @code{TARGET_MACHINE_DEPENDENT_REORG}.
|
||||
|
||||
-You can have multiple entities that are mode-switched, and select at run time
|
||||
-which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should
|
||||
-return nonzero for any @var{entity} that needs mode-switching.
|
||||
+You can have multiple entities that are mode-switched, some of which might
|
||||
+only be needed conditionally. The entities are identified by their index
|
||||
+into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
|
||||
+of the initializer determining the number of entities.
|
||||
+
|
||||
+@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
|
||||
+that needs mode-switching.
|
||||
+
|
||||
If you define this macro, you also have to define
|
||||
@code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED},
|
||||
@code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}.
|
||||
-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT}
|
||||
-are optional.
|
||||
+The other macros in this section are optional.
|
||||
@end defmac
|
||||
|
||||
@defmac NUM_MODES_FOR_MODE_SWITCHING
|
||||
If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as
|
||||
initializer for an array of integers. Each initializer element
|
||||
N refers to an entity that needs mode switching, and specifies the number
|
||||
-of different modes that might need to be set for this entity.
|
||||
-The position of the initializer in the initializer---starting counting at
|
||||
+of different modes that are defined for that entity.
|
||||
+The position of the element in the initializer---starting counting at
|
||||
zero---determines the integer that is used to refer to the mode-switched
|
||||
entity in question.
|
||||
-In macros that take mode arguments / yield a mode result, modes are
|
||||
-represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode
|
||||
-switch is needed / supplied.
|
||||
+Modes are represented as numbers 0 @dots{} N @minus{} 1.
|
||||
+In mode arguments and return values, N either represents an unknown
|
||||
+mode or ``no mode'', depending on context.
|
||||
@end defmac
|
||||
|
||||
@hook TARGET_MODE_EMIT
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index c9bb2b4c2..b87b0f927 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -6992,51 +6992,62 @@ DEFHOOK
|
||||
"Generate one or more insns to set @var{entity} to @var{mode}.\n\
|
||||
@var{hard_reg_live} is the set of hard registers live at the point where\n\
|
||||
the insn(s) are to be inserted. @var{prev_moxde} indicates the mode\n\
|
||||
-to switch from. Sets of a lower numbered entity will be emitted before\n\
|
||||
+to switch from, or is the number of modes if the previous mode is not\n\
|
||||
+known. Sets of a lower numbered entity will be emitted before\n\
|
||||
sets of a higher numbered entity to a mode of the same or lower priority.",
|
||||
void, (int entity, int mode, int prev_mode, HARD_REG_SET regs_live), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(needed,
|
||||
"@var{entity} is an integer specifying a mode-switched entity.\n\
|
||||
-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro\n\
|
||||
-to return an integer value not larger than the corresponding element\n\
|
||||
-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}\n\
|
||||
-must be switched into prior to the execution of @var{insn}.",
|
||||
+If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\
|
||||
+to return the mode that @var{entity} must be switched into prior to the\n\
|
||||
+execution of @var{insn}, or the number of modes if @var{insn} has no\n\
|
||||
+such requirement.",
|
||||
int, (int entity, rtx_insn *insn), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(after,
|
||||
"@var{entity} is an integer specifying a mode-switched entity.\n\
|
||||
-If this macro is defined, it is evaluated for every @var{insn} during mode\n\
|
||||
-switching. It determines the mode that an insn results\n\
|
||||
-in (if different from the incoming mode).",
|
||||
+If this hook is defined, it is evaluated for every @var{insn} during mode\n\
|
||||
+switching. It returns the mode that @var{entity} is in after @var{insn}\n\
|
||||
+has been executed. @var{mode} is the mode that @var{entity} was in\n\
|
||||
+before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\
|
||||
+\n\
|
||||
+@var{mode} is equal to the number of modes defined for @var{entity}\n\
|
||||
+if the mode before @var{insn} is unknown. The hook should likewise return\n\
|
||||
+the number of modes if it does not know what mode @var{entity} has after\n\
|
||||
+@var{insn}.\n\
|
||||
+\n\
|
||||
+Not defining the hook is equivalent to returning @var{mode}.",
|
||||
int, (int entity, int mode, rtx_insn *insn), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(entry,
|
||||
- "If this macro is defined, it is evaluated for every @var{entity} that\n\
|
||||
-needs mode switching. It should evaluate to an integer, which is a mode\n\
|
||||
-that @var{entity} is assumed to be switched to at function entry.\n\
|
||||
+ "If this hook is defined, it is evaluated for every @var{entity} that\n\
|
||||
+needs mode switching. It should return the mode that @var{entity} is\n\
|
||||
+guaranteed to be in on entry to the function, or the number of modes\n\
|
||||
+if there is no such guarantee.\n\
|
||||
If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}\n\
|
||||
must be defined.",
|
||||
int, (int entity), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(exit,
|
||||
- "If this macro is defined, it is evaluated for every @var{entity} that\n\
|
||||
-needs mode switching. It should evaluate to an integer, which is a mode\n\
|
||||
-that @var{entity} is assumed to be switched to at function exit.\n\
|
||||
+ "If this hook is defined, it is evaluated for every @var{entity} that\n\
|
||||
+needs mode switching. It should return the mode that @var{entity} must\n\
|
||||
+be in on return from the function, or the number of modes if there is no\n\
|
||||
+such requirement.\n\
|
||||
If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\
|
||||
must be defined.",
|
||||
int, (int entity), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(priority,
|
||||
- "This macro specifies the order in which modes for @var{entity}\n\
|
||||
+ "This hook specifies the order in which modes for @var{entity}\n\
|
||||
are processed. 0 is the highest priority,\n\
|
||||
@code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.\n\
|
||||
-The value of the macro should be an integer designating a mode\n\
|
||||
+The hook returns an integer designating a mode\n\
|
||||
for @var{entity}. For any fixed @var{entity}, @code{mode_priority}\n\
|
||||
(@var{entity}, @var{n}) shall be a bijection in 0 @dots{}\n\
|
||||
@code{num_modes_for_mode_switching[@var{entity}] - 1}.",
|
||||
--
|
||||
2.33.0
|
||||
|
||||
35
0126-Backport-SME-mode-switching-Add-note-problem.patch
Normal file
35
0126-Backport-SME-mode-switching-Add-note-problem.patch
Normal file
@ -0,0 +1,35 @@
|
||||
From 7ab54a765239bdd2ce548cffdd5b83f9c20f69da Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:55 +0000
|
||||
Subject: [PATCH 034/157] [Backport][SME] mode-switching: Add note problem
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3cd3a09b3f91a1d023cb180763d40598d6bb274b
|
||||
|
||||
optimize_mode_switching uses REG_DEAD notes to track register
|
||||
liveness, but it failed to tell DF to calculate up-to-date notes.
|
||||
|
||||
Noticed by inspection. I don't have a testcase that fails
|
||||
because of this.
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc (optimize_mode_switching): Call
|
||||
df_note_add_problem.
|
||||
---
|
||||
gcc/mode-switching.cc | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 4cf8f03a0..2a9f98793 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -540,6 +540,7 @@ optimize_mode_switching (void)
|
||||
pre_exit = create_pre_exit (n_entities, entity_map, num_modes);
|
||||
}
|
||||
|
||||
+ df_note_add_problem ();
|
||||
df_analyze ();
|
||||
|
||||
/* Create the bitmap vectors. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,90 @@
|
||||
From a2a8b560c1749293d3b6d027e20753a7ea042c80 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:55 +0000
|
||||
Subject: [PATCH 035/157] [Backport][SME] mode-switching: Avoid quadratic list
|
||||
operation
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=174ee5115a3004d3664165e9d619535b579111d4
|
||||
|
||||
add_seginfo chained insn information to the end of a list
|
||||
by starting at the head of the list. This patch avoids the
|
||||
quadraticness by keeping track of the tail pointer.
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc (add_seginfo): Replace head pointer with
|
||||
a pointer to the tail pointer.
|
||||
(optimize_mode_switching): Update calls accordingly.
|
||||
---
|
||||
gcc/mode-switching.cc | 24 ++++++++----------------
|
||||
1 file changed, 8 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 2a9f98793..6a13951c9 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -162,23 +162,14 @@ new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
|
||||
}
|
||||
|
||||
/* Add a seginfo element to the end of a list.
|
||||
- HEAD is a pointer to the list beginning.
|
||||
+ TAIL is a pointer to the list's null terminator.
|
||||
INFO is the structure to be linked in. */
|
||||
|
||||
static void
|
||||
-add_seginfo (struct bb_info *head, struct seginfo *info)
|
||||
+add_seginfo (struct seginfo ***tail_ptr, struct seginfo *info)
|
||||
{
|
||||
- struct seginfo *ptr;
|
||||
-
|
||||
- if (head->seginfo == NULL)
|
||||
- head->seginfo = info;
|
||||
- else
|
||||
- {
|
||||
- ptr = head->seginfo;
|
||||
- while (ptr->next != NULL)
|
||||
- ptr = ptr->next;
|
||||
- ptr->next = info;
|
||||
- }
|
||||
+ **tail_ptr = info;
|
||||
+ *tail_ptr = &info->next;
|
||||
}
|
||||
|
||||
/* Record in LIVE that register REG died. */
|
||||
@@ -573,6 +564,7 @@ optimize_mode_switching (void)
|
||||
Also compute the initial transparency settings. */
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
{
|
||||
+ struct seginfo **tail_ptr = &info[bb->index].seginfo;
|
||||
struct seginfo *ptr;
|
||||
int last_mode = no_mode;
|
||||
bool any_set_required = false;
|
||||
@@ -598,7 +590,7 @@ optimize_mode_switching (void)
|
||||
if (ins_pos != BB_END (bb))
|
||||
ins_pos = NEXT_INSN (ins_pos);
|
||||
ptr = new_seginfo (no_mode, ins_pos, live_now);
|
||||
- add_seginfo (info + bb->index, ptr);
|
||||
+ add_seginfo (&tail_ptr, ptr);
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
}
|
||||
@@ -616,7 +608,7 @@ optimize_mode_switching (void)
|
||||
any_set_required = true;
|
||||
last_mode = mode;
|
||||
ptr = new_seginfo (mode, insn, live_now);
|
||||
- add_seginfo (info + bb->index, ptr);
|
||||
+ add_seginfo (&tail_ptr, ptr);
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
}
|
||||
@@ -645,7 +637,7 @@ optimize_mode_switching (void)
|
||||
if (!any_set_required)
|
||||
{
|
||||
ptr = new_seginfo (no_mode, BB_END (bb), live_now);
|
||||
- add_seginfo (info + bb->index, ptr);
|
||||
+ add_seginfo (&tail_ptr, ptr);
|
||||
if (last_mode != no_mode)
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
136
0128-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
Normal file
136
0128-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch
Normal file
@ -0,0 +1,136 @@
|
||||
From 194700063ed04b56d84912f7ace1b8370af6c696 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:56 +0000
|
||||
Subject: [PATCH 036/157] [Backport][SME] mode-switching: Fix the mode passed
|
||||
to the emit hook
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5afd208beaef50bcc43b556d4c41d41656b06436
|
||||
|
||||
optimize_mode_switching passes an entity's current mode (if known)
|
||||
to the emit hook. However, the mode that it passed ignored the
|
||||
effect of the after hook. Instead, the mode for the first emit
|
||||
call in a block was taken from the incoming mode, whereas the
|
||||
mode for each subsequent emit call was taken from the result
|
||||
of the previous call.
|
||||
|
||||
The previous pass through the insns already calculated the
|
||||
correct mode, so this patch records it in the seginfo structure.
|
||||
(There was a 32-bit hole on 64-bit hosts, so this doesn't increase
|
||||
the size of the structure for them.)
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc (seginfo): Add a prev_mode field.
|
||||
(new_seginfo): Take and initialize the prev_mode.
|
||||
(optimize_mode_switching): Update calls accordingly.
|
||||
Use the recorded modes during the emit phase, rather than
|
||||
computing one on the fly.
|
||||
---
|
||||
gcc/mode-switching.cc | 30 +++++++++++++++++-------------
|
||||
1 file changed, 17 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 6a13951c9..584cd4f67 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
NEXT is the next insn in the same basic block. */
|
||||
struct seginfo
|
||||
{
|
||||
+ int prev_mode;
|
||||
int mode;
|
||||
rtx_insn *insn_ptr;
|
||||
struct seginfo *next;
|
||||
@@ -140,20 +141,22 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
|
||||
return need_commit;
|
||||
}
|
||||
|
||||
-/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
|
||||
- and REGS_LIVE parameters.
|
||||
+/* Allocate a new BBINFO structure, initialized with the PREV_MODE, MODE,
|
||||
+ INSN, and REGS_LIVE parameters.
|
||||
INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
|
||||
basic block; that allows us later to insert instructions in a FIFO-like
|
||||
manner. */
|
||||
|
||||
static struct seginfo *
|
||||
-new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
|
||||
+new_seginfo (int prev_mode, int mode, rtx_insn *insn,
|
||||
+ const HARD_REG_SET &regs_live)
|
||||
{
|
||||
struct seginfo *ptr;
|
||||
|
||||
gcc_assert (!NOTE_INSN_BASIC_BLOCK_P (insn)
|
||||
|| insn == BB_END (NOTE_BASIC_BLOCK (insn)));
|
||||
ptr = XNEW (struct seginfo);
|
||||
+ ptr->prev_mode = prev_mode;
|
||||
ptr->mode = mode;
|
||||
ptr->insn_ptr = insn;
|
||||
ptr->next = NULL;
|
||||
@@ -589,7 +592,7 @@ optimize_mode_switching (void)
|
||||
gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
|
||||
if (ins_pos != BB_END (bb))
|
||||
ins_pos = NEXT_INSN (ins_pos);
|
||||
- ptr = new_seginfo (no_mode, ins_pos, live_now);
|
||||
+ ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
@@ -605,12 +608,12 @@ optimize_mode_switching (void)
|
||||
|
||||
if (mode != no_mode && mode != last_mode)
|
||||
{
|
||||
- any_set_required = true;
|
||||
- last_mode = mode;
|
||||
- ptr = new_seginfo (mode, insn, live_now);
|
||||
+ ptr = new_seginfo (last_mode, mode, insn, live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
+ any_set_required = true;
|
||||
+ last_mode = mode;
|
||||
}
|
||||
|
||||
if (targetm.mode_switching.after)
|
||||
@@ -636,7 +639,7 @@ optimize_mode_switching (void)
|
||||
mark the block as nontransparent. */
|
||||
if (!any_set_required)
|
||||
{
|
||||
- ptr = new_seginfo (no_mode, BB_END (bb), live_now);
|
||||
+ ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
if (last_mode != no_mode)
|
||||
for (i = 0; i < no_mode; i++)
|
||||
@@ -777,9 +780,9 @@ optimize_mode_switching (void)
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
{
|
||||
struct seginfo *ptr, *next;
|
||||
- int cur_mode = bb_info[j][bb->index].mode_in;
|
||||
+ struct seginfo *first = bb_info[j][bb->index].seginfo;
|
||||
|
||||
- for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
|
||||
+ for (ptr = first; ptr; ptr = next)
|
||||
{
|
||||
next = ptr->next;
|
||||
if (ptr->mode != no_mode)
|
||||
@@ -789,14 +792,15 @@ optimize_mode_switching (void)
|
||||
rtl_profile_for_bb (bb);
|
||||
start_sequence ();
|
||||
|
||||
+ int cur_mode = (ptr == first && ptr->prev_mode == no_mode
|
||||
+ ? bb_info[j][bb->index].mode_in
|
||||
+ : ptr->prev_mode);
|
||||
+
|
||||
targetm.mode_switching.emit (entity_map[j], ptr->mode,
|
||||
cur_mode, ptr->regs_live);
|
||||
mode_set = get_insns ();
|
||||
end_sequence ();
|
||||
|
||||
- /* modes kill each other inside a basic block. */
|
||||
- cur_mode = ptr->mode;
|
||||
-
|
||||
/* Insert MODE_SET only if it is nonempty. */
|
||||
if (mode_set != NULL_RTX)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
103
0129-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
Normal file
103
0129-Backport-SME-mode-switching-Simplify-recording-of-tr.patch
Normal file
@ -0,0 +1,103 @@
|
||||
From ac51d446ee605e942b0831d3ff617980d94bf502 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:56 +0000
|
||||
Subject: [PATCH 037/157] [Backport][SME] mode-switching: Simplify recording of
|
||||
transparency
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=335b55f4146c5ef9e3bf4bcb7e58e887c3150b02
|
||||
|
||||
For a given block, an entity is either transparent for
|
||||
all modes or for none. Each update to the transparency set
|
||||
therefore used a loop like:
|
||||
|
||||
for (i = 0; i < no_mode; i++)
|
||||
clear_mode_bit (transp[bb->index], j, i);
|
||||
|
||||
This patch instead starts out with a bit-per-block bitmap
|
||||
and updates the main bitmap at the end.
|
||||
|
||||
This isn't much of a simplification on its own. The main
|
||||
purpose is to simplify later patches.
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc (optimize_mode_switching): Initially
|
||||
compute transparency in a bit-per-block bitmap.
|
||||
---
|
||||
gcc/mode-switching.cc | 19 +++++++++++--------
|
||||
1 file changed, 11 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 584cd4f67..4d2b9e284 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -555,6 +555,8 @@ optimize_mode_switching (void)
|
||||
bitmap_vector_clear (antic, last_basic_block_for_fn (cfun));
|
||||
bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
|
||||
|
||||
+ auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
|
||||
+
|
||||
for (j = n_entities - 1; j >= 0; j--)
|
||||
{
|
||||
int e = entity_map[j];
|
||||
@@ -562,6 +564,8 @@ optimize_mode_switching (void)
|
||||
struct bb_info *info = bb_info[j];
|
||||
rtx_insn *insn;
|
||||
|
||||
+ bitmap_ones (transp_all);
|
||||
+
|
||||
/* Determine what the first use (if any) need for a mode of entity E is.
|
||||
This will be the mode that is anticipatable for this block.
|
||||
Also compute the initial transparency settings. */
|
||||
@@ -594,8 +598,7 @@ optimize_mode_switching (void)
|
||||
ins_pos = NEXT_INSN (ins_pos);
|
||||
ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
- for (i = 0; i < no_mode; i++)
|
||||
- clear_mode_bit (transp[bb->index], j, i);
|
||||
+ bitmap_clear_bit (transp_all, bb->index);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -610,8 +613,7 @@ optimize_mode_switching (void)
|
||||
{
|
||||
ptr = new_seginfo (last_mode, mode, insn, live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
- for (i = 0; i < no_mode; i++)
|
||||
- clear_mode_bit (transp[bb->index], j, i);
|
||||
+ bitmap_clear_bit (transp_all, bb->index);
|
||||
any_set_required = true;
|
||||
last_mode = mode;
|
||||
}
|
||||
@@ -642,8 +644,7 @@ optimize_mode_switching (void)
|
||||
ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
if (last_mode != no_mode)
|
||||
- for (i = 0; i < no_mode; i++)
|
||||
- clear_mode_bit (transp[bb->index], j, i);
|
||||
+ bitmap_clear_bit (transp_all, bb->index);
|
||||
}
|
||||
}
|
||||
if (targetm.mode_switching.entry && targetm.mode_switching.exit)
|
||||
@@ -666,8 +667,7 @@ optimize_mode_switching (void)
|
||||
an extra check in make_preds_opaque. We also
|
||||
need this to avoid confusing pre_edge_lcm when
|
||||
antic is cleared but transp and comp are set. */
|
||||
- for (i = 0; i < no_mode; i++)
|
||||
- clear_mode_bit (transp[bb->index], j, i);
|
||||
+ bitmap_clear_bit (transp_all, bb->index);
|
||||
|
||||
/* Insert a fake computing definition of MODE into entry
|
||||
blocks which compute no mode. This represents the mode on
|
||||
@@ -687,6 +687,9 @@ optimize_mode_switching (void)
|
||||
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
{
|
||||
+ if (!bitmap_bit_p (transp_all, bb->index))
|
||||
+ clear_mode_bit (transp[bb->index], j, m);
|
||||
+
|
||||
if (info[bb->index].seginfo->mode == m)
|
||||
set_mode_bit (antic[bb->index], j, m);
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,92 @@
|
||||
From c0aaf329d9c547b249ac120a8d1995d8546a1edb Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:57 +0000
|
||||
Subject: [PATCH 038/157] [Backport][SME] mode-switching: Tweak entry/exit
|
||||
handling
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e59ec35276599805cdc6c3979d8a167b027d286e
|
||||
|
||||
An entity isn't transparent in a block that requires a specific mode.
|
||||
optimize_mode_switching took that into account for normal insns,
|
||||
but didn't for the exit block. Later patches misbehaved because
|
||||
of this.
|
||||
|
||||
In contrast, an entity was correctly marked as non-transparent
|
||||
in the entry block, but the reasoning seemed a bit convoluted.
|
||||
It also referred to a function that no longer exists.
|
||||
Since KILL = ~TRANSP, the entity is by definition not transparent
|
||||
in a block that defines the entity, so I think we can make it so
|
||||
without comment.
|
||||
|
||||
Finally, the exit handling was nested in the entry handling,
|
||||
but that doesn't seem necessary. A target could say that an
|
||||
entity is undefined on entry but must be defined on return,
|
||||
on a "be liberal in what you accept, be conservative in what
|
||||
you do" principle.
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc (optimize_mode_switching): Mark the exit
|
||||
block as nontransparent if it requires a specific mode.
|
||||
Handle the entry and exit mode as sibling rather than nested
|
||||
concepts. Remove outdated comment.
|
||||
---
|
||||
gcc/mode-switching.cc | 34 +++++++++++++++-------------------
|
||||
1 file changed, 15 insertions(+), 19 deletions(-)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 4d2b9e284..4761c2ff0 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -649,34 +649,30 @@ optimize_mode_switching (void)
|
||||
}
|
||||
if (targetm.mode_switching.entry && targetm.mode_switching.exit)
|
||||
{
|
||||
- int mode = targetm.mode_switching.entry (e);
|
||||
-
|
||||
info[post_entry->index].mode_out =
|
||||
info[post_entry->index].mode_in = no_mode;
|
||||
- if (pre_exit)
|
||||
- {
|
||||
- info[pre_exit->index].mode_out =
|
||||
- info[pre_exit->index].mode_in = no_mode;
|
||||
- }
|
||||
|
||||
+ int mode = targetm.mode_switching.entry (e);
|
||||
if (mode != no_mode)
|
||||
{
|
||||
- bb = post_entry;
|
||||
-
|
||||
- /* By always making this nontransparent, we save
|
||||
- an extra check in make_preds_opaque. We also
|
||||
- need this to avoid confusing pre_edge_lcm when
|
||||
- antic is cleared but transp and comp are set. */
|
||||
- bitmap_clear_bit (transp_all, bb->index);
|
||||
-
|
||||
/* Insert a fake computing definition of MODE into entry
|
||||
blocks which compute no mode. This represents the mode on
|
||||
entry. */
|
||||
- info[bb->index].computing = mode;
|
||||
+ info[post_entry->index].computing = mode;
|
||||
+ bitmap_clear_bit (transp_all, post_entry->index);
|
||||
+ }
|
||||
|
||||
- if (pre_exit)
|
||||
- info[pre_exit->index].seginfo->mode =
|
||||
- targetm.mode_switching.exit (e);
|
||||
+ if (pre_exit)
|
||||
+ {
|
||||
+ info[pre_exit->index].mode_out =
|
||||
+ info[pre_exit->index].mode_in = no_mode;
|
||||
+
|
||||
+ int mode = targetm.mode_switching.exit (e);
|
||||
+ if (mode != no_mode)
|
||||
+ {
|
||||
+ info[pre_exit->index].seginfo->mode = mode;
|
||||
+ bitmap_clear_bit (transp_all, pre_exit->index);
|
||||
+ }
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,93 @@
|
||||
From 9505464aec8f95125293c64e2eea9577e9be4700 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:57 +0000
|
||||
Subject: [PATCH 039/157] [Backport][SME] mode-switching: Allow targets to set
|
||||
the mode for EH handlers
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4b803fbf839439b1deca660e32d5ced211111dfa
|
||||
|
||||
The mode-switching pass already had hooks to say what mode
|
||||
an entity is in on entry to a function and what mode it must
|
||||
be in on return. For SME, we also want to say what mode an
|
||||
entity is guaranteed to be in on entry to an exception handler.
|
||||
|
||||
gcc/
|
||||
* target.def (mode_switching.eh_handler): New hook.
|
||||
* doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* mode-switching.cc (optimize_mode_switching): Use eh_handler
|
||||
to get the mode on entry to an exception handler.
|
||||
---
|
||||
gcc/doc/tm.texi | 6 ++++++
|
||||
gcc/doc/tm.texi.in | 2 ++
|
||||
gcc/mode-switching.cc | 5 ++++-
|
||||
gcc/target.def | 7 +++++++
|
||||
4 files changed, 19 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 553aa4cf2..4788b3f7a 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -10321,6 +10321,12 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
|
||||
must be defined.
|
||||
@end deftypefn
|
||||
|
||||
+@deftypefn {Target Hook} int TARGET_MODE_EH_HANDLER (int @var{entity})
|
||||
+If this hook is defined, it should return the mode that @var{entity} is
|
||||
+guaranteed to be in on entry to an exception handler, or the number of modes
|
||||
+if there is no such guarantee.
|
||||
+@end deftypefn
|
||||
+
|
||||
@deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
|
||||
This hook specifies the order in which modes for @var{entity}
|
||||
are processed. 0 is the highest priority,
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index 9ec11b15c..ad343504f 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -6926,6 +6926,8 @@ mode or ``no mode'', depending on context.
|
||||
|
||||
@hook TARGET_MODE_EXIT
|
||||
|
||||
+@hook TARGET_MODE_EH_HANDLER
|
||||
+
|
||||
@hook TARGET_MODE_PRIORITY
|
||||
|
||||
@node Target Attributes
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 4761c2ff0..9a6ba6cca 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -596,7 +596,10 @@ optimize_mode_switching (void)
|
||||
gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
|
||||
if (ins_pos != BB_END (bb))
|
||||
ins_pos = NEXT_INSN (ins_pos);
|
||||
- ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
|
||||
+ if (bb_has_eh_pred (bb)
|
||||
+ && targetm.mode_switching.eh_handler)
|
||||
+ last_mode = targetm.mode_switching.eh_handler (e);
|
||||
+ ptr = new_seginfo (no_mode, last_mode, ins_pos, live_now);
|
||||
add_seginfo (&tail_ptr, ptr);
|
||||
bitmap_clear_bit (transp_all, bb->index);
|
||||
}
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index b87b0f927..bbb482de6 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -7042,6 +7042,13 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\
|
||||
must be defined.",
|
||||
int, (int entity), NULL)
|
||||
|
||||
+DEFHOOK
|
||||
+(eh_handler,
|
||||
+ "If this hook is defined, it should return the mode that @var{entity} is\n\
|
||||
+guaranteed to be in on entry to an exception handler, or the number of modes\n\
|
||||
+if there is no such guarantee.",
|
||||
+ int, (int entity), NULL)
|
||||
+
|
||||
DEFHOOK
|
||||
(priority,
|
||||
"This hook specifies the order in which modes for @var{entity}\n\
|
||||
--
|
||||
2.33.0
|
||||
|
||||
211
0132-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
Normal file
211
0132-Backport-SME-mode-switching-Pass-set-of-live-registe.patch
Normal file
@ -0,0 +1,211 @@
|
||||
From a6964e11c7f624cdaed2c9608565a5968292b70f Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:58 +0000
|
||||
Subject: [PATCH 040/157] [Backport][SME] mode-switching: Pass set of live
|
||||
registers to the needed hook
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29d3e1892ebec8abce784077d1f1a3e21d763218
|
||||
|
||||
The emit hook already takes the set of live hard registers as input.
|
||||
This patch passes it to the needed hook too. SME uses this to
|
||||
optimise the mode choice based on whether state is live or dead.
|
||||
|
||||
The main caller already had access to the required info, but the
|
||||
special handling of return values did not.
|
||||
|
||||
gcc/
|
||||
* target.def (mode_switching.needed): Add a regs_live parameter.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update
|
||||
accordingly.
|
||||
* config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise.
|
||||
* config/epiphany/mode-switch-use.cc (insert_uses): Likewise.
|
||||
* config/i386/i386.cc (ix86_mode_needed): Likewise.
|
||||
* config/riscv/riscv.cc (riscv_mode_needed): Likewise.
|
||||
* config/sh/sh.cc (sh_mode_needed): Likewise.
|
||||
* mode-switching.cc (optimize_mode_switching): Likewise.
|
||||
(create_pre_exit): Likewise, using the DF simulate functions
|
||||
to calculate the required information.
|
||||
---
|
||||
gcc/config/epiphany/epiphany-protos.h | 4 +++-
|
||||
gcc/config/epiphany/epiphany.cc | 2 +-
|
||||
gcc/config/epiphany/mode-switch-use.cc | 2 +-
|
||||
gcc/config/i386/i386.cc | 2 +-
|
||||
gcc/config/sh/sh.cc | 4 ++--
|
||||
gcc/doc/tm.texi | 5 +++--
|
||||
gcc/mode-switching.cc | 14 ++++++++++++--
|
||||
gcc/target.def | 5 +++--
|
||||
8 files changed, 26 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h
|
||||
index 61b63234e..d463e5483 100644
|
||||
--- a/gcc/config/epiphany/epiphany-protos.h
|
||||
+++ b/gcc/config/epiphany/epiphany-protos.h
|
||||
@@ -44,7 +44,9 @@ extern void emit_set_fp_mode (int entity, int mode, int prev_mode,
|
||||
#endif
|
||||
extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int);
|
||||
extern void epiphany_expand_set_fp_mode (rtx *operands);
|
||||
-extern int epiphany_mode_needed (int entity, rtx_insn *insn);
|
||||
+#ifdef HARD_CONST
|
||||
+extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET);
|
||||
+#endif
|
||||
extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn);
|
||||
extern bool epiphany_epilogue_uses (int regno);
|
||||
extern bool epiphany_optimize_mode_switching (int entity);
|
||||
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
|
||||
index f8c049340..be0fbc68c 100644
|
||||
--- a/gcc/config/epiphany/epiphany.cc
|
||||
+++ b/gcc/config/epiphany/epiphany.cc
|
||||
@@ -2400,7 +2400,7 @@ epiphany_mode_priority (int entity, int priority)
|
||||
}
|
||||
|
||||
int
|
||||
-epiphany_mode_needed (int entity, rtx_insn *insn)
|
||||
+epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
|
||||
{
|
||||
enum attr_fp_mode mode;
|
||||
|
||||
diff --git a/gcc/config/epiphany/mode-switch-use.cc b/gcc/config/epiphany/mode-switch-use.cc
|
||||
index 887550a33..cacb1ce5a 100644
|
||||
--- a/gcc/config/epiphany/mode-switch-use.cc
|
||||
+++ b/gcc/config/epiphany/mode-switch-use.cc
|
||||
@@ -58,7 +58,7 @@ insert_uses (void)
|
||||
{
|
||||
if (!INSN_P (insn))
|
||||
continue;
|
||||
- mode = epiphany_mode_needed (e, insn);
|
||||
+ mode = epiphany_mode_needed (e, insn, {});
|
||||
if (mode == no_mode)
|
||||
continue;
|
||||
if (target_insert_mode_switch_use)
|
||||
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||
index 60f3296b0..4d591d217 100644
|
||||
--- a/gcc/config/i386/i386.cc
|
||||
+++ b/gcc/config/i386/i386.cc
|
||||
@@ -14522,7 +14522,7 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn)
|
||||
prior to the execution of insn. */
|
||||
|
||||
static int
|
||||
-ix86_mode_needed (int entity, rtx_insn *insn)
|
||||
+ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
|
||||
{
|
||||
switch (entity)
|
||||
{
|
||||
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
|
||||
index 03e1c04ec..85e83e12e 100644
|
||||
--- a/gcc/config/sh/sh.cc
|
||||
+++ b/gcc/config/sh/sh.cc
|
||||
@@ -195,7 +195,7 @@ static int calc_live_regs (HARD_REG_SET *);
|
||||
static HOST_WIDE_INT rounded_frame_size (int);
|
||||
static bool sh_frame_pointer_required (void);
|
||||
static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
|
||||
-static int sh_mode_needed (int, rtx_insn *);
|
||||
+static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET);
|
||||
static int sh_mode_after (int, int, rtx_insn *);
|
||||
static int sh_mode_entry (int);
|
||||
static int sh_mode_exit (int);
|
||||
@@ -12529,7 +12529,7 @@ sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
|
||||
}
|
||||
|
||||
static int
|
||||
-sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
|
||||
+sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET)
|
||||
{
|
||||
return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
|
||||
}
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 4788b3f7a..d8ac6c4d6 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -10280,12 +10280,13 @@ known. Sets of a lower numbered entity will be emitted before
|
||||
sets of a higher numbered entity to a mode of the same or lower priority.
|
||||
@end deftypefn
|
||||
|
||||
-@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn})
|
||||
+@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live})
|
||||
@var{entity} is an integer specifying a mode-switched entity.
|
||||
If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook
|
||||
to return the mode that @var{entity} must be switched into prior to the
|
||||
execution of @var{insn}, or the number of modes if @var{insn} has no
|
||||
-such requirement.
|
||||
+such requirement. @var{regs_live} contains the set of hard registers
|
||||
+that are live before @var{insn}.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 9a6ba6cca..6bbda5058 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -254,6 +254,9 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
|
||||
&& GET_CODE (PATTERN (last_insn)) == USE
|
||||
&& GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG)
|
||||
{
|
||||
+ auto_bitmap live;
|
||||
+ df_simulate_initialize_backwards (src_bb, live);
|
||||
+
|
||||
int ret_start = REGNO (ret_reg);
|
||||
int nregs = REG_NREGS (ret_reg);
|
||||
int ret_end = ret_start + nregs;
|
||||
@@ -262,6 +265,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
|
||||
bool forced_late_switch = false;
|
||||
rtx_insn *before_return_copy;
|
||||
|
||||
+ df_simulate_one_insn_backwards (src_bb, last_insn, live);
|
||||
+
|
||||
do
|
||||
{
|
||||
rtx_insn *return_copy = PREV_INSN (last_insn);
|
||||
@@ -269,6 +274,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
|
||||
int copy_start, copy_num;
|
||||
int j;
|
||||
|
||||
+ df_simulate_one_insn_backwards (src_bb, return_copy, live);
|
||||
+
|
||||
if (NONDEBUG_INSN_P (return_copy))
|
||||
{
|
||||
/* When using SJLJ exceptions, the call to the
|
||||
@@ -368,11 +375,14 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
|
||||
the case for floating point on SH4 - then it might
|
||||
be set by an arithmetic operation that needs a
|
||||
different mode than the exit block. */
|
||||
+ HARD_REG_SET hard_regs_live;
|
||||
+ REG_SET_TO_HARD_REG_SET (hard_regs_live, live);
|
||||
for (j = n_entities - 1; j >= 0; j--)
|
||||
{
|
||||
int e = entity_map[j];
|
||||
int mode =
|
||||
- targetm.mode_switching.needed (e, return_copy);
|
||||
+ targetm.mode_switching.needed (e, return_copy,
|
||||
+ hard_regs_live);
|
||||
|
||||
if (mode != num_modes[e]
|
||||
&& mode != targetm.mode_switching.exit (e))
|
||||
@@ -609,7 +619,7 @@ optimize_mode_switching (void)
|
||||
{
|
||||
if (INSN_P (insn))
|
||||
{
|
||||
- int mode = targetm.mode_switching.needed (e, insn);
|
||||
+ int mode = targetm.mode_switching.needed (e, insn, live_now);
|
||||
rtx link;
|
||||
|
||||
if (mode != no_mode && mode != last_mode)
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index bbb482de6..06a52bdaf 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -7003,8 +7003,9 @@ DEFHOOK
|
||||
If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\
|
||||
to return the mode that @var{entity} must be switched into prior to the\n\
|
||||
execution of @var{insn}, or the number of modes if @var{insn} has no\n\
|
||||
-such requirement.",
|
||||
- int, (int entity, rtx_insn *insn), NULL)
|
||||
+such requirement. @var{regs_live} contains the set of hard registers\n\
|
||||
+that are live before @var{insn}.",
|
||||
+ int, (int entity, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(after,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
177
0133-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
Normal file
177
0133-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch
Normal file
@ -0,0 +1,177 @@
|
||||
From 4457604c11c0a32f3736d73429d1e5fb7baae3a5 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:58 +0000
|
||||
Subject: [PATCH 041/157] [Backport][SME] mode-switching: Pass the set of live
|
||||
registers to the after hook
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93d65f39bc5c3dc318deb6da0e3633f3a4c6c34d
|
||||
|
||||
This patch passes the set of live hard registers to the after hook,
|
||||
like the previous one did for the needed hook.
|
||||
|
||||
gcc/
|
||||
* target.def (mode_switching.after): Add a regs_live parameter.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* config/epiphany/epiphany-protos.h (epiphany_mode_after): Update
|
||||
accordingly.
|
||||
* config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise.
|
||||
(epiphany_mode_after): Likewise.
|
||||
* config/i386/i386.cc (ix86_mode_after): Likewise.
|
||||
* config/riscv/riscv.cc (riscv_mode_after): Likewise.
|
||||
* config/sh/sh.cc (sh_mode_after): Likewise.
|
||||
* mode-switching.cc (optimize_mode_switching): Likewise.
|
||||
---
|
||||
gcc/config/epiphany/epiphany-protos.h | 3 ++-
|
||||
gcc/config/epiphany/epiphany.cc | 5 +++--
|
||||
gcc/config/i386/i386.cc | 2 +-
|
||||
gcc/config/sh/sh.cc | 5 +++--
|
||||
gcc/doc/tm.texi | 4 +++-
|
||||
gcc/mode-switching.cc | 8 ++++----
|
||||
gcc/target.def | 4 +++-
|
||||
7 files changed, 19 insertions(+), 12 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h
|
||||
index d463e5483..6326b7e80 100644
|
||||
--- a/gcc/config/epiphany/epiphany-protos.h
|
||||
+++ b/gcc/config/epiphany/epiphany-protos.h
|
||||
@@ -46,8 +46,9 @@ extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int);
|
||||
extern void epiphany_expand_set_fp_mode (rtx *operands);
|
||||
#ifdef HARD_CONST
|
||||
extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET);
|
||||
+extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn,
|
||||
+ HARD_REG_SET);
|
||||
#endif
|
||||
-extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn);
|
||||
extern bool epiphany_epilogue_uses (int regno);
|
||||
extern bool epiphany_optimize_mode_switching (int entity);
|
||||
extern bool epiphany_is_interrupt_p (tree);
|
||||
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
|
||||
index be0fbc68c..62636b1ec 100644
|
||||
--- a/gcc/config/epiphany/epiphany.cc
|
||||
+++ b/gcc/config/epiphany/epiphany.cc
|
||||
@@ -2437,7 +2437,7 @@ epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
|
||||
return 2;
|
||||
case EPIPHANY_MSW_ENTITY_ROUND_KNOWN:
|
||||
if (recog_memoized (insn) == CODE_FOR_set_fp_mode)
|
||||
- mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn);
|
||||
+ mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn, {});
|
||||
/* Fall through. */
|
||||
case EPIPHANY_MSW_ENTITY_NEAREST:
|
||||
case EPIPHANY_MSW_ENTITY_TRUNC:
|
||||
@@ -2498,7 +2498,8 @@ epiphany_mode_entry_exit (int entity, bool exit)
|
||||
}
|
||||
|
||||
int
|
||||
-epiphany_mode_after (int entity, int last_mode, rtx_insn *insn)
|
||||
+epiphany_mode_after (int entity, int last_mode, rtx_insn *insn,
|
||||
+ HARD_REG_SET)
|
||||
{
|
||||
/* We have too few call-saved registers to hope to keep the masks across
|
||||
calls. */
|
||||
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||
index 4d591d217..593185fa6 100644
|
||||
--- a/gcc/config/i386/i386.cc
|
||||
+++ b/gcc/config/i386/i386.cc
|
||||
@@ -14583,7 +14583,7 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
|
||||
/* Return the mode that an insn results in. */
|
||||
|
||||
static int
|
||||
-ix86_mode_after (int entity, int mode, rtx_insn *insn)
|
||||
+ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
|
||||
{
|
||||
switch (entity)
|
||||
{
|
||||
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
|
||||
index 85e83e12e..74d61c43b 100644
|
||||
--- a/gcc/config/sh/sh.cc
|
||||
+++ b/gcc/config/sh/sh.cc
|
||||
@@ -196,7 +196,7 @@ static HOST_WIDE_INT rounded_frame_size (int);
|
||||
static bool sh_frame_pointer_required (void);
|
||||
static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
|
||||
static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET);
|
||||
-static int sh_mode_after (int, int, rtx_insn *);
|
||||
+static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET);
|
||||
static int sh_mode_entry (int);
|
||||
static int sh_mode_exit (int);
|
||||
static int sh_mode_priority (int entity, int n);
|
||||
@@ -12535,7 +12535,8 @@ sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET)
|
||||
}
|
||||
|
||||
static int
|
||||
-sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
|
||||
+sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn,
|
||||
+ HARD_REG_SET)
|
||||
{
|
||||
if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
|
||||
get_attr_fp_set (insn) != FP_SET_NONE)
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index d8ac6c4d6..7fce485b2 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -10289,12 +10289,14 @@ such requirement. @var{regs_live} contains the set of hard registers
|
||||
that are live before @var{insn}.
|
||||
@end deftypefn
|
||||
|
||||
-@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
|
||||
+@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live})
|
||||
@var{entity} is an integer specifying a mode-switched entity.
|
||||
If this hook is defined, it is evaluated for every @var{insn} during mode
|
||||
switching. It returns the mode that @var{entity} is in after @var{insn}
|
||||
has been executed. @var{mode} is the mode that @var{entity} was in
|
||||
before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.
|
||||
+@var{regs_live} is the set of hard registers that are live after @var{insn}
|
||||
+has been executed.
|
||||
|
||||
@var{mode} is equal to the number of modes defined for @var{entity}
|
||||
if the mode before @var{insn} is unknown. The hook should likewise return
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 6bbda5058..4f0445894 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -631,10 +631,6 @@ optimize_mode_switching (void)
|
||||
last_mode = mode;
|
||||
}
|
||||
|
||||
- if (targetm.mode_switching.after)
|
||||
- last_mode = targetm.mode_switching.after (e, last_mode,
|
||||
- insn);
|
||||
-
|
||||
/* Update LIVE_NOW. */
|
||||
for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
|
||||
if (REG_NOTE_KIND (link) == REG_DEAD)
|
||||
@@ -644,6 +640,10 @@ optimize_mode_switching (void)
|
||||
for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
|
||||
if (REG_NOTE_KIND (link) == REG_UNUSED)
|
||||
reg_dies (XEXP (link, 0), &live_now);
|
||||
+
|
||||
+ if (targetm.mode_switching.after)
|
||||
+ last_mode = targetm.mode_switching.after (e, last_mode,
|
||||
+ insn, live_now);
|
||||
}
|
||||
}
|
||||
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index 06a52bdaf..67c20bbb0 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -7014,6 +7014,8 @@ If this hook is defined, it is evaluated for every @var{insn} during mode\n\
|
||||
switching. It returns the mode that @var{entity} is in after @var{insn}\n\
|
||||
has been executed. @var{mode} is the mode that @var{entity} was in\n\
|
||||
before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\
|
||||
+@var{regs_live} is the set of hard registers that are live after @var{insn}\n\
|
||||
+has been executed.\n\
|
||||
\n\
|
||||
@var{mode} is equal to the number of modes defined for @var{entity}\n\
|
||||
if the mode before @var{insn} is unknown. The hook should likewise return\n\
|
||||
@@ -7021,7 +7023,7 @@ the number of modes if it does not know what mode @var{entity} has after\n\
|
||||
@var{insn}.\n\
|
||||
\n\
|
||||
Not defining the hook is equivalent to returning @var{mode}.",
|
||||
- int, (int entity, int mode, rtx_insn *insn), NULL)
|
||||
+ int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
|
||||
|
||||
DEFHOOK
|
||||
(entry,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,56 @@
|
||||
From b0d3536b2a28d3a7084e3bbb9532e719aaf2016b Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:59 +0000
|
||||
Subject: [PATCH 042/157] [Backport][SME] mode-switching: Use 1-based edge aux
|
||||
fields
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=718228a6f479fe252e0e6f71933c2465b7b480a9
|
||||
|
||||
The pass used the edge aux field to record which mode change
|
||||
should happen on the edge, with -1 meaning "none". It's more
|
||||
convenient for later patches to leave aux zero for "none",
|
||||
and use numbers based at 1 to record a change.
|
||||
|
||||
gcc/
|
||||
* mode-switching.cc (commit_mode_sets): Use 1-based edge aux values.
|
||||
---
|
||||
gcc/mode-switching.cc | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 4f0445894..89a8494c6 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -106,10 +106,10 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
|
||||
for (int ed = NUM_EDGES (edge_list) - 1; ed >= 0; ed--)
|
||||
{
|
||||
edge eg = INDEX_EDGE (edge_list, ed);
|
||||
- int mode;
|
||||
|
||||
- if ((mode = (int)(intptr_t)(eg->aux)) != -1)
|
||||
+ if (eg->aux)
|
||||
{
|
||||
+ int mode = (int) (intptr_t) eg->aux - 1;
|
||||
HARD_REG_SET live_at_edge;
|
||||
basic_block src_bb = eg->src;
|
||||
int cur_mode = info[src_bb->index].mode_out;
|
||||
@@ -727,14 +727,14 @@ optimize_mode_switching (void)
|
||||
{
|
||||
edge eg = INDEX_EDGE (edge_list, ed);
|
||||
|
||||
- eg->aux = (void *)(intptr_t)-1;
|
||||
+ eg->aux = (void *) (intptr_t) 0;
|
||||
|
||||
for (i = 0; i < no_mode; i++)
|
||||
{
|
||||
int m = targetm.mode_switching.priority (entity_map[j], i);
|
||||
if (mode_bit_p (insert[ed], j, m))
|
||||
{
|
||||
- eg->aux = (void *)(intptr_t)m;
|
||||
+ eg->aux = (void *) (intptr_t) (m + 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
337
0135-Backport-SME-mode-switching-Add-a-target-configurabl.patch
Normal file
337
0135-Backport-SME-mode-switching-Add-a-target-configurabl.patch
Normal file
@ -0,0 +1,337 @@
|
||||
From 88d76baa38bb29d5cc732b3c0188b74ef9783713 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:28:59 +0000
|
||||
Subject: [PATCH 043/157] [Backport][SME] mode-switching: Add a
|
||||
target-configurable confluence operator
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=493b0038d7d04986c7de977074d095e4eb7d9a27
|
||||
|
||||
The mode-switching pass assumed that all of an entity's modes
|
||||
were mutually exclusive. However, the upcoming SME changes
|
||||
have an entity with some overlapping modes, so that there is
|
||||
sometimes a "superunion" mode that contains two given modes.
|
||||
We can use this relationship to pass something more helpful than
|
||||
"don't know" to the emit hook.
|
||||
|
||||
This patch adds a new hook that targets can use to specify
|
||||
a mode confluence operator.
|
||||
|
||||
With mutually exclusive modes, it's possible to compute a block's
|
||||
incoming and outgoing modes by looking at its availability sets.
|
||||
With the confluence operator, we instead need to solve a full
|
||||
dataflow problem.
|
||||
|
||||
However, when emitting a mode transition, the upcoming SME use of
|
||||
mode-switching benefits from having as much information as possible
|
||||
about the starting mode. Calculating this information is definitely
|
||||
worth the compile time.
|
||||
|
||||
The dataflow problem is written to work before and after the LCM
|
||||
problem has been solved. A later patch makes use of this.
|
||||
|
||||
While there (since git blame would ping me for the reindented code),
|
||||
I used a lambda to avoid the cut-&-pasted loops.
|
||||
|
||||
gcc/
|
||||
* target.def (mode_switching.confluence): New hook.
|
||||
* doc/tm.texi.in (TARGET_MODE_CONFLUENCE): New @hook.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* mode-switching.cc (confluence_info): New variable.
|
||||
(mode_confluence, forward_confluence_n, forward_transfer): New
|
||||
functions.
|
||||
(optimize_mode_switching): Use them to calculate mode_in when
|
||||
TARGET_MODE_CONFLUENCE is defined.
|
||||
---
|
||||
gcc/doc/tm.texi | 16 ++++
|
||||
gcc/doc/tm.texi.in | 2 +
|
||||
gcc/mode-switching.cc | 179 +++++++++++++++++++++++++++++++++++-------
|
||||
gcc/target.def | 17 ++++
|
||||
4 files changed, 186 insertions(+), 28 deletions(-)
|
||||
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 7fce485b2..d7053ec9e 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -10306,6 +10306,22 @@ the number of modes if it does not know what mode @var{entity} has after
|
||||
Not defining the hook is equivalent to returning @var{mode}.
|
||||
@end deftypefn
|
||||
|
||||
+@deftypefn {Target Hook} int TARGET_MODE_CONFLUENCE (int @var{entity}, int @var{mode1}, int @var{mode2})
|
||||
+By default, the mode-switching pass assumes that a given entity's modes
|
||||
+are mutually exclusive. This means that the pass can only tell
|
||||
+@code{TARGET_MODE_EMIT} about an entity's previous mode if all
|
||||
+incoming paths of execution leave the entity in the same state.
|
||||
+
|
||||
+However, some entities might have overlapping, non-exclusive modes,
|
||||
+so that it is sometimes possible to represent ``mode @var{mode1} or mode
|
||||
+@var{mode2}'' with something more specific than ``mode not known''.
|
||||
+If this is true for at least one entity, you should define this hook
|
||||
+and make it return a mode that includes @var{mode1} and @var{mode2}
|
||||
+as possibilities. (The mode can include other possibilities too.)
|
||||
+The hook should return the number of modes if no suitable mode exists
|
||||
+for the given arguments.
|
||||
+@end deftypefn
|
||||
+
|
||||
@deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
|
||||
If this hook is defined, it is evaluated for every @var{entity} that
|
||||
needs mode switching. It should return the mode that @var{entity} is
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index ad343504f..d420e62fd 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -6922,6 +6922,8 @@ mode or ``no mode'', depending on context.
|
||||
|
||||
@hook TARGET_MODE_AFTER
|
||||
|
||||
+@hook TARGET_MODE_CONFLUENCE
|
||||
+
|
||||
@hook TARGET_MODE_ENTRY
|
||||
|
||||
@hook TARGET_MODE_EXIT
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 89a8494c6..065767902 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -484,6 +484,101 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
|
||||
return pre_exit;
|
||||
}
|
||||
|
||||
+/* Return the confluence of modes MODE1 and MODE2 for entity ENTITY,
|
||||
+ using NO_MODE to represent an unknown mode if nothing more precise
|
||||
+ is available. */
|
||||
+
|
||||
+int
|
||||
+mode_confluence (int entity, int mode1, int mode2, int no_mode)
|
||||
+{
|
||||
+ if (mode1 == mode2)
|
||||
+ return mode1;
|
||||
+
|
||||
+ if (mode1 != no_mode
|
||||
+ && mode2 != no_mode
|
||||
+ && targetm.mode_switching.confluence)
|
||||
+ return targetm.mode_switching.confluence (entity, mode1, mode2);
|
||||
+
|
||||
+ return no_mode;
|
||||
+}
|
||||
+
|
||||
+/* Information for the dataflow problems below. */
|
||||
+struct
|
||||
+{
|
||||
+ /* Information about each basic block, indexed by block id. */
|
||||
+ struct bb_info *bb_info;
|
||||
+
|
||||
+ /* The entity that we're processing. */
|
||||
+ int entity;
|
||||
+
|
||||
+ /* The number of modes defined for the entity, and thus the identifier
|
||||
+ of the "don't know" mode. */
|
||||
+ int no_mode;
|
||||
+} confluence_info;
|
||||
+
|
||||
+/* Propagate information about any mode change on edge E to the
|
||||
+ destination block's mode_in. Return true if something changed.
|
||||
+
|
||||
+ The mode_in and mode_out fields use no_mode + 1 to mean "not yet set". */
|
||||
+
|
||||
+static bool
|
||||
+forward_confluence_n (edge e)
|
||||
+{
|
||||
+ /* The entry and exit blocks have no useful mode information. */
|
||||
+ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK)
|
||||
+ return false;
|
||||
+
|
||||
+ /* We don't control mode changes across abnormal edges. */
|
||||
+ if (e->flags & EDGE_ABNORMAL)
|
||||
+ return false;
|
||||
+
|
||||
+ /* E->aux is nonzero if we have computed the LCM problem and scheduled
|
||||
+ E to change the mode to E->aux - 1. Otherwise model the change
|
||||
+ from the source to the destination. */
|
||||
+ struct bb_info *bb_info = confluence_info.bb_info;
|
||||
+ int no_mode = confluence_info.no_mode;
|
||||
+ int src_mode = bb_info[e->src->index].mode_out;
|
||||
+ if (e->aux)
|
||||
+ src_mode = (int) (intptr_t) e->aux - 1;
|
||||
+ if (src_mode == no_mode + 1)
|
||||
+ return false;
|
||||
+
|
||||
+ int dest_mode = bb_info[e->dest->index].mode_in;
|
||||
+ if (dest_mode == no_mode + 1)
|
||||
+ {
|
||||
+ bb_info[e->dest->index].mode_in = src_mode;
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ int entity = confluence_info.entity;
|
||||
+ int new_mode = mode_confluence (entity, src_mode, dest_mode, no_mode);
|
||||
+ if (dest_mode == new_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ bb_info[e->dest->index].mode_in = new_mode;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* Update block BB_INDEX's mode_out based on its mode_in. Return true if
|
||||
+ something changed. */
|
||||
+
|
||||
+static bool
|
||||
+forward_transfer (int bb_index)
|
||||
+{
|
||||
+ /* The entry and exit blocks have no useful mode information. */
|
||||
+ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Only propagate through a block if the entity is transparent. */
|
||||
+ struct bb_info *bb_info = confluence_info.bb_info;
|
||||
+ if (bb_info[bb_index].computing != confluence_info.no_mode
|
||||
+ || bb_info[bb_index].mode_out == bb_info[bb_index].mode_in)
|
||||
+ return false;
|
||||
+
|
||||
+ bb_info[bb_index].mode_out = bb_info[bb_index].mode_in;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/* Find all insns that need a particular mode setting, and insert the
|
||||
necessary mode switches. Return true if we did work. */
|
||||
|
||||
@@ -567,6 +662,39 @@ optimize_mode_switching (void)
|
||||
|
||||
auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
|
||||
|
||||
+ auto_bitmap blocks;
|
||||
+
|
||||
+ /* Forward-propagate mode information through blocks where the entity
|
||||
+ is transparent, so that mode_in describes the mode on entry to each
|
||||
+ block and mode_out describes the mode on exit from each block. */
|
||||
+ auto forwprop_mode_info = [&](struct bb_info *info,
|
||||
+ int entity, int no_mode)
|
||||
+ {
|
||||
+ /* Use no_mode + 1 to mean "not yet set". */
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ if (bb_has_abnormal_pred (bb))
|
||||
+ info[bb->index].mode_in = info[bb->index].seginfo->mode;
|
||||
+ else
|
||||
+ info[bb->index].mode_in = no_mode + 1;
|
||||
+ if (info[bb->index].computing != no_mode)
|
||||
+ info[bb->index].mode_out = info[bb->index].computing;
|
||||
+ else
|
||||
+ info[bb->index].mode_out = no_mode + 1;
|
||||
+ }
|
||||
+
|
||||
+ confluence_info.bb_info = info;
|
||||
+ confluence_info.entity = entity;
|
||||
+ confluence_info.no_mode = no_mode;
|
||||
+
|
||||
+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
|
||||
+ df_simple_dataflow (DF_FORWARD, NULL, NULL, forward_confluence_n,
|
||||
+ forward_transfer, blocks,
|
||||
+ df_get_postorder (DF_FORWARD),
|
||||
+ df_get_n_blocks (DF_FORWARD));
|
||||
+
|
||||
+ };
|
||||
+
|
||||
for (j = n_entities - 1; j >= 0; j--)
|
||||
{
|
||||
int e = entity_map[j];
|
||||
@@ -720,6 +848,7 @@ optimize_mode_switching (void)
|
||||
for (j = n_entities - 1; j >= 0; j--)
|
||||
{
|
||||
int no_mode = num_modes[entity_map[j]];
|
||||
+ struct bb_info *info = bb_info[j];
|
||||
|
||||
/* Insert all mode sets that have been inserted by lcm. */
|
||||
|
||||
@@ -740,39 +869,33 @@ optimize_mode_switching (void)
|
||||
}
|
||||
}
|
||||
|
||||
+ /* mode_in and mode_out can be calculated directly from avin and
|
||||
+ avout if all the modes are mutually exclusive. Use the target-
|
||||
+ provided confluence function otherwise. */
|
||||
+ if (targetm.mode_switching.confluence)
|
||||
+ forwprop_mode_info (info, entity_map[j], no_mode);
|
||||
+
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
{
|
||||
- struct bb_info *info = bb_info[j];
|
||||
- int last_mode = no_mode;
|
||||
-
|
||||
- /* intialize mode in availability for bb. */
|
||||
- for (i = 0; i < no_mode; i++)
|
||||
- if (mode_bit_p (avout[bb->index], j, i))
|
||||
- {
|
||||
- if (last_mode == no_mode)
|
||||
- last_mode = i;
|
||||
- if (last_mode != i)
|
||||
+ auto modes_confluence = [&](sbitmap *av)
|
||||
+ {
|
||||
+ for (int i = 0; i < no_mode; ++i)
|
||||
+ if (mode_bit_p (av[bb->index], j, i))
|
||||
{
|
||||
- last_mode = no_mode;
|
||||
- break;
|
||||
+ for (int i2 = i + 1; i2 < no_mode; ++i2)
|
||||
+ if (mode_bit_p (av[bb->index], j, i2))
|
||||
+ return no_mode;
|
||||
+ return i;
|
||||
}
|
||||
- }
|
||||
- info[bb->index].mode_out = last_mode;
|
||||
+ return no_mode;
|
||||
+ };
|
||||
|
||||
- /* intialize mode out availability for bb. */
|
||||
- last_mode = no_mode;
|
||||
- for (i = 0; i < no_mode; i++)
|
||||
- if (mode_bit_p (avin[bb->index], j, i))
|
||||
- {
|
||||
- if (last_mode == no_mode)
|
||||
- last_mode = i;
|
||||
- if (last_mode != i)
|
||||
- {
|
||||
- last_mode = no_mode;
|
||||
- break;
|
||||
- }
|
||||
- }
|
||||
- info[bb->index].mode_in = last_mode;
|
||||
+ /* intialize mode in/out availability for bb. */
|
||||
+ if (!targetm.mode_switching.confluence)
|
||||
+ {
|
||||
+ info[bb->index].mode_out = modes_confluence (avout);
|
||||
+ info[bb->index].mode_in = modes_confluence (avin);
|
||||
+ }
|
||||
|
||||
for (i = 0; i < no_mode; i++)
|
||||
if (mode_bit_p (del[bb->index], j, i))
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index 67c20bbb0..1e2091ed3 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -7025,6 +7025,23 @@ the number of modes if it does not know what mode @var{entity} has after\n\
|
||||
Not defining the hook is equivalent to returning @var{mode}.",
|
||||
int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
|
||||
|
||||
+DEFHOOK
|
||||
+(confluence,
|
||||
+ "By default, the mode-switching pass assumes that a given entity's modes\n\
|
||||
+are mutually exclusive. This means that the pass can only tell\n\
|
||||
+@code{TARGET_MODE_EMIT} about an entity's previous mode if all\n\
|
||||
+incoming paths of execution leave the entity in the same state.\n\
|
||||
+\n\
|
||||
+However, some entities might have overlapping, non-exclusive modes,\n\
|
||||
+so that it is sometimes possible to represent ``mode @var{mode1} or mode\n\
|
||||
+@var{mode2}'' with something more specific than ``mode not known''.\n\
|
||||
+If this is true for at least one entity, you should define this hook\n\
|
||||
+and make it return a mode that includes @var{mode1} and @var{mode2}\n\
|
||||
+as possibilities. (The mode can include other possibilities too.)\n\
|
||||
+The hook should return the number of modes if no suitable mode exists\n\
|
||||
+for the given arguments.",
|
||||
+ int, (int entity, int mode1, int mode2), NULL)
|
||||
+
|
||||
DEFHOOK
|
||||
(entry,
|
||||
"If this hook is defined, it is evaluated for every @var{entity} that\n\
|
||||
--
|
||||
2.33.0
|
||||
|
||||
483
0136-Backport-SME-mode-switching-Add-a-backprop-hook.patch
Normal file
483
0136-Backport-SME-mode-switching-Add-a-backprop-hook.patch
Normal file
@ -0,0 +1,483 @@
|
||||
From cb4189b45a3a411958ab6aa85108f6dc7516acf5 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Sat, 11 Nov 2023 17:29:00 +0000
|
||||
Subject: [PATCH 044/157] [Backport][SME] mode-switching: Add a backprop hook
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fc8458e20a524d053f576d64a606e21f8bd03b84
|
||||
|
||||
This patch adds a way for targets to ask that selected mode changes
|
||||
be brought forward, through a combination of:
|
||||
|
||||
(1) requiring a mode in blocks where the entity was previously
|
||||
transparent
|
||||
|
||||
(2) pushing the transition at the head of a block onto incoming edges
|
||||
|
||||
SME has two uses for this:
|
||||
|
||||
- A "one-shot" entity that, for any given path of execution,
|
||||
either stays off or makes exactly one transition from off to on.
|
||||
This relies only on (1) above; see the hook description for more info.
|
||||
|
||||
The main purpose of using mode-switching for this entity is to
|
||||
shrink-wrap the code that requires it.
|
||||
|
||||
- A second entity for which all transitions must be from known
|
||||
modes, which is enforced using a combination of (1) and (2).
|
||||
More specifically, (1) looks for edges B1->B2 for which:
|
||||
|
||||
- B2 requires a specific mode and
|
||||
- B1 does not guarantee a specific starting mode
|
||||
|
||||
In this system, such an edge is only possible if the entity is
|
||||
transparent in B1. (1) then forces B1 to require some safe common
|
||||
mode. Applying this inductively means that all incoming edges are
|
||||
from known modes. If different edges give different starting modes,
|
||||
(2) pushes the transitions onto the edges themselves; this only
|
||||
happens if the entity is not transparent in some predecessor block.
|
||||
|
||||
The patch also uses the back-propagation as an excuse to do a simple
|
||||
on-the-fly optimisation.
|
||||
|
||||
Hopefully the comments in the patch explain things a bit better.
|
||||
|
||||
gcc/
|
||||
* target.def (mode_switching.backprop): New hook.
|
||||
* doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* mode-switching.cc (struct bb_info): Add single_succ.
|
||||
(confluence_info): Add transp field.
|
||||
(single_succ_confluence_n, single_succ_transfer): New functions.
|
||||
(backprop_confluence_n, backprop_transfer): Likewise.
|
||||
(optimize_mode_switching): Use them. Push mode transitions onto
|
||||
a block's incoming edges, if the backprop hook requires it.
|
||||
---
|
||||
gcc/doc/tm.texi | 28 +++++
|
||||
gcc/doc/tm.texi.in | 2 +
|
||||
gcc/mode-switching.cc | 275 ++++++++++++++++++++++++++++++++++++++++++
|
||||
gcc/target.def | 29 +++++
|
||||
4 files changed, 334 insertions(+)
|
||||
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index d7053ec9e..5f0972356 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -10322,6 +10322,34 @@ The hook should return the number of modes if no suitable mode exists
|
||||
for the given arguments.
|
||||
@end deftypefn
|
||||
|
||||
+@deftypefn {Target Hook} int TARGET_MODE_BACKPROP (int @var{entity}, int @var{mode1}, int @var{mode2})
|
||||
+If defined, the mode-switching pass uses this hook to back-propagate mode
|
||||
+requirements through blocks that have no mode requirements of their own.
|
||||
+Specifically, @var{mode1} is the mode that @var{entity} has on exit
|
||||
+from a block B1 (say) and @var{mode2} is the mode that the next block
|
||||
+requires @var{entity} to have. B1 does not have any mode requirements
|
||||
+of its own.
|
||||
+
|
||||
+The hook should return the mode that it prefers or requires @var{entity}
|
||||
+to have in B1, or the number of modes if there is no such requirement.
|
||||
+If the hook returns a required mode for more than one of B1's outgoing
|
||||
+edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.
|
||||
+
|
||||
+For example, suppose there is a ``one-shot'' entity that,
|
||||
+for a given execution of a function, either stays off or makes exactly
|
||||
+one transition from off to on. It is safe to make the transition at any
|
||||
+time, but it is better not to do so unnecessarily. This hook allows the
|
||||
+function to manage such an entity without having to track its state at
|
||||
+runtime. Specifically. the entity would have two modes, 0 for off and
|
||||
+1 for on, with 2 representing ``don't know''. The system is forbidden from
|
||||
+transitioning from 2 to 1, since 2 represents the possibility that the
|
||||
+entity is already on (and the aim is to avoid having to emit code to
|
||||
+check for that case). This hook would therefore return 1 when @var{mode1}
|
||||
+is 2 and @var{mode2} is 1, which would force the entity to be on in the
|
||||
+source block. Applying this inductively would remove all transitions
|
||||
+in which the previous state is unknown.
|
||||
+@end deftypefn
|
||||
+
|
||||
@deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
|
||||
If this hook is defined, it is evaluated for every @var{entity} that
|
||||
needs mode switching. It should return the mode that @var{entity} is
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index d420e62fd..fcab21744 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -6924,6 +6924,8 @@ mode or ``no mode'', depending on context.
|
||||
|
||||
@hook TARGET_MODE_CONFLUENCE
|
||||
|
||||
+@hook TARGET_MODE_BACKPROP
|
||||
+
|
||||
@hook TARGET_MODE_ENTRY
|
||||
|
||||
@hook TARGET_MODE_EXIT
|
||||
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
|
||||
index 065767902..c2a0f0294 100644
|
||||
--- a/gcc/mode-switching.cc
|
||||
+++ b/gcc/mode-switching.cc
|
||||
@@ -81,6 +81,7 @@ struct bb_info
|
||||
int computing;
|
||||
int mode_out;
|
||||
int mode_in;
|
||||
+ int single_succ;
|
||||
};
|
||||
|
||||
/* Clear ode I from entity J in bitmap B. */
|
||||
@@ -508,6 +509,9 @@ struct
|
||||
/* Information about each basic block, indexed by block id. */
|
||||
struct bb_info *bb_info;
|
||||
|
||||
+ /* A bitmap of blocks for which the current entity is transparent. */
|
||||
+ sbitmap transp;
|
||||
+
|
||||
/* The entity that we're processing. */
|
||||
int entity;
|
||||
|
||||
@@ -579,6 +583,210 @@ forward_transfer (int bb_index)
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* A backwards confluence function. Update the the bb_info single_succ
|
||||
+ field for E's source block, based on changes to E's destination block.
|
||||
+ At the end of the dataflow problem, single_succ is the single mode
|
||||
+ that all successors require (directly or indirectly), or no_mode
|
||||
+ if there are conflicting requirements.
|
||||
+
|
||||
+ Initially, a value of no_mode + 1 means "don't know". */
|
||||
+
|
||||
+static bool
|
||||
+single_succ_confluence_n (edge e)
|
||||
+{
|
||||
+ /* The entry block has no associated mode information. */
|
||||
+ if (e->src->index == ENTRY_BLOCK)
|
||||
+ return false;
|
||||
+
|
||||
+ /* We don't control mode changes across abnormal edges. */
|
||||
+ if (e->flags & EDGE_ABNORMAL)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Do nothing if we've already found a conflict. */
|
||||
+ struct bb_info *bb_info = confluence_info.bb_info;
|
||||
+ int no_mode = confluence_info.no_mode;
|
||||
+ int src_mode = bb_info[e->src->index].single_succ;
|
||||
+ if (src_mode == no_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Work out what mode the destination block (or its successors) require. */
|
||||
+ int dest_mode;
|
||||
+ if (e->dest->index == EXIT_BLOCK)
|
||||
+ dest_mode = no_mode;
|
||||
+ else if (bitmap_bit_p (confluence_info.transp, e->dest->index))
|
||||
+ dest_mode = bb_info[e->dest->index].single_succ;
|
||||
+ else
|
||||
+ dest_mode = bb_info[e->dest->index].seginfo->mode;
|
||||
+
|
||||
+ /* Do nothing if the destination block has no new information. */
|
||||
+ if (dest_mode == no_mode + 1 || dest_mode == src_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Detect conflicting modes. */
|
||||
+ if (src_mode != no_mode + 1)
|
||||
+ dest_mode = no_mode;
|
||||
+
|
||||
+ bb_info[e->src->index].single_succ = dest_mode;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* A backward transfer function for computing the bb_info single_succ
|
||||
+ fields, as described above single_succ_confluence. */
|
||||
+
|
||||
+static bool
|
||||
+single_succ_transfer (int bb_index)
|
||||
+{
|
||||
+ /* We don't have any field to transfer to. Assume that, after the
|
||||
+ first iteration, we are only called if single_succ has changed.
|
||||
+ We should then process incoming edges if the entity is transparent. */
|
||||
+ return bitmap_bit_p (confluence_info.transp, bb_index);
|
||||
+}
|
||||
+
|
||||
+/* Check whether the target wants to back-propagate a mode change across
|
||||
+ edge E, and update the source block's computed mode if so. Return true
|
||||
+ if something changed. */
|
||||
+
|
||||
+static bool
|
||||
+backprop_confluence_n (edge e)
|
||||
+{
|
||||
+ /* The entry and exit blocks have no useful mode information. */
|
||||
+ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK)
|
||||
+ return false;
|
||||
+
|
||||
+ /* We don't control mode changes across abnormal edges. */
|
||||
+ if (e->flags & EDGE_ABNORMAL)
|
||||
+ return false;
|
||||
+
|
||||
+ /* We can only require a new mode in the source block if the entity
|
||||
+ was originally transparent there. */
|
||||
+ if (!bitmap_bit_p (confluence_info.transp, e->src->index))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Exit now if there is no required mode, or if all paths into the
|
||||
+ source block leave the entity in the required mode. */
|
||||
+ struct bb_info *bb_info = confluence_info.bb_info;
|
||||
+ int no_mode = confluence_info.no_mode;
|
||||
+ int src_mode = bb_info[e->src->index].mode_out;
|
||||
+ int dest_mode = bb_info[e->dest->index].mode_in;
|
||||
+ if (dest_mode == no_mode || src_mode == dest_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ /* See what the target thinks about this transition. */
|
||||
+ int entity = confluence_info.entity;
|
||||
+ int new_mode = targetm.mode_switching.backprop (entity, src_mode,
|
||||
+ dest_mode);
|
||||
+ if (new_mode == no_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ /* The target doesn't like the current transition, but would be happy
|
||||
+ with a transition from NEW_MODE.
|
||||
+
|
||||
+ If we force the source block to use NEW_MODE, we might introduce a
|
||||
+ double transition on at least one path through the function (one to
|
||||
+ NEW_MODE and then one to DEST_MODE). Therefore, if all destination
|
||||
+ blocks require the same mode, it is usually better to bring that
|
||||
+ mode requirement forward.
|
||||
+
|
||||
+ If that isn't possible, merge the preference for this edge with
|
||||
+ the preferences for other edges. no_mode + 1 indicates that there
|
||||
+ was no previous preference. */
|
||||
+ int old_mode = bb_info[e->src->index].computing;
|
||||
+ if (bb_info[e->src->index].single_succ != no_mode)
|
||||
+ new_mode = bb_info[e->src->index].single_succ;
|
||||
+ else if (old_mode != no_mode + 1)
|
||||
+ new_mode = mode_confluence (entity, old_mode, new_mode, no_mode);
|
||||
+
|
||||
+ if (old_mode == new_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ bb_info[e->src->index].computing = new_mode;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* If the current entity was originally transparent in block BB_INDEX,
|
||||
+ update the incoming mode to match the outgoing mode. Register a mode
|
||||
+ change if the entity is no longer transparent.
|
||||
+
|
||||
+ Also, as an on-the-fly optimization, check whether the entity was
|
||||
+ originally transparent in BB_INDEX and if all successor blocks require
|
||||
+ the same mode. If so, anticipate the mode change in BB_INDEX if
|
||||
+ doing it on the incoming edges would require no more mode changes than
|
||||
+ doing it on the outgoing edges. The aim is to reduce the total number
|
||||
+ of mode changes emitted for the function (and thus reduce code size and
|
||||
+ cfg complexity) without increasing the number of mode changes on any
|
||||
+ given path through the function. A typical case where it helps is:
|
||||
+
|
||||
+ T
|
||||
+ / \
|
||||
+ T M
|
||||
+ \ /
|
||||
+ M
|
||||
+
|
||||
+ where the entity is transparent in the T blocks and is required to have
|
||||
+ mode M in the M blocks. If there are no redundancies leading up to this,
|
||||
+ there will be two mutually-exclusive changes to mode M, one on each of
|
||||
+ the T->M edges. The optimization instead converts it to:
|
||||
+
|
||||
+ T T M
|
||||
+ / \ / \ / \
|
||||
+ T M -> M M -> M M
|
||||
+ \ / \ / \ /
|
||||
+ M M M
|
||||
+
|
||||
+ which creates a single transition to M for both paths through the diamond.
|
||||
+
|
||||
+ Return true if something changed. */
|
||||
+
|
||||
+static bool
|
||||
+backprop_transfer (int bb_index)
|
||||
+{
|
||||
+ /* The entry and exit blocks have no useful mode information. */
|
||||
+ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
|
||||
+ return false;
|
||||
+
|
||||
+ /* We can only require a new mode if the entity was previously
|
||||
+ transparent. */
|
||||
+ if (!bitmap_bit_p (confluence_info.transp, bb_index))
|
||||
+ return false;
|
||||
+
|
||||
+ struct bb_info *bb_info = confluence_info.bb_info;
|
||||
+ basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
|
||||
+ int no_mode = confluence_info.no_mode;
|
||||
+ int mode_in = bb_info[bb_index].mode_in;
|
||||
+ int mode_out = bb_info[bb_index].computing;
|
||||
+ if (mode_out == no_mode + 1)
|
||||
+ {
|
||||
+ /* The entity is still transparent for this block. See whether
|
||||
+ all successor blocks need the same mode, either directly or
|
||||
+ indirectly. */
|
||||
+ mode_out = bb_info[bb_index].single_succ;
|
||||
+ if (mode_out == no_mode)
|
||||
+ return false;
|
||||
+
|
||||
+ /* Get a minimum bound on the number of transitions that would be
|
||||
+ removed if BB itself required MODE_OUT. */
|
||||
+ unsigned int moved = 0;
|
||||
+ for (edge e : bb->succs)
|
||||
+ if (e->dest->index != EXIT_BLOCK
|
||||
+ && mode_out == bb_info[e->dest->index].seginfo->mode)
|
||||
+ moved += 1;
|
||||
+
|
||||
+ /* See whether making the mode change on all incoming edges would
|
||||
+ be no worse than making it on MOVED outgoing edges. */
|
||||
+ if (moved < EDGE_COUNT (bb->preds))
|
||||
+ return false;
|
||||
+
|
||||
+ bb_info[bb_index].mode_out = mode_out;
|
||||
+ bb_info[bb_index].computing = mode_out;
|
||||
+ }
|
||||
+ else if (mode_out == mode_in)
|
||||
+ return false;
|
||||
+
|
||||
+ bb_info[bb_index].mode_in = mode_out;
|
||||
+ bb_info[bb_index].seginfo->mode = mode_out;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/* Find all insns that need a particular mode setting, and insert the
|
||||
necessary mode switches. Return true if we did work. */
|
||||
|
||||
@@ -684,6 +892,7 @@ optimize_mode_switching (void)
|
||||
}
|
||||
|
||||
confluence_info.bb_info = info;
|
||||
+ confluence_info.transp = nullptr;
|
||||
confluence_info.entity = entity;
|
||||
confluence_info.no_mode = no_mode;
|
||||
|
||||
@@ -695,6 +904,9 @@ optimize_mode_switching (void)
|
||||
|
||||
};
|
||||
|
||||
+ if (targetm.mode_switching.backprop)
|
||||
+ clear_aux_for_edges ();
|
||||
+
|
||||
for (j = n_entities - 1; j >= 0; j--)
|
||||
{
|
||||
int e = entity_map[j];
|
||||
@@ -817,6 +1029,53 @@ optimize_mode_switching (void)
|
||||
}
|
||||
}
|
||||
|
||||
+ /* If the target requests it, back-propagate selected mode requirements
|
||||
+ through transparent blocks. */
|
||||
+ if (targetm.mode_switching.backprop)
|
||||
+ {
|
||||
+ /* First work out the mode on entry to and exit from each block. */
|
||||
+ forwprop_mode_info (info, e, no_mode);
|
||||
+
|
||||
+ /* Compute the single_succ fields, as described above
|
||||
+ single_succ_confluence. */
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ info[bb->index].single_succ = no_mode + 1;
|
||||
+
|
||||
+ confluence_info.transp = transp_all;
|
||||
+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
|
||||
+ df_simple_dataflow (DF_BACKWARD, NULL, NULL,
|
||||
+ single_succ_confluence_n,
|
||||
+ single_succ_transfer, blocks,
|
||||
+ df_get_postorder (DF_BACKWARD),
|
||||
+ df_get_n_blocks (DF_BACKWARD));
|
||||
+
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ /* Repurpose mode_in as the first mode required by the block,
|
||||
+ or the output mode if none. */
|
||||
+ if (info[bb->index].seginfo->mode != no_mode)
|
||||
+ info[bb->index].mode_in = info[bb->index].seginfo->mode;
|
||||
+
|
||||
+ /* In transparent blocks, use computing == no_mode + 1
|
||||
+ to indicate that no propagation has taken place. */
|
||||
+ if (info[bb->index].computing == no_mode)
|
||||
+ info[bb->index].computing = no_mode + 1;
|
||||
+ }
|
||||
+
|
||||
+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
|
||||
+ df_simple_dataflow (DF_BACKWARD, NULL, NULL, backprop_confluence_n,
|
||||
+ backprop_transfer, blocks,
|
||||
+ df_get_postorder (DF_BACKWARD),
|
||||
+ df_get_n_blocks (DF_BACKWARD));
|
||||
+
|
||||
+ /* Any block that now computes a mode is no longer transparent. */
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ if (info[bb->index].computing == no_mode + 1)
|
||||
+ info[bb->index].computing = no_mode;
|
||||
+ else if (info[bb->index].computing != no_mode)
|
||||
+ bitmap_clear_bit (transp_all, bb->index);
|
||||
+ }
|
||||
+
|
||||
/* Set the anticipatable and computing arrays. */
|
||||
for (i = 0; i < no_mode; i++)
|
||||
{
|
||||
@@ -900,6 +1159,22 @@ optimize_mode_switching (void)
|
||||
for (i = 0; i < no_mode; i++)
|
||||
if (mode_bit_p (del[bb->index], j, i))
|
||||
info[bb->index].seginfo->mode = no_mode;
|
||||
+
|
||||
+ /* See whether the target can perform the first transition.
|
||||
+ If not, push it onto the incoming edges. The earlier backprop
|
||||
+ pass should ensure that the resulting transitions are valid. */
|
||||
+ if (targetm.mode_switching.backprop)
|
||||
+ {
|
||||
+ int from_mode = info[bb->index].mode_in;
|
||||
+ int to_mode = info[bb->index].seginfo->mode;
|
||||
+ if (targetm.mode_switching.backprop (entity_map[j], from_mode,
|
||||
+ to_mode) != no_mode)
|
||||
+ {
|
||||
+ for (edge e : bb->preds)
|
||||
+ e->aux = (void *) (intptr_t) (to_mode + 1);
|
||||
+ info[bb->index].mode_in = to_mode;
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Now output the remaining mode sets in all the segments. */
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index 1e2091ed3..4d77c1523 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -7042,6 +7042,35 @@ The hook should return the number of modes if no suitable mode exists\n\
|
||||
for the given arguments.",
|
||||
int, (int entity, int mode1, int mode2), NULL)
|
||||
|
||||
+DEFHOOK
|
||||
+(backprop,
|
||||
+ "If defined, the mode-switching pass uses this hook to back-propagate mode\n\
|
||||
+requirements through blocks that have no mode requirements of their own.\n\
|
||||
+Specifically, @var{mode1} is the mode that @var{entity} has on exit\n\
|
||||
+from a block B1 (say) and @var{mode2} is the mode that the next block\n\
|
||||
+requires @var{entity} to have. B1 does not have any mode requirements\n\
|
||||
+of its own.\n\
|
||||
+\n\
|
||||
+The hook should return the mode that it prefers or requires @var{entity}\n\
|
||||
+to have in B1, or the number of modes if there is no such requirement.\n\
|
||||
+If the hook returns a required mode for more than one of B1's outgoing\n\
|
||||
+edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.\n\
|
||||
+\n\
|
||||
+For example, suppose there is a ``one-shot'' entity that,\n\
|
||||
+for a given execution of a function, either stays off or makes exactly\n\
|
||||
+one transition from off to on. It is safe to make the transition at any\n\
|
||||
+time, but it is better not to do so unnecessarily. This hook allows the\n\
|
||||
+function to manage such an entity without having to track its state at\n\
|
||||
+runtime. Specifically. the entity would have two modes, 0 for off and\n\
|
||||
+1 for on, with 2 representing ``don't know''. The system is forbidden from\n\
|
||||
+transitioning from 2 to 1, since 2 represents the possibility that the\n\
|
||||
+entity is already on (and the aim is to avoid having to emit code to\n\
|
||||
+check for that case). This hook would therefore return 1 when @var{mode1}\n\
|
||||
+is 2 and @var{mode2} is 1, which would force the entity to be on in the\n\
|
||||
+source block. Applying this inductively would remove all transitions\n\
|
||||
+in which the previous state is unknown.",
|
||||
+ int, (int entity, int mode1, int mode2), NULL)
|
||||
+
|
||||
DEFHOOK
|
||||
(entry,
|
||||
"If this hook is defined, it is evaluated for every @var{entity} that\n\
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,81 @@
|
||||
From 4553f252c10968037edceba4abe3984dc9bbad2a Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Fri, 1 Dec 2023 08:36:15 +0000
|
||||
Subject: [PATCH 045/157] [Backport][SME] aarch64: Add a result_mode helper
|
||||
function
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a1bc121c00e30bd1bdaa62d87cbe64eb88e74f45
|
||||
|
||||
SME will add more intrinsics whose expansion code requires
|
||||
the mode of the function return value. This patch adds an
|
||||
associated helper routine.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.h
|
||||
(function_expander::result_mode): New member function.
|
||||
* config/aarch64/aarch64-sve-builtins-base.cc
|
||||
(svld234_impl::expand): Use it.
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(function_expander::get_reg_target): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 9 +++++++++
|
||||
3 files changed, 11 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
|
||||
index 56c9d75e7..c9bf13792 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
|
||||
@@ -1272,7 +1272,7 @@ public:
|
||||
rtx
|
||||
expand (function_expander &e) const OVERRIDE
|
||||
{
|
||||
- machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
|
||||
+ machine_mode tuple_mode = e.result_mode ();
|
||||
insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
|
||||
tuple_mode, e.vector_mode (0));
|
||||
return e.use_contiguous_load_insn (icode);
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index e168c8334..91af96687 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -2796,7 +2796,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
|
||||
rtx
|
||||
function_expander::get_reg_target ()
|
||||
{
|
||||
- machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
|
||||
+ machine_mode target_mode = result_mode ();
|
||||
if (!possible_target || GET_MODE (possible_target) != target_mode)
|
||||
possible_target = gen_reg_rtx (target_mode);
|
||||
return possible_target;
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 0d130b871..52994cde0 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -528,6 +528,8 @@ public:
|
||||
insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0,
|
||||
machine_mode = E_VOIDmode);
|
||||
|
||||
+ machine_mode result_mode () const;
|
||||
+
|
||||
bool overlaps_input_p (rtx);
|
||||
|
||||
rtx convert_to_pmode (rtx);
|
||||
@@ -877,6 +879,13 @@ function_base::call_properties (const function_instance &instance) const
|
||||
return flags;
|
||||
}
|
||||
|
||||
+/* Return the mode of the result of a call. */
|
||||
+inline machine_mode
|
||||
+function_expander::result_mode () const
|
||||
+{
|
||||
+ return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
|
||||
+}
|
||||
+
|
||||
}
|
||||
|
||||
#endif
|
||||
--
|
||||
2.33.0
|
||||
|
||||
232
0138-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
Normal file
232
0138-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch
Normal file
@ -0,0 +1,232 @@
|
||||
From 60612cbd9cdd9b5079c0505b9d53c9cd98fba4b1 Mon Sep 17 00:00:00 2001
|
||||
From: Kewen Lin <linkw@linux.ibm.com>
|
||||
Date: Tue, 15 Nov 2022 20:26:07 -0600
|
||||
Subject: [PATCH 046/157] [Backport][SME] rtl: Try to remove EH edges after
|
||||
{pro,epi}logue generation [PR90259]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=63e1b2e767a3f4695373c2406ff719c0a60c1858
|
||||
|
||||
After prologue and epilogue generation, the judgement on whether
|
||||
one memory access onto stack frame may trap or not could change,
|
||||
since we get more exact stack information by now.
|
||||
|
||||
As PR90259 shows, some memory access becomes impossible to trap
|
||||
any more after prologue and epilogue generation, it can make
|
||||
subsequent optimization be able to remove it if safe, but it
|
||||
results in unexpected control flow status due to REG_EH_REGION
|
||||
note missing.
|
||||
|
||||
This patch proposes to try to remove EH edges with function
|
||||
purge_all_dead_edges after prologue and epilogue generation,
|
||||
it simplifies CFG as early as we can and doesn't need any fixup
|
||||
in downstream passes.
|
||||
|
||||
CFG simplification result with PR90259's case as example:
|
||||
|
||||
*before*
|
||||
|
||||
18: %1:TF=call [`__gcc_qdiv'] argc:0
|
||||
REG_EH_REGION 0x2
|
||||
77: NOTE_INSN_BASIC_BLOCK 3
|
||||
19: NOTE_INSN_DELETED
|
||||
20: NOTE_INSN_DELETED
|
||||
110: [%31:SI+0x20]=%1:DF
|
||||
REG_EH_REGION 0x2
|
||||
116: NOTE_INSN_BASIC_BLOCK 4
|
||||
111: [%31:SI+0x28]=%2:DF
|
||||
REG_EH_REGION 0x2
|
||||
22: NOTE_INSN_BASIC_BLOCK 5
|
||||
108: %0:DF=[%31:SI+0x20]
|
||||
REG_EH_REGION 0x2
|
||||
117: NOTE_INSN_BASIC_BLOCK 6
|
||||
109: %1:DF=[%31:SI+0x28]
|
||||
REG_EH_REGION 0x2
|
||||
79: NOTE_INSN_BASIC_BLOCK 7
|
||||
26: [%31:SI+0x18]=%0:DF
|
||||
104: pc=L69
|
||||
105: barrier
|
||||
|
||||
*after*
|
||||
|
||||
18: %1:TF=call [`__gcc_qdiv'] argc:0
|
||||
REG_EH_REGION 0x2
|
||||
77: NOTE_INSN_BASIC_BLOCK 3
|
||||
19: NOTE_INSN_DELETED
|
||||
20: NOTE_INSN_DELETED
|
||||
110: [%31:SI+0x20]=%1:DF
|
||||
111: [%31:SI+0x28]=%2:DF
|
||||
108: %0:DF=[%31:SI+0x20]
|
||||
109: %1:DF=[%31:SI+0x28]
|
||||
26: [%31:SI+0x18]=%0:DF
|
||||
104: pc=L69
|
||||
105: barrier
|
||||
|
||||
PR rtl-optimization/90259
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* function.cc (rest_of_handle_thread_prologue_and_epilogue): Add
|
||||
parameter fun, and call function purge_all_dead_edges.
|
||||
(pass_thread_prologue_and_epilogue::execute): Name unnamed parameter
|
||||
as fun, and use it for rest_of_handle_thread_prologue_and_epilogue.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* g++.target/powerpc/pr90259.C: New.
|
||||
---
|
||||
gcc/function.cc | 13 ++-
|
||||
gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +++++++++++++++++++++
|
||||
2 files changed, 113 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C
|
||||
|
||||
diff --git a/gcc/function.cc b/gcc/function.cc
|
||||
index 49c7ccf4b..28de39dd6 100644
|
||||
--- a/gcc/function.cc
|
||||
+++ b/gcc/function.cc
|
||||
@@ -6529,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt)
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
-rest_of_handle_thread_prologue_and_epilogue (void)
|
||||
+rest_of_handle_thread_prologue_and_epilogue (function *fun)
|
||||
{
|
||||
/* prepare_shrink_wrap is sensitive to the block structure of the control
|
||||
flow graph, so clean it up first. */
|
||||
@@ -6546,6 +6546,13 @@ rest_of_handle_thread_prologue_and_epilogue (void)
|
||||
Fix that up. */
|
||||
fixup_partitions ();
|
||||
|
||||
+ /* After prologue and epilogue generation, the judgement on whether
|
||||
+ one memory access onto stack frame may trap or not could change,
|
||||
+ since we get more exact stack information by now. So try to
|
||||
+ remove any EH edges here, see PR90259. */
|
||||
+ if (fun->can_throw_non_call_exceptions)
|
||||
+ purge_all_dead_edges ();
|
||||
+
|
||||
/* Shrink-wrapping can result in unreachable edges in the epilogue,
|
||||
see PR57320. */
|
||||
cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
|
||||
@@ -6614,9 +6621,9 @@ public:
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
- virtual unsigned int execute (function *)
|
||||
+ unsigned int execute (function * fun) final override
|
||||
{
|
||||
- return rest_of_handle_thread_prologue_and_epilogue ();
|
||||
+ return rest_of_handle_thread_prologue_and_epilogue (fun);
|
||||
}
|
||||
|
||||
}; // class pass_thread_prologue_and_epilogue
|
||||
diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C b/gcc/testsuite/g++.target/powerpc/pr90259.C
|
||||
new file mode 100644
|
||||
index 000000000..db75ac7fe
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/g++.target/powerpc/pr90259.C
|
||||
@@ -0,0 +1,103 @@
|
||||
+/* { dg-require-effective-target long_double_ibm128 } */
|
||||
+/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */
|
||||
+/* { dg-add-options long_double_ibm128 } */
|
||||
+
|
||||
+/* Verify there is no ICE. */
|
||||
+
|
||||
+template <int a> struct b
|
||||
+{
|
||||
+ static constexpr int c = a;
|
||||
+};
|
||||
+template <bool a> using d = b<a>;
|
||||
+struct e
|
||||
+{
|
||||
+ int f;
|
||||
+ int
|
||||
+ g ()
|
||||
+ {
|
||||
+ return __builtin_ceil (f / (long double) h);
|
||||
+ }
|
||||
+ float h;
|
||||
+};
|
||||
+template <typename, typename> using k = d<!bool ()>;
|
||||
+template <typename> class n
|
||||
+{
|
||||
+public:
|
||||
+ e ae;
|
||||
+ void af ();
|
||||
+};
|
||||
+template <typename l>
|
||||
+void
|
||||
+n<l>::af ()
|
||||
+{
|
||||
+ ae.g ();
|
||||
+}
|
||||
+template <bool> using m = int;
|
||||
+template <typename ag, typename ah, typename ai = m<k<ag, ah>::c>>
|
||||
+using aj = n<ai>;
|
||||
+struct o
|
||||
+{
|
||||
+ void
|
||||
+ af ()
|
||||
+ {
|
||||
+ al.af ();
|
||||
+ }
|
||||
+ aj<int, int> al;
|
||||
+};
|
||||
+template <typename> class am;
|
||||
+template <typename i> class ao
|
||||
+{
|
||||
+protected:
|
||||
+ static i *ap (int);
|
||||
+};
|
||||
+template <typename, typename> class p;
|
||||
+template <typename ar, typename i, typename... j> class p<ar (j...), i> : ao<i>
|
||||
+{
|
||||
+public:
|
||||
+ static ar
|
||||
+ as (const int &p1, j...)
|
||||
+ {
|
||||
+ (*ao<i>::ap (p1)) (j ()...);
|
||||
+ }
|
||||
+};
|
||||
+template <typename ar, typename... j> class am<ar (j...)>
|
||||
+{
|
||||
+ template <typename, typename> using av = int;
|
||||
+
|
||||
+public:
|
||||
+ template <typename i, typename = av<d<!bool ()>, void>,
|
||||
+ typename = av<i, void>>
|
||||
+ am (i);
|
||||
+ using aw = ar (*) (const int &, j...);
|
||||
+ aw ax;
|
||||
+};
|
||||
+template <typename ar, typename... j>
|
||||
+template <typename i, typename, typename>
|
||||
+am<ar (j...)>::am (i)
|
||||
+{
|
||||
+ ax = p<ar (j...), i>::as;
|
||||
+}
|
||||
+struct G
|
||||
+{
|
||||
+ void ba (am<void (o)>);
|
||||
+};
|
||||
+struct q
|
||||
+{
|
||||
+ q ()
|
||||
+ {
|
||||
+ G a;
|
||||
+ a.ba (r ());
|
||||
+ }
|
||||
+ struct r
|
||||
+ {
|
||||
+ void
|
||||
+ operator() (o p1)
|
||||
+ try
|
||||
+ {
|
||||
+ p1.af ();
|
||||
+ }
|
||||
+ catch (int)
|
||||
+ {
|
||||
+ }
|
||||
+ };
|
||||
+} s;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,71 @@
|
||||
From beb962ec516f152cef482b229c9adf0390dc3b2c Mon Sep 17 00:00:00 2001
|
||||
From: Andrew Pinski <apinski@marvell.com>
|
||||
Date: Thu, 17 Nov 2022 22:03:08 +0000
|
||||
Subject: [PATCH 047/157] [Backport][SME] Fix PR middle-end/107705: ICE after
|
||||
redeclaration error
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ceba66ee230bb96b0889fc8ec7333c7ffae96d6e
|
||||
|
||||
The problem here is after we created a call expression
|
||||
in the C front-end, we replace the decl type with
|
||||
an error mark node. We then end up calling
|
||||
aggregate_value_p with the call expression
|
||||
with the decl with the error mark as the type
|
||||
and we ICE.
|
||||
|
||||
The fix is to check the function type
|
||||
after we process the call expression inside
|
||||
aggregate_value_p to get it.
|
||||
|
||||
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
|
||||
|
||||
Thanks,
|
||||
Andrew Pinski
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR middle-end/107705
|
||||
* function.cc (aggregate_value_p): Return 0 if
|
||||
the function type was an error operand.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.dg/redecl-22.c: New test.
|
||||
---
|
||||
gcc/function.cc | 3 +++
|
||||
gcc/testsuite/gcc.dg/redecl-22.c | 9 +++++++++
|
||||
2 files changed, 12 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/redecl-22.c
|
||||
|
||||
diff --git a/gcc/function.cc b/gcc/function.cc
|
||||
index 28de39dd6..99aa738eb 100644
|
||||
--- a/gcc/function.cc
|
||||
+++ b/gcc/function.cc
|
||||
@@ -2090,6 +2090,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
if (VOID_TYPE_P (type))
|
||||
return 0;
|
||||
|
||||
+ if (error_operand_p (fntype))
|
||||
+ return 0;
|
||||
+
|
||||
/* If a record should be passed the same as its first (and only) member
|
||||
don't pass it as an aggregate. */
|
||||
if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type))
|
||||
diff --git a/gcc/testsuite/gcc.dg/redecl-22.c b/gcc/testsuite/gcc.dg/redecl-22.c
|
||||
new file mode 100644
|
||||
index 000000000..7758570fa
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/redecl-22.c
|
||||
@@ -0,0 +1,9 @@
|
||||
+/* We used to ICE in the gimplifier, PR 107705 */
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w" } */
|
||||
+int f (void)
|
||||
+{
|
||||
+ int (*p) (void) = 0; // { dg-note "" }
|
||||
+ return p ();
|
||||
+ int p = 1; // { dg-error "" }
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
351
0140-Backport-SME-function-Change-return-type-of-predicat.patch
Normal file
351
0140-Backport-SME-function-Change-return-type-of-predicat.patch
Normal file
@ -0,0 +1,351 @@
|
||||
From c074871572ef22cbcca8f0f4bc493d60caeddd78 Mon Sep 17 00:00:00 2001
|
||||
From: Uros Bizjak <ubizjak@gmail.com>
|
||||
Date: Wed, 21 Jun 2023 21:55:30 +0200
|
||||
Subject: [PATCH 048/157] [Backport][SME] function: Change return type of
|
||||
predicate function from int to bool
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce47d3c2cf59bb2cc94afc4bbef88b0e4950f086
|
||||
|
||||
Also change some internal variables to bool and some functions to void.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* function.h (emit_initial_value_sets):
|
||||
Change return type from int to void.
|
||||
(aggregate_value_p): Change return type from int to bool.
|
||||
(prologue_contains): Ditto.
|
||||
(epilogue_contains): Ditto.
|
||||
(prologue_epilogue_contains): Ditto.
|
||||
* function.cc (temp_slot): Make "in_use" variable bool.
|
||||
(make_slot_available): Update for changed "in_use" variable.
|
||||
(assign_stack_temp_for_type): Ditto.
|
||||
(emit_initial_value_sets): Change return type from int to void
|
||||
and update function body accordingly.
|
||||
(instantiate_virtual_regs): Ditto.
|
||||
(rest_of_handle_thread_prologue_and_epilogue): Ditto.
|
||||
(safe_insn_predicate): Change return type from int to bool.
|
||||
(aggregate_value_p): Change return type from int to bool
|
||||
and update function body accordingly.
|
||||
(prologue_contains): Change return type from int to bool.
|
||||
(prologue_epilogue_contains): Ditto.
|
||||
---
|
||||
gcc/function.cc | 77 ++++++++++++++++++++++++-------------------------
|
||||
gcc/function.h | 10 +++----
|
||||
2 files changed, 42 insertions(+), 45 deletions(-)
|
||||
|
||||
diff --git a/gcc/function.cc b/gcc/function.cc
|
||||
index 99aa738eb..fc8eb5812 100644
|
||||
--- a/gcc/function.cc
|
||||
+++ b/gcc/function.cc
|
||||
@@ -578,8 +578,8 @@ public:
|
||||
tree type;
|
||||
/* The alignment (in bits) of the slot. */
|
||||
unsigned int align;
|
||||
- /* Nonzero if this temporary is currently in use. */
|
||||
- char in_use;
|
||||
+ /* True if this temporary is currently in use. */
|
||||
+ bool in_use;
|
||||
/* Nesting level at which this slot is being used. */
|
||||
int level;
|
||||
/* The offset of the slot from the frame_pointer, including extra space
|
||||
@@ -674,7 +674,7 @@ make_slot_available (class temp_slot *temp)
|
||||
{
|
||||
cut_slot_from_list (temp, temp_slots_at_level (temp->level));
|
||||
insert_slot_to_list (temp, &avail_temp_slots);
|
||||
- temp->in_use = 0;
|
||||
+ temp->in_use = false;
|
||||
temp->level = -1;
|
||||
n_temp_slots_in_use--;
|
||||
}
|
||||
@@ -848,7 +848,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type)
|
||||
if (known_ge (best_p->size - rounded_size, alignment))
|
||||
{
|
||||
p = ggc_alloc<temp_slot> ();
|
||||
- p->in_use = 0;
|
||||
+ p->in_use = false;
|
||||
p->size = best_p->size - rounded_size;
|
||||
p->base_offset = best_p->base_offset + rounded_size;
|
||||
p->full_size = best_p->full_size - rounded_size;
|
||||
@@ -918,7 +918,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type)
|
||||
}
|
||||
|
||||
p = selected;
|
||||
- p->in_use = 1;
|
||||
+ p->in_use = true;
|
||||
p->type = type;
|
||||
p->level = temp_slot_level;
|
||||
n_temp_slots_in_use++;
|
||||
@@ -1340,7 +1340,7 @@ has_hard_reg_initial_val (machine_mode mode, unsigned int regno)
|
||||
return NULL_RTX;
|
||||
}
|
||||
|
||||
-unsigned int
|
||||
+void
|
||||
emit_initial_value_sets (void)
|
||||
{
|
||||
struct initial_value_struct *ivs = crtl->hard_reg_initial_vals;
|
||||
@@ -1348,7 +1348,7 @@ emit_initial_value_sets (void)
|
||||
rtx_insn *seq;
|
||||
|
||||
if (ivs == 0)
|
||||
- return 0;
|
||||
+ return;
|
||||
|
||||
start_sequence ();
|
||||
for (i = 0; i < ivs->num_entries; i++)
|
||||
@@ -1357,7 +1357,6 @@ emit_initial_value_sets (void)
|
||||
end_sequence ();
|
||||
|
||||
emit_insn_at_entry (seq);
|
||||
- return 0;
|
||||
}
|
||||
|
||||
/* Return the hardreg-pseudoreg initial values pair entry I and
|
||||
@@ -1535,7 +1534,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc)
|
||||
/* A subroutine of instantiate_virtual_regs_in_insn. Return true if X
|
||||
matches the predicate for insn CODE operand OPERAND. */
|
||||
|
||||
-static int
|
||||
+static bool
|
||||
safe_insn_predicate (int code, int operand, rtx x)
|
||||
{
|
||||
return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x);
|
||||
@@ -1948,7 +1947,7 @@ instantiate_decls (tree fndecl)
|
||||
/* Pass through the INSNS of function FNDECL and convert virtual register
|
||||
references to hard register references. */
|
||||
|
||||
-static unsigned int
|
||||
+static void
|
||||
instantiate_virtual_regs (void)
|
||||
{
|
||||
rtx_insn *insn;
|
||||
@@ -2002,8 +2001,6 @@ instantiate_virtual_regs (void)
|
||||
/* Indicate that, from now on, assign_stack_local should use
|
||||
frame_pointer_rtx. */
|
||||
virtuals_instantiated = 1;
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
namespace {
|
||||
@@ -2031,7 +2028,8 @@ public:
|
||||
/* opt_pass methods: */
|
||||
virtual unsigned int execute (function *)
|
||||
{
|
||||
- return instantiate_virtual_regs ();
|
||||
+ instantiate_virtual_regs ();
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
}; // class pass_instantiate_virtual_regs
|
||||
@@ -2045,12 +2043,12 @@ make_pass_instantiate_virtual_regs (gcc::context *ctxt)
|
||||
}
|
||||
|
||||
|
||||
-/* Return 1 if EXP is an aggregate type (or a value with aggregate type).
|
||||
+/* Return true if EXP is an aggregate type (or a value with aggregate type).
|
||||
This means a type for which function calls must pass an address to the
|
||||
function or get an address back from the function.
|
||||
EXP may be a type node or an expression (whose type is tested). */
|
||||
|
||||
-int
|
||||
+bool
|
||||
aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
{
|
||||
const_tree type = (TYPE_P (exp)) ? exp : TREE_TYPE (exp);
|
||||
@@ -2070,7 +2068,7 @@ aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
else
|
||||
/* For internal functions, assume nothing needs to be
|
||||
returned in memory. */
|
||||
- return 0;
|
||||
+ return false;
|
||||
}
|
||||
break;
|
||||
case FUNCTION_DECL:
|
||||
@@ -2088,10 +2086,10 @@ aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
}
|
||||
|
||||
if (VOID_TYPE_P (type))
|
||||
- return 0;
|
||||
+ return false;
|
||||
|
||||
if (error_operand_p (fntype))
|
||||
- return 0;
|
||||
+ return false;
|
||||
|
||||
/* If a record should be passed the same as its first (and only) member
|
||||
don't pass it as an aggregate. */
|
||||
@@ -2102,25 +2100,25 @@ aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
reference, do so. */
|
||||
if ((TREE_CODE (exp) == PARM_DECL || TREE_CODE (exp) == RESULT_DECL)
|
||||
&& DECL_BY_REFERENCE (exp))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
/* Function types that are TREE_ADDRESSABLE force return in memory. */
|
||||
if (fntype && TREE_ADDRESSABLE (fntype))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
/* Types that are TREE_ADDRESSABLE must be constructed in memory,
|
||||
and thus can't be returned in registers. */
|
||||
if (TREE_ADDRESSABLE (type))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
if (TYPE_EMPTY_P (type))
|
||||
- return 0;
|
||||
+ return false;
|
||||
|
||||
if (flag_pcc_struct_return && AGGREGATE_TYPE_P (type))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
if (targetm.calls.return_in_memory (type, fntype))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
/* Make sure we have suitable call-clobbered regs to return
|
||||
the value in; if not, we must return it in memory. */
|
||||
@@ -2129,7 +2127,7 @@ aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
/* If we have something other than a REG (e.g. a PARALLEL), then assume
|
||||
it is OK. */
|
||||
if (!REG_P (reg))
|
||||
- return 0;
|
||||
+ return false;
|
||||
|
||||
/* Use the default ABI if the type of the function isn't known.
|
||||
The scheme for handling interoperability between different ABIs
|
||||
@@ -2142,9 +2140,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
|
||||
nregs = hard_regno_nregs (regno, TYPE_MODE (type));
|
||||
for (i = 0; i < nregs; i++)
|
||||
if (!fixed_regs[regno + i] && !abi.clobbers_full_reg_p (regno + i))
|
||||
- return 1;
|
||||
+ return true;
|
||||
|
||||
- return 0;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* Return true if we should assign DECL a pseudo register; false if it
|
||||
@@ -5741,26 +5739,26 @@ contains (const rtx_insn *insn, hash_table<insn_cache_hasher> *hash)
|
||||
return hash->find (const_cast<rtx_insn *> (insn)) != NULL;
|
||||
}
|
||||
|
||||
-int
|
||||
+bool
|
||||
prologue_contains (const rtx_insn *insn)
|
||||
{
|
||||
return contains (insn, prologue_insn_hash);
|
||||
}
|
||||
|
||||
-int
|
||||
+bool
|
||||
epilogue_contains (const rtx_insn *insn)
|
||||
{
|
||||
return contains (insn, epilogue_insn_hash);
|
||||
}
|
||||
|
||||
-int
|
||||
+bool
|
||||
prologue_epilogue_contains (const rtx_insn *insn)
|
||||
{
|
||||
if (contains (insn, prologue_insn_hash))
|
||||
- return 1;
|
||||
+ return true;
|
||||
if (contains (insn, epilogue_insn_hash))
|
||||
- return 1;
|
||||
- return 0;
|
||||
+ return true;
|
||||
+ return false;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -6386,14 +6384,13 @@ current_function_name (void)
|
||||
}
|
||||
|
||||
|
||||
-static unsigned int
|
||||
+static void
|
||||
rest_of_handle_check_leaf_regs (void)
|
||||
{
|
||||
#ifdef LEAF_REGISTERS
|
||||
crtl->uses_only_leaf_regs
|
||||
= optimize > 0 && only_leaf_regs_used () && leaf_function_p ();
|
||||
#endif
|
||||
- return 0;
|
||||
}
|
||||
|
||||
/* Insert a TYPE into the used types hash table of CFUN. */
|
||||
@@ -6518,7 +6515,8 @@ public:
|
||||
/* opt_pass methods: */
|
||||
virtual unsigned int execute (function *)
|
||||
{
|
||||
- return rest_of_handle_check_leaf_regs ();
|
||||
+ rest_of_handle_check_leaf_regs ();
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
}; // class pass_leaf_regs
|
||||
@@ -6531,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt)
|
||||
return new pass_leaf_regs (ctxt);
|
||||
}
|
||||
|
||||
-static unsigned int
|
||||
+static void
|
||||
rest_of_handle_thread_prologue_and_epilogue (function *fun)
|
||||
{
|
||||
/* prepare_shrink_wrap is sensitive to the block structure of the control
|
||||
@@ -6563,8 +6561,6 @@ rest_of_handle_thread_prologue_and_epilogue (function *fun)
|
||||
/* The stack usage info is finalized during prologue expansion. */
|
||||
if (flag_stack_usage_info || flag_callgraph_info)
|
||||
output_stack_usage ();
|
||||
-
|
||||
- return 0;
|
||||
}
|
||||
|
||||
/* Record a final call to CALLEE at LOCATION. */
|
||||
@@ -6626,7 +6622,8 @@ public:
|
||||
/* opt_pass methods: */
|
||||
unsigned int execute (function * fun) final override
|
||||
{
|
||||
- return rest_of_handle_thread_prologue_and_epilogue (fun);
|
||||
+ rest_of_handle_thread_prologue_and_epilogue (fun);
|
||||
+ return 0;
|
||||
}
|
||||
|
||||
}; // class pass_thread_prologue_and_epilogue
|
||||
diff --git a/gcc/function.h b/gcc/function.h
|
||||
index a53fb24d2..4e8131706 100644
|
||||
--- a/gcc/function.h
|
||||
+++ b/gcc/function.h
|
||||
@@ -653,11 +653,11 @@ extern rtx get_hard_reg_initial_val (machine_mode, unsigned int);
|
||||
extern rtx has_hard_reg_initial_val (machine_mode, unsigned int);
|
||||
|
||||
/* Called from gimple_expand_cfg. */
|
||||
-extern unsigned int emit_initial_value_sets (void);
|
||||
+extern void emit_initial_value_sets (void);
|
||||
|
||||
extern bool initial_value_entry (int i, rtx *, rtx *);
|
||||
extern void instantiate_decl_rtl (rtx x);
|
||||
-extern int aggregate_value_p (const_tree, const_tree);
|
||||
+extern bool aggregate_value_p (const_tree, const_tree);
|
||||
extern bool use_register_for_decl (const_tree);
|
||||
extern gimple_seq gimplify_parameters (gimple_seq *);
|
||||
extern void locate_and_pad_parm (machine_mode, tree, int, int, int,
|
||||
@@ -698,9 +698,9 @@ extern void clobber_return_register (void);
|
||||
extern void expand_function_end (void);
|
||||
extern rtx get_arg_pointer_save_area (void);
|
||||
extern void maybe_copy_prologue_epilogue_insn (rtx, rtx);
|
||||
-extern int prologue_contains (const rtx_insn *);
|
||||
-extern int epilogue_contains (const rtx_insn *);
|
||||
-extern int prologue_epilogue_contains (const rtx_insn *);
|
||||
+extern bool prologue_contains (const rtx_insn *);
|
||||
+extern bool epilogue_contains (const rtx_insn *);
|
||||
+extern bool prologue_epilogue_contains (const rtx_insn *);
|
||||
extern void record_prologue_seq (rtx_insn *);
|
||||
extern void record_epilogue_seq (rtx_insn *);
|
||||
extern void emit_return_into_block (bool simple_p, basic_block bb);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
233
0141-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
Normal file
233
0141-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch
Normal file
@ -0,0 +1,233 @@
|
||||
From 417d51e1ecf41b3ba3ddf24eaf1e07db5c1ded9e Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 09:28:46 +0000
|
||||
Subject: [PATCH 049/157] [Backport][SME] Allow prologues and epilogues to be
|
||||
inserted later
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e9d2ae6b9816e61a6148040149c63faa83f54702
|
||||
|
||||
Arm's SME adds a new processor mode called streaming mode.
|
||||
This mode enables some new (matrix-oriented) instructions and
|
||||
disables several existing groups of instructions, such as most
|
||||
Advanced SIMD vector instructions and a much smaller set of SVE
|
||||
instructions. It can also change the current vector length.
|
||||
|
||||
There are instructions to switch in and out of streaming mode.
|
||||
However, their effect on the ISA and vector length can't be represented
|
||||
directly in RTL, so they need to be emitted late in the pass pipeline,
|
||||
close to md_reorg.
|
||||
|
||||
It's sometimes the responsibility of the prologue and epilogue to
|
||||
switch modes, which means we need to emit the prologue and epilogue
|
||||
sequences late as well. (This loses shrink-wrapping and scheduling
|
||||
opportunities, but that's a price worth paying.)
|
||||
|
||||
This patch therefore adds a target hook for forcing prologue
|
||||
and epilogue insertion to happen later in the pipeline.
|
||||
|
||||
gcc/
|
||||
* target.def (use_late_prologue_epilogue): New hook.
|
||||
* doc/tm.texi.in: Add TARGET_USE_LATE_PROLOGUE_EPILOGUE.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* passes.def (pass_late_thread_prologue_and_epilogue): New pass.
|
||||
* tree-pass.h (make_pass_late_thread_prologue_and_epilogue): Declare.
|
||||
* function.cc (pass_thread_prologue_and_epilogue::gate): New function.
|
||||
(pass_data_late_thread_prologue_and_epilogue): New pass variable.
|
||||
(pass_late_thread_prologue_and_epilogue): New pass class.
|
||||
(make_pass_late_thread_prologue_and_epilogue): New function.
|
||||
---
|
||||
gcc/doc/tm.texi | 19 ++++++++++++++++++
|
||||
gcc/doc/tm.texi.in | 2 ++
|
||||
gcc/function.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
gcc/passes.def | 3 +++
|
||||
gcc/target.def | 21 +++++++++++++++++++
|
||||
gcc/tree-pass.h | 2 ++
|
||||
6 files changed, 97 insertions(+)
|
||||
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 5f0972356..d930d233d 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -11684,6 +11684,25 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed
|
||||
to by @var{ce_info}.
|
||||
@end defmac
|
||||
|
||||
+@deftypefn {Target Hook} bool TARGET_USE_LATE_PROLOGUE_EPILOGUE ()
|
||||
+Return true if the current function's prologue and epilogue should
|
||||
+be emitted late in the pass pipeline, instead of at the usual point.
|
||||
+
|
||||
+Normally, the prologue and epilogue sequences are introduced soon after
|
||||
+register allocation is complete. The advantage of this approach is that
|
||||
+it allows the prologue and epilogue instructions to be optimized and
|
||||
+scheduled with other code in the function. However, some targets
|
||||
+require the prologue and epilogue to be the first and last sequences
|
||||
+executed by the function, with no variation allowed. This hook should
|
||||
+return true on such targets.
|
||||
+
|
||||
+The default implementation returns false, which is correct for most
|
||||
+targets. The hook should only return true if there is a specific
|
||||
+target limitation that cannot be described in RTL. For example,
|
||||
+the hook might return true if the prologue and epilogue need to switch
|
||||
+between instruction sets.
|
||||
+@end deftypefn
|
||||
+
|
||||
@deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void)
|
||||
If non-null, this hook performs a target-specific pass over the
|
||||
instruction stream. The compiler will run it at all optimization levels,
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index fcab21744..19eabec48 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -7708,6 +7708,8 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed
|
||||
to by @var{ce_info}.
|
||||
@end defmac
|
||||
|
||||
+@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE
|
||||
+
|
||||
@hook TARGET_MACHINE_DEPENDENT_REORG
|
||||
|
||||
@hook TARGET_INIT_BUILTINS
|
||||
diff --git a/gcc/function.cc b/gcc/function.cc
|
||||
index fc8eb5812..7c90b5f23 100644
|
||||
--- a/gcc/function.cc
|
||||
+++ b/gcc/function.cc
|
||||
@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "function-abi.h"
|
||||
#include "value-range.h"
|
||||
#include "gimple-range.h"
|
||||
+#include "insn-attr.h"
|
||||
|
||||
/* So we can assign to cfun in this file. */
|
||||
#undef cfun
|
||||
@@ -6620,6 +6621,11 @@ public:
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
+ bool gate (function *) final override
|
||||
+ {
|
||||
+ return !targetm.use_late_prologue_epilogue ();
|
||||
+ }
|
||||
+
|
||||
unsigned int execute (function * fun) final override
|
||||
{
|
||||
rest_of_handle_thread_prologue_and_epilogue (fun);
|
||||
@@ -6628,6 +6634,44 @@ public:
|
||||
|
||||
}; // class pass_thread_prologue_and_epilogue
|
||||
|
||||
+const pass_data pass_data_late_thread_prologue_and_epilogue =
|
||||
+{
|
||||
+ RTL_PASS, /* type */
|
||||
+ "late_pro_and_epilogue", /* name */
|
||||
+ OPTGROUP_NONE, /* optinfo_flags */
|
||||
+ TV_THREAD_PROLOGUE_AND_EPILOGUE, /* tv_id */
|
||||
+ 0, /* properties_required */
|
||||
+ 0, /* properties_provided */
|
||||
+ 0, /* properties_destroyed */
|
||||
+ 0, /* todo_flags_start */
|
||||
+ ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
|
||||
+};
|
||||
+
|
||||
+class pass_late_thread_prologue_and_epilogue : public rtl_opt_pass
|
||||
+{
|
||||
+public:
|
||||
+ pass_late_thread_prologue_and_epilogue (gcc::context *ctxt)
|
||||
+ : rtl_opt_pass (pass_data_late_thread_prologue_and_epilogue, ctxt)
|
||||
+ {}
|
||||
+
|
||||
+ /* opt_pass methods: */
|
||||
+ bool gate (function *) final override
|
||||
+ {
|
||||
+ return targetm.use_late_prologue_epilogue ();
|
||||
+ }
|
||||
+
|
||||
+ unsigned int execute (function *fn) final override
|
||||
+ {
|
||||
+ /* It's not currently possible to have both delay slots and
|
||||
+ late prologue/epilogue, since the latter has to run before
|
||||
+ the former, and the former won't honor whatever restrictions
|
||||
+ the latter is trying to enforce. */
|
||||
+ gcc_assert (!DELAY_SLOTS);
|
||||
+ rest_of_handle_thread_prologue_and_epilogue (fn);
|
||||
+ return 0;
|
||||
+ }
|
||||
+}; // class pass_late_thread_prologue_and_epilogue
|
||||
+
|
||||
} // anon namespace
|
||||
|
||||
rtl_opt_pass *
|
||||
@@ -6636,6 +6680,12 @@ make_pass_thread_prologue_and_epilogue (gcc::context *ctxt)
|
||||
return new pass_thread_prologue_and_epilogue (ctxt);
|
||||
}
|
||||
|
||||
+rtl_opt_pass *
|
||||
+make_pass_late_thread_prologue_and_epilogue (gcc::context *ctxt)
|
||||
+{
|
||||
+ return new pass_late_thread_prologue_and_epilogue (ctxt);
|
||||
+}
|
||||
+
|
||||
namespace {
|
||||
|
||||
const pass_data pass_data_zero_call_used_regs =
|
||||
diff --git a/gcc/passes.def b/gcc/passes.def
|
||||
index cdc600298..8797f166f 100644
|
||||
--- a/gcc/passes.def
|
||||
+++ b/gcc/passes.def
|
||||
@@ -523,6 +523,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
NEXT_PASS (pass_stack_regs_run);
|
||||
POP_INSERT_PASSES ()
|
||||
POP_INSERT_PASSES ()
|
||||
+ NEXT_PASS (pass_late_thread_prologue_and_epilogue);
|
||||
+ /* No target-independent code motion is allowed beyond this point,
|
||||
+ excepting the legacy delayed-branch pass. */
|
||||
NEXT_PASS (pass_late_compilation);
|
||||
PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
|
||||
NEXT_PASS (pass_zero_call_used_regs);
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index 4d77c1523..fd4899612 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -4120,6 +4120,27 @@ returns @code{VOIDmode}.",
|
||||
machine_mode, (machine_mode m1, machine_mode m2),
|
||||
default_cc_modes_compatible)
|
||||
|
||||
+DEFHOOK
|
||||
+(use_late_prologue_epilogue,
|
||||
+ "Return true if the current function's prologue and epilogue should\n\
|
||||
+be emitted late in the pass pipeline, instead of at the usual point.\n\
|
||||
+\n\
|
||||
+Normally, the prologue and epilogue sequences are introduced soon after\n\
|
||||
+register allocation is complete. The advantage of this approach is that\n\
|
||||
+it allows the prologue and epilogue instructions to be optimized and\n\
|
||||
+scheduled with other code in the function. However, some targets\n\
|
||||
+require the prologue and epilogue to be the first and last sequences\n\
|
||||
+executed by the function, with no variation allowed. This hook should\n\
|
||||
+return true on such targets.\n\
|
||||
+\n\
|
||||
+The default implementation returns false, which is correct for most\n\
|
||||
+targets. The hook should only return true if there is a specific\n\
|
||||
+target limitation that cannot be described in RTL. For example,\n\
|
||||
+the hook might return true if the prologue and epilogue need to switch\n\
|
||||
+between instruction sets.",
|
||||
+ bool, (),
|
||||
+ hook_bool_void_false)
|
||||
+
|
||||
/* Do machine-dependent code transformations. Called just before
|
||||
delayed-branch scheduling. */
|
||||
DEFHOOK
|
||||
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||||
index 34e60bc38..1c983ef71 100644
|
||||
--- a/gcc/tree-pass.h
|
||||
+++ b/gcc/tree-pass.h
|
||||
@@ -612,6 +612,8 @@ extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
|
||||
*ctxt);
|
||||
+extern rtl_opt_pass *make_pass_late_thread_prologue_and_epilogue (gcc::context
|
||||
+ *ctxt);
|
||||
extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
239
0142-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
Normal file
239
0142-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch
Normal file
@ -0,0 +1,239 @@
|
||||
From e906213086639df81085a0101bf88fb66c1dbc2b Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 09:35:57 +0000
|
||||
Subject: [PATCH 050/157] [Backport][SME] Add a target hook for sibcall
|
||||
epilogues
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2e0aefa77157396acb48833407637303edba450a
|
||||
|
||||
Epilogues for sibling calls are generated using the
|
||||
sibcall_epilogue pattern. One disadvantage of this approach
|
||||
is that the target doesn't know which call the epilogue is for,
|
||||
even though the code that generates the pattern has the call
|
||||
to hand.
|
||||
|
||||
Although call instructions are currently rtxes, and so could be
|
||||
passed as an operand to the pattern, the main point of introducing
|
||||
rtx_insn was to move towards separating the rtx and insn types
|
||||
(a good thing IMO). There also isn't an existing practice of
|
||||
passing genuine instructions (as opposed to labels) to
|
||||
instruction patterns.
|
||||
|
||||
This patch therefore adds a hook that can be defined as an
|
||||
alternative to sibcall_epilogue. The advantage is that it
|
||||
can be passed the call; the disadvantage is that it can't
|
||||
use .md conveniences like generating instructions from
|
||||
textual patterns (although most epilogues are too complex
|
||||
to benefit much from that anyway).
|
||||
|
||||
gcc/
|
||||
* doc/tm.texi.in: Add TARGET_EMIT_EPILOGUE_FOR_SIBCALL.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* target.def (emit_epilogue_for_sibcall): New hook.
|
||||
* calls.cc (can_implement_as_sibling_call_p): Use it.
|
||||
* function.cc (thread_prologue_and_epilogue_insns): Likewise.
|
||||
(reposition_prologue_and_epilogue_notes): Likewise.
|
||||
* config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): Take
|
||||
an rtx_call_insn * rather than a bool.
|
||||
* config/aarch64/aarch64.cc (aarch64_expand_epilogue): Likewise.
|
||||
(TARGET_EMIT_EPILOGUE_FOR_SIBCALL): Define.
|
||||
* config/aarch64/aarch64.md (epilogue): Update call.
|
||||
(sibcall_epilogue): Delete.
|
||||
---
|
||||
gcc/calls.cc | 3 ++-
|
||||
gcc/config/aarch64/aarch64-protos.h | 2 +-
|
||||
gcc/config/aarch64/aarch64.cc | 11 +++++++----
|
||||
gcc/config/aarch64/aarch64.md | 11 +----------
|
||||
gcc/doc/tm.texi | 8 ++++++++
|
||||
gcc/doc/tm.texi.in | 2 ++
|
||||
gcc/function.cc | 15 +++++++++++++--
|
||||
gcc/target.def | 9 +++++++++
|
||||
8 files changed, 43 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/gcc/calls.cc b/gcc/calls.cc
|
||||
index 4d0bc45be..c1db66883 100644
|
||||
--- a/gcc/calls.cc
|
||||
+++ b/gcc/calls.cc
|
||||
@@ -2461,7 +2461,8 @@ can_implement_as_sibling_call_p (tree exp,
|
||||
tree addr,
|
||||
const args_size &args_size)
|
||||
{
|
||||
- if (!targetm.have_sibcall_epilogue ())
|
||||
+ if (!targetm.have_sibcall_epilogue ()
|
||||
+ && !targetm.emit_epilogue_for_sibcall)
|
||||
{
|
||||
maybe_complain_about_tail_call
|
||||
(exp,
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index 86e444a60..97984f3ab 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -887,7 +887,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
|
||||
const char * aarch64_output_probe_stack_range (rtx, rtx);
|
||||
const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx);
|
||||
void aarch64_err_no_fpadvsimd (machine_mode);
|
||||
-void aarch64_expand_epilogue (bool);
|
||||
+void aarch64_expand_epilogue (rtx_call_insn *);
|
||||
rtx aarch64_ptrue_all (unsigned int);
|
||||
opt_machine_mode aarch64_ptrue_all_mode (rtx);
|
||||
rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index fd1114b52..055b436b1 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -10046,7 +10046,7 @@ aarch64_use_return_insn_p (void)
|
||||
from a deallocated stack, and we optimize the unwind records by
|
||||
emitting them all together if possible. */
|
||||
void
|
||||
-aarch64_expand_epilogue (bool for_sibcall)
|
||||
+aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
{
|
||||
poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
|
||||
HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
|
||||
@@ -10194,7 +10194,7 @@ aarch64_expand_epilogue (bool for_sibcall)
|
||||
explicitly authenticate.
|
||||
*/
|
||||
if (aarch64_return_address_signing_enabled ()
|
||||
- && (for_sibcall || !TARGET_ARMV8_3))
|
||||
+ && (sibcall || !TARGET_ARMV8_3))
|
||||
{
|
||||
switch (aarch64_ra_sign_key)
|
||||
{
|
||||
@@ -10212,7 +10212,7 @@ aarch64_expand_epilogue (bool for_sibcall)
|
||||
}
|
||||
|
||||
/* Stack adjustment for exception handler. */
|
||||
- if (crtl->calls_eh_return && !for_sibcall)
|
||||
+ if (crtl->calls_eh_return && !sibcall)
|
||||
{
|
||||
/* We need to unwind the stack by the offset computed by
|
||||
EH_RETURN_STACKADJ_RTX. We have already reset the CFA
|
||||
@@ -10223,7 +10223,7 @@ aarch64_expand_epilogue (bool for_sibcall)
|
||||
}
|
||||
|
||||
emit_use (gen_rtx_REG (DImode, LR_REGNUM));
|
||||
- if (!for_sibcall)
|
||||
+ if (!sibcall)
|
||||
emit_jump_insn (ret_rtx);
|
||||
}
|
||||
|
||||
@@ -28246,6 +28246,9 @@ aarch64_libgcc_floating_mode_supported_p
|
||||
#undef TARGET_HAVE_SHADOW_CALL_STACK
|
||||
#define TARGET_HAVE_SHADOW_CALL_STACK true
|
||||
|
||||
+#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL
|
||||
+#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue
|
||||
+
|
||||
struct gcc_target targetm = TARGET_INITIALIZER;
|
||||
|
||||
#include "gt-aarch64.h"
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 7267a74d6..a78476c8a 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -871,16 +871,7 @@
|
||||
[(clobber (const_int 0))]
|
||||
""
|
||||
"
|
||||
- aarch64_expand_epilogue (false);
|
||||
- DONE;
|
||||
- "
|
||||
-)
|
||||
-
|
||||
-(define_expand "sibcall_epilogue"
|
||||
- [(clobber (const_int 0))]
|
||||
- ""
|
||||
- "
|
||||
- aarch64_expand_epilogue (true);
|
||||
+ aarch64_expand_epilogue (nullptr);
|
||||
DONE;
|
||||
"
|
||||
)
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index d930d233d..369f4b8da 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -11703,6 +11703,14 @@ the hook might return true if the prologue and epilogue need to switch
|
||||
between instruction sets.
|
||||
@end deftypefn
|
||||
|
||||
+@deftypefn {Target Hook} void TARGET_EMIT_EPILOGUE_FOR_SIBCALL (rtx_call_insn *@var{call})
|
||||
+If defined, this hook emits an epilogue sequence for sibling (tail)
|
||||
+call instruction @var{call}. Another way of providing epilogues
|
||||
+for sibling calls is to define the @code{sibcall_epilogue} instruction
|
||||
+pattern; the main advantage of this hook over the pattern is that it
|
||||
+has access to the call instruction.
|
||||
+@end deftypefn
|
||||
+
|
||||
@deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void)
|
||||
If non-null, this hook performs a target-specific pass over the
|
||||
instruction stream. The compiler will run it at all optimization levels,
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index 19eabec48..748b0777a 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -7710,6 +7710,8 @@ to by @var{ce_info}.
|
||||
|
||||
@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE
|
||||
|
||||
+@hook TARGET_EMIT_EPILOGUE_FOR_SIBCALL
|
||||
+
|
||||
@hook TARGET_MACHINE_DEPENDENT_REORG
|
||||
|
||||
@hook TARGET_INIT_BUILTINS
|
||||
diff --git a/gcc/function.cc b/gcc/function.cc
|
||||
index 7c90b5f23..ddab43ca4 100644
|
||||
--- a/gcc/function.cc
|
||||
+++ b/gcc/function.cc
|
||||
@@ -6209,7 +6209,17 @@ thread_prologue_and_epilogue_insns (void)
|
||||
if (!(CALL_P (insn) && SIBLING_CALL_P (insn)))
|
||||
continue;
|
||||
|
||||
- if (rtx_insn *ep_seq = targetm.gen_sibcall_epilogue ())
|
||||
+ rtx_insn *ep_seq;
|
||||
+ if (targetm.emit_epilogue_for_sibcall)
|
||||
+ {
|
||||
+ start_sequence ();
|
||||
+ targetm.emit_epilogue_for_sibcall (as_a<rtx_call_insn *> (insn));
|
||||
+ ep_seq = get_insns ();
|
||||
+ end_sequence ();
|
||||
+ }
|
||||
+ else
|
||||
+ ep_seq = targetm.gen_sibcall_epilogue ();
|
||||
+ if (ep_seq)
|
||||
{
|
||||
start_sequence ();
|
||||
emit_note (NOTE_INSN_EPILOGUE_BEG);
|
||||
@@ -6259,7 +6269,8 @@ reposition_prologue_and_epilogue_notes (void)
|
||||
{
|
||||
if (!targetm.have_prologue ()
|
||||
&& !targetm.have_epilogue ()
|
||||
- && !targetm.have_sibcall_epilogue ())
|
||||
+ && !targetm.have_sibcall_epilogue ()
|
||||
+ && !targetm.emit_epilogue_for_sibcall)
|
||||
return;
|
||||
|
||||
/* Since the hash table is created on demand, the fact that it is
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index fd4899612..cf9f96eba 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -4141,6 +4141,15 @@ between instruction sets.",
|
||||
bool, (),
|
||||
hook_bool_void_false)
|
||||
|
||||
+DEFHOOK
|
||||
+(emit_epilogue_for_sibcall,
|
||||
+ "If defined, this hook emits an epilogue sequence for sibling (tail)\n\
|
||||
+call instruction @var{call}. Another way of providing epilogues\n\
|
||||
+for sibling calls is to define the @code{sibcall_epilogue} instruction\n\
|
||||
+pattern; the main advantage of this hook over the pattern is that it\n\
|
||||
+has access to the call instruction.",
|
||||
+ void, (rtx_call_insn *call), NULL)
|
||||
+
|
||||
/* Do machine-dependent code transformations. Called just before
|
||||
delayed-branch scheduling. */
|
||||
DEFHOOK
|
||||
--
|
||||
2.33.0
|
||||
|
||||
461
0143-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
Normal file
461
0143-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch
Normal file
@ -0,0 +1,461 @@
|
||||
From 58adede22d9ff2368b5c24ec3fc0e53bd3ddc8bd Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 09:44:52 +0000
|
||||
Subject: [PATCH 051/157] [Backport][SME] Add a new target hook:
|
||||
TARGET_START_CALL_ARGS
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=672fad57c1f99ff893019e2da4620e26b9b31dd2
|
||||
|
||||
We have the following two hooks into the call expansion code:
|
||||
|
||||
- TARGET_CALL_ARGS is called for each argument before arguments
|
||||
are moved into hard registers.
|
||||
|
||||
- TARGET_END_CALL_ARGS is called after the end of the call
|
||||
sequence (specifically, after any return value has been
|
||||
moved to a pseudo).
|
||||
|
||||
This patch adds a TARGET_START_CALL_ARGS hook that is called before
|
||||
the TARGET_CALL_ARGS sequence. This means that TARGET_START_CALL_ARGS
|
||||
and TARGET_END_CALL_ARGS bracket the region in which argument registers
|
||||
might be live. They also bracket a region in which the only call
|
||||
emitted by target-independent code is the call to the target function
|
||||
itself. (For example, TARGET_START_CALL_ARGS happens after any use of
|
||||
memcpy to copy arguments, and TARGET_END_CALL_ARGS happens before any
|
||||
use of memcpy to copy the result.)
|
||||
|
||||
Also, the patch adds the cumulative argument structure as an argument
|
||||
to the hooks, so that the target can use it to record and retrieve
|
||||
information about the call as a whole.
|
||||
|
||||
The TARGET_CALL_ARGS docs said:
|
||||
|
||||
While generating RTL for a function call, this target hook is invoked once
|
||||
for each argument passed to the function, either a register returned by
|
||||
``TARGET_FUNCTION_ARG`` or a memory location. It is called just
|
||||
- before the point where argument registers are stored.
|
||||
|
||||
The last bit was true for normal calls, but for libcalls the hook was
|
||||
invoked earlier, before stack arguments have been copied. I don't think
|
||||
this caused a practical difference for nvptx (the only port to use the
|
||||
hooks) since I wouldn't expect any libcalls to take stack parameters.
|
||||
|
||||
gcc/
|
||||
* doc/tm.texi.in: Add TARGET_START_CALL_ARGS.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* target.def (start_call_args): New hook.
|
||||
(call_args, end_call_args): Add a parameter for the cumulative
|
||||
argument information.
|
||||
* hooks.h (hook_void_rtx_tree): Delete.
|
||||
* hooks.cc (hook_void_rtx_tree): Likewise.
|
||||
* targhooks.h (hook_void_CUMULATIVE_ARGS): Declare.
|
||||
(hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise.
|
||||
* targhooks.cc (hook_void_CUMULATIVE_ARGS): New function.
|
||||
(hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise.
|
||||
* calls.cc (expand_call): Call start_call_args before computing
|
||||
and storing stack parameters. Pass the cumulative argument
|
||||
information to call_args and end_call_args.
|
||||
(emit_library_call_value_1): Likewise.
|
||||
* config/nvptx/nvptx.cc (nvptx_call_args): Add a cumulative
|
||||
argument parameter.
|
||||
(nvptx_end_call_args): Likewise.
|
||||
---
|
||||
gcc/calls.cc | 61 +++++++++++++++++++++------------------
|
||||
gcc/config/nvptx/nvptx.cc | 4 +--
|
||||
gcc/doc/tm.texi | 53 +++++++++++++++++++++++++++-------
|
||||
gcc/doc/tm.texi.in | 2 ++
|
||||
gcc/hooks.cc | 5 ----
|
||||
gcc/hooks.h | 1 -
|
||||
gcc/target.def | 59 +++++++++++++++++++++++++++++--------
|
||||
gcc/targhooks.cc | 10 +++++++
|
||||
gcc/targhooks.h | 5 ++--
|
||||
9 files changed, 140 insertions(+), 60 deletions(-)
|
||||
|
||||
diff --git a/gcc/calls.cc b/gcc/calls.cc
|
||||
index c1db66883..4a8535cc6 100644
|
||||
--- a/gcc/calls.cc
|
||||
+++ b/gcc/calls.cc
|
||||
@@ -3507,15 +3507,26 @@ expand_call (tree exp, rtx target, int ignore)
|
||||
sibcall_failure = 1;
|
||||
}
|
||||
|
||||
+ /* Set up the next argument register. For sibling calls on machines
|
||||
+ with register windows this should be the incoming register. */
|
||||
+ if (pass == 0)
|
||||
+ next_arg_reg = targetm.calls.function_incoming_arg
|
||||
+ (args_so_far, function_arg_info::end_marker ());
|
||||
+ else
|
||||
+ next_arg_reg = targetm.calls.function_arg
|
||||
+ (args_so_far, function_arg_info::end_marker ());
|
||||
+
|
||||
+ targetm.calls.start_call_args (args_so_far);
|
||||
+
|
||||
bool any_regs = false;
|
||||
for (i = 0; i < num_actuals; i++)
|
||||
if (args[i].reg != NULL_RTX)
|
||||
{
|
||||
any_regs = true;
|
||||
- targetm.calls.call_args (args[i].reg, funtype);
|
||||
+ targetm.calls.call_args (args_so_far, args[i].reg, funtype);
|
||||
}
|
||||
if (!any_regs)
|
||||
- targetm.calls.call_args (pc_rtx, funtype);
|
||||
+ targetm.calls.call_args (args_so_far, pc_rtx, funtype);
|
||||
|
||||
/* Figure out the register where the value, if any, will come back. */
|
||||
valreg = 0;
|
||||
@@ -3578,15 +3589,6 @@ expand_call (tree exp, rtx target, int ignore)
|
||||
later safely search backwards to find the CALL_INSN. */
|
||||
before_call = get_last_insn ();
|
||||
|
||||
- /* Set up next argument register. For sibling calls on machines
|
||||
- with register windows this should be the incoming register. */
|
||||
- if (pass == 0)
|
||||
- next_arg_reg = targetm.calls.function_incoming_arg
|
||||
- (args_so_far, function_arg_info::end_marker ());
|
||||
- else
|
||||
- next_arg_reg = targetm.calls.function_arg
|
||||
- (args_so_far, function_arg_info::end_marker ());
|
||||
-
|
||||
if (pass == 1 && (return_flags & ERF_RETURNS_ARG))
|
||||
{
|
||||
int arg_nr = return_flags & ERF_RETURN_ARG_MASK;
|
||||
@@ -3879,7 +3881,7 @@ expand_call (tree exp, rtx target, int ignore)
|
||||
for (i = 0; i < num_actuals; ++i)
|
||||
free (args[i].aligned_regs);
|
||||
|
||||
- targetm.calls.end_call_args ();
|
||||
+ targetm.calls.end_call_args (args_so_far);
|
||||
|
||||
insns = get_insns ();
|
||||
end_sequence ();
|
||||
@@ -4437,17 +4439,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
|
||||
}
|
||||
#endif
|
||||
|
||||
- /* When expanding a normal call, args are stored in push order,
|
||||
- which is the reverse of what we have here. */
|
||||
- bool any_regs = false;
|
||||
- for (int i = nargs; i-- > 0; )
|
||||
- if (argvec[i].reg != NULL_RTX)
|
||||
- {
|
||||
- targetm.calls.call_args (argvec[i].reg, NULL_TREE);
|
||||
- any_regs = true;
|
||||
- }
|
||||
- if (!any_regs)
|
||||
- targetm.calls.call_args (pc_rtx, NULL_TREE);
|
||||
+ rtx call_cookie
|
||||
+ = targetm.calls.function_arg (args_so_far,
|
||||
+ function_arg_info::end_marker ());
|
||||
|
||||
/* Push the args that need to be pushed. */
|
||||
|
||||
@@ -4565,6 +4559,20 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
|
||||
|
||||
fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0);
|
||||
|
||||
+ targetm.calls.start_call_args (args_so_far);
|
||||
+
|
||||
+ /* When expanding a normal call, args are stored in push order,
|
||||
+ which is the reverse of what we have here. */
|
||||
+ bool any_regs = false;
|
||||
+ for (int i = nargs; i-- > 0; )
|
||||
+ if (argvec[i].reg != NULL_RTX)
|
||||
+ {
|
||||
+ targetm.calls.call_args (args_so_far, argvec[i].reg, NULL_TREE);
|
||||
+ any_regs = true;
|
||||
+ }
|
||||
+ if (!any_regs)
|
||||
+ targetm.calls.call_args (args_so_far, pc_rtx, NULL_TREE);
|
||||
+
|
||||
/* Now load any reg parms into their regs. */
|
||||
|
||||
/* ARGNUM indexes the ARGVEC array in the order in which the arguments
|
||||
@@ -4671,10 +4679,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
|
||||
get_identifier (XSTR (orgfun, 0)),
|
||||
build_function_type (tfom, NULL_TREE),
|
||||
original_args_size.constant, args_size.constant,
|
||||
- struct_value_size,
|
||||
- targetm.calls.function_arg (args_so_far,
|
||||
- function_arg_info::end_marker ()),
|
||||
- valreg,
|
||||
+ struct_value_size, call_cookie, valreg,
|
||||
old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
|
||||
|
||||
if (flag_ipa_ra)
|
||||
@@ -4694,7 +4699,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
|
||||
valreg = gen_rtx_REG (TYPE_MODE (tfom), REGNO (valreg));
|
||||
}
|
||||
|
||||
- targetm.calls.end_call_args ();
|
||||
+ targetm.calls.end_call_args (args_so_far);
|
||||
|
||||
/* For calls to `setjmp', etc., inform function.cc:setjmp_warnings
|
||||
that it should complain if nonvolatile values are live. For
|
||||
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
|
||||
index 3634a49de..7f2103ba6 100644
|
||||
--- a/gcc/config/nvptx/nvptx.cc
|
||||
+++ b/gcc/config/nvptx/nvptx.cc
|
||||
@@ -1780,7 +1780,7 @@ nvptx_get_drap_rtx (void)
|
||||
argument to the next call. */
|
||||
|
||||
static void
|
||||
-nvptx_call_args (rtx arg, tree fntype)
|
||||
+nvptx_call_args (cumulative_args_t, rtx arg, tree fntype)
|
||||
{
|
||||
if (!cfun->machine->doing_call)
|
||||
{
|
||||
@@ -1808,7 +1808,7 @@ nvptx_call_args (rtx arg, tree fntype)
|
||||
information we recorded. */
|
||||
|
||||
static void
|
||||
-nvptx_end_call_args (void)
|
||||
+nvptx_end_call_args (cumulative_args_t)
|
||||
{
|
||||
cfun->machine->doing_call = false;
|
||||
free_EXPR_LIST_list (&cfun->machine->call_args);
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 369f4b8da..357c29a4d 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -5392,26 +5392,59 @@ except the last are treated as named.
|
||||
You need not define this hook if it always returns @code{false}.
|
||||
@end deftypefn
|
||||
|
||||
-@deftypefn {Target Hook} void TARGET_CALL_ARGS (rtx, @var{tree})
|
||||
+@deftypefn {Target Hook} void TARGET_START_CALL_ARGS (cumulative_args_t @var{complete_args})
|
||||
+This target hook is invoked while generating RTL for a function call,
|
||||
+after the argument values have been computed, and after stack arguments
|
||||
+have been initialized, but before register arguments have been moved into
|
||||
+their ABI-defined hard register locations. It precedes calls to the related
|
||||
+hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}.
|
||||
+The significance of this position in the call expansion is that:
|
||||
+
|
||||
+@itemize @bullet
|
||||
+@item
|
||||
+No argument registers are live.
|
||||
+@item
|
||||
+Although a call sequence can in general involve subcalls (such as using
|
||||
+@code{memcpy} to copy large arguments), no such subcall will occur between
|
||||
+the call to this hook and the generation of the main call instruction.
|
||||
+@end itemize
|
||||
+
|
||||
+The single argument @var{complete_args} is the state of the target
|
||||
+function's cumulative argument information after the final call to
|
||||
+@code{TARGET_FUNCTION_ARG}.
|
||||
+
|
||||
+The hook can be used for things like switching processor mode, in cases
|
||||
+where different calls need different processor modes. Most ports do not
|
||||
+need to implement anything for this hook.
|
||||
+@end deftypefn
|
||||
+
|
||||
+@deftypefn {Target Hook} void TARGET_CALL_ARGS (cumulative_args_t @var{complete_args}, rtx @var{loc}, tree @var{type})
|
||||
While generating RTL for a function call, this target hook is invoked once
|
||||
for each argument passed to the function, either a register returned by
|
||||
@code{TARGET_FUNCTION_ARG} or a memory location. It is called just
|
||||
-before the point where argument registers are stored. The type of the
|
||||
-function to be called is also passed as the second argument; it is
|
||||
-@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is
|
||||
-invoked just after the code to copy the return reg has been emitted.
|
||||
-This functionality can be used to perform special setup of call argument
|
||||
-registers if a target needs it.
|
||||
+before the point where argument registers are stored.
|
||||
+
|
||||
+@var{complete_args} is the state of the target function's cumulative
|
||||
+argument information after the final call to @code{TARGET_FUNCTION_ARG}.
|
||||
+@var{loc} is the location of the argument. @var{type} is the type of
|
||||
+the function being called, or @code{NULL_TREE} for libcalls.
|
||||
+
|
||||
For functions without arguments, the hook is called once with @code{pc_rtx}
|
||||
passed instead of an argument register.
|
||||
-Most ports do not need to implement anything for this hook.
|
||||
+
|
||||
+This functionality can be used to perform special setup of call argument
|
||||
+registers, if a target needs it. Most ports do not need to implement
|
||||
+anything for this hook.
|
||||
@end deftypefn
|
||||
|
||||
-@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (void)
|
||||
+@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (cumulative_args_t @var{complete_args})
|
||||
This target hook is invoked while generating RTL for a function call,
|
||||
just after the point where the return reg is copied into a pseudo. It
|
||||
signals that all the call argument and return registers for the just
|
||||
-emitted call are now no longer in use.
|
||||
+emitted call are now no longer in use. @var{complete_args} is the
|
||||
+state of the target function's cumulative argument information after
|
||||
+the final call to @code{TARGET_FUNCTION_ARG}.
|
||||
+
|
||||
Most ports do not need to implement anything for this hook.
|
||||
@end deftypefn
|
||||
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index 748b0777a..4ebc9afbf 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -3774,6 +3774,8 @@ These machine description macros help implement varargs:
|
||||
|
||||
@hook TARGET_STRICT_ARGUMENT_NAMING
|
||||
|
||||
+@hook TARGET_START_CALL_ARGS
|
||||
+
|
||||
@hook TARGET_CALL_ARGS
|
||||
|
||||
@hook TARGET_END_CALL_ARGS
|
||||
diff --git a/gcc/hooks.cc b/gcc/hooks.cc
|
||||
index b29233f4f..0f4e7ce10 100644
|
||||
--- a/gcc/hooks.cc
|
||||
+++ b/gcc/hooks.cc
|
||||
@@ -280,11 +280,6 @@ hook_void_FILEptr_tree (FILE *, tree)
|
||||
{
|
||||
}
|
||||
|
||||
-void
|
||||
-hook_void_rtx_tree (rtx, tree)
|
||||
-{
|
||||
-}
|
||||
-
|
||||
void
|
||||
hook_void_constcharptr (const char *)
|
||||
{
|
||||
diff --git a/gcc/hooks.h b/gcc/hooks.h
|
||||
index 1056e1e9e..e2a742f43 100644
|
||||
--- a/gcc/hooks.h
|
||||
+++ b/gcc/hooks.h
|
||||
@@ -83,7 +83,6 @@ extern void hook_void_FILEptr_constcharptr (FILE *, const char *);
|
||||
extern void hook_void_FILEptr_constcharptr_const_tree (FILE *, const char *,
|
||||
const_tree);
|
||||
extern bool hook_bool_FILEptr_rtx_false (FILE *, rtx);
|
||||
-extern void hook_void_rtx_tree (rtx, tree);
|
||||
extern void hook_void_FILEptr_tree (FILE *, tree);
|
||||
extern void hook_void_tree (tree);
|
||||
extern void hook_void_tree_treeptr (tree, tree *);
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index cf9f96eba..a57e51b0d 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -4784,32 +4784,67 @@ not generate any instructions in this case.",
|
||||
int *pretend_args_size, int second_time),
|
||||
default_setup_incoming_varargs)
|
||||
|
||||
+DEFHOOK
|
||||
+(start_call_args,
|
||||
+ "This target hook is invoked while generating RTL for a function call,\n\
|
||||
+after the argument values have been computed, and after stack arguments\n\
|
||||
+have been initialized, but before register arguments have been moved into\n\
|
||||
+their ABI-defined hard register locations. It precedes calls to the related\n\
|
||||
+hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}.\n\
|
||||
+The significance of this position in the call expansion is that:\n\
|
||||
+\n\
|
||||
+@itemize @bullet\n\
|
||||
+@item\n\
|
||||
+No argument registers are live.\n\
|
||||
+@item\n\
|
||||
+Although a call sequence can in general involve subcalls (such as using\n\
|
||||
+@code{memcpy} to copy large arguments), no such subcall will occur between\n\
|
||||
+the call to this hook and the generation of the main call instruction.\n\
|
||||
+@end itemize\n\
|
||||
+\n\
|
||||
+The single argument @var{complete_args} is the state of the target\n\
|
||||
+function's cumulative argument information after the final call to\n\
|
||||
+@code{TARGET_FUNCTION_ARG}.\n\
|
||||
+\n\
|
||||
+The hook can be used for things like switching processor mode, in cases\n\
|
||||
+where different calls need different processor modes. Most ports do not\n\
|
||||
+need to implement anything for this hook.",
|
||||
+ void, (cumulative_args_t complete_args),
|
||||
+ hook_void_CUMULATIVE_ARGS)
|
||||
+
|
||||
DEFHOOK
|
||||
(call_args,
|
||||
"While generating RTL for a function call, this target hook is invoked once\n\
|
||||
for each argument passed to the function, either a register returned by\n\
|
||||
@code{TARGET_FUNCTION_ARG} or a memory location. It is called just\n\
|
||||
-before the point where argument registers are stored. The type of the\n\
|
||||
-function to be called is also passed as the second argument; it is\n\
|
||||
-@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is\n\
|
||||
-invoked just after the code to copy the return reg has been emitted.\n\
|
||||
-This functionality can be used to perform special setup of call argument\n\
|
||||
-registers if a target needs it.\n\
|
||||
+before the point where argument registers are stored.\n\
|
||||
+\n\
|
||||
+@var{complete_args} is the state of the target function's cumulative\n\
|
||||
+argument information after the final call to @code{TARGET_FUNCTION_ARG}.\n\
|
||||
+@var{loc} is the location of the argument. @var{type} is the type of\n\
|
||||
+the function being called, or @code{NULL_TREE} for libcalls.\n\
|
||||
+\n\
|
||||
For functions without arguments, the hook is called once with @code{pc_rtx}\n\
|
||||
passed instead of an argument register.\n\
|
||||
-Most ports do not need to implement anything for this hook.",
|
||||
- void, (rtx, tree),
|
||||
- hook_void_rtx_tree)
|
||||
+\n\
|
||||
+This functionality can be used to perform special setup of call argument\n\
|
||||
+registers, if a target needs it. Most ports do not need to implement\n\
|
||||
+anything for this hook.",
|
||||
+ void, (cumulative_args_t complete_args, rtx loc, tree type),
|
||||
+ hook_void_CUMULATIVE_ARGS_rtx_tree)
|
||||
|
||||
DEFHOOK
|
||||
(end_call_args,
|
||||
"This target hook is invoked while generating RTL for a function call,\n\
|
||||
just after the point where the return reg is copied into a pseudo. It\n\
|
||||
signals that all the call argument and return registers for the just\n\
|
||||
-emitted call are now no longer in use.\n\
|
||||
+emitted call are now no longer in use. @var{complete_args} is the\n\
|
||||
+state of the target function's cumulative argument information after\n\
|
||||
+the final call to @code{TARGET_FUNCTION_ARG}.\n\
|
||||
+\n\
|
||||
Most ports do not need to implement anything for this hook.",
|
||||
- void, (void),
|
||||
- hook_void_void)
|
||||
+ void, (cumulative_args_t complete_args),
|
||||
+ hook_void_CUMULATIVE_ARGS)
|
||||
|
||||
DEFHOOK
|
||||
(push_argument,
|
||||
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
|
||||
index 399d6f874..c88afa5db 100644
|
||||
--- a/gcc/targhooks.cc
|
||||
+++ b/gcc/targhooks.cc
|
||||
@@ -772,12 +772,22 @@ hook_int_CUMULATIVE_ARGS_arg_info_0 (cumulative_args_t,
|
||||
return 0;
|
||||
}
|
||||
|
||||
+void
|
||||
+hook_void_CUMULATIVE_ARGS (cumulative_args_t)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
void
|
||||
hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t ca ATTRIBUTE_UNUSED,
|
||||
tree ATTRIBUTE_UNUSED)
|
||||
{
|
||||
}
|
||||
|
||||
+void
|
||||
+hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree)
|
||||
+{
|
||||
+}
|
||||
+
|
||||
/* Default implementation of TARGET_PUSH_ARGUMENT. */
|
||||
|
||||
bool
|
||||
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
|
||||
index ecce55ebe..c6e12fc2e 100644
|
||||
--- a/gcc/targhooks.h
|
||||
+++ b/gcc/targhooks.h
|
||||
@@ -138,8 +138,9 @@ extern bool hook_bool_CUMULATIVE_ARGS_arg_info_true
|
||||
(cumulative_args_t, const function_arg_info &);
|
||||
extern int hook_int_CUMULATIVE_ARGS_arg_info_0
|
||||
(cumulative_args_t, const function_arg_info &);
|
||||
-extern void hook_void_CUMULATIVE_ARGS_tree
|
||||
- (cumulative_args_t, tree);
|
||||
+extern void hook_void_CUMULATIVE_ARGS (cumulative_args_t);
|
||||
+extern void hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t, tree);
|
||||
+extern void hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree);
|
||||
extern const char *hook_invalid_arg_for_unprototyped_fn
|
||||
(const_tree, const_tree, const_tree);
|
||||
extern void default_function_arg_advance
|
||||
--
|
||||
2.33.0
|
||||
|
||||
490
0144-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
Normal file
490
0144-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch
Normal file
@ -0,0 +1,490 @@
|
||||
From 8684458c3faf358e5a15dfb73b4ef632341ddf0a Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 09:52:41 +0000
|
||||
Subject: [PATCH 052/157] [Backport][SME] Allow targets to add USEs to asms
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=414d795d8a365b6e72a84257caa36cb3bed7e0ba
|
||||
|
||||
Arm's SME has an array called ZA that for inline asm purposes
|
||||
is effectively a form of special-purpose memory. It doesn't
|
||||
have an associated storage type and so can't be passed and
|
||||
returned in normal C/C++ objects.
|
||||
|
||||
We'd therefore like "za" in a clobber list to mean that an inline
|
||||
asm can read from and write to ZA. (Just reading or writing
|
||||
individually is unlikely to be useful, but we could add syntax
|
||||
for that too if necessary.)
|
||||
|
||||
There is currently a TARGET_MD_ASM_ADJUST target hook that allows
|
||||
targets to add clobbers to an asm instruction. This patch
|
||||
extends that to allow targets to add USEs as well.
|
||||
|
||||
gcc/
|
||||
* target.def (md_asm_adjust): Add a uses parameter.
|
||||
* doc/tm.texi: Regenerate.
|
||||
* cfgexpand.cc (expand_asm_loc): Update call to md_asm_adjust.
|
||||
Handle any USEs created by the target.
|
||||
(expand_asm_stmt): Likewise.
|
||||
* recog.cc (asm_noperands): Handle asms with USEs.
|
||||
(decode_asm_operands): Likewise.
|
||||
* config/arm/aarch-common-protos.h (arm_md_asm_adjust): Add uses
|
||||
parameter.
|
||||
* config/arm/aarch-common.cc (arm_md_asm_adjust): Likewise.
|
||||
* config/arm/arm.cc (thumb1_md_asm_adjust): Likewise.
|
||||
* config/avr/avr.cc (avr_md_asm_adjust): Likewise.
|
||||
* config/cris/cris.cc (cris_md_asm_adjust): Likewise.
|
||||
* config/i386/i386.cc (ix86_md_asm_adjust): Likewise.
|
||||
* config/mn10300/mn10300.cc (mn10300_md_asm_adjust): Likewise.
|
||||
* config/nds32/nds32.cc (nds32_md_asm_adjust): Likewise.
|
||||
* config/pdp11/pdp11.cc (pdp11_md_asm_adjust): Likewise.
|
||||
* config/rs6000/rs6000.cc (rs6000_md_asm_adjust): Likewise.
|
||||
* config/s390/s390.cc (s390_md_asm_adjust): Likewise.
|
||||
* config/vax/vax.cc (vax_md_asm_adjust): Likewise.
|
||||
* config/visium/visium.cc (visium_md_asm_adjust): Likewise.
|
||||
---
|
||||
gcc/cfgexpand.cc | 37 +++++++++++++++++++---------
|
||||
gcc/config/arm/aarch-common-protos.h | 2 +-
|
||||
gcc/config/arm/aarch-common.cc | 3 ++-
|
||||
gcc/config/arm/arm.cc | 5 ++--
|
||||
gcc/config/avr/avr.cc | 1 +
|
||||
gcc/config/cris/cris.cc | 6 +++--
|
||||
gcc/config/i386/i386.cc | 5 ++--
|
||||
gcc/config/mn10300/mn10300.cc | 3 ++-
|
||||
gcc/config/nds32/nds32.cc | 4 +--
|
||||
gcc/config/pdp11/pdp11.cc | 6 +++--
|
||||
gcc/config/rs6000/rs6000.cc | 3 ++-
|
||||
gcc/config/s390/s390.cc | 3 ++-
|
||||
gcc/config/vax/vax.cc | 4 ++-
|
||||
gcc/config/visium/visium.cc | 5 ++--
|
||||
gcc/doc/tm.texi | 5 ++--
|
||||
gcc/recog.cc | 20 ++++++++++-----
|
||||
gcc/target.def | 5 ++--
|
||||
17 files changed, 77 insertions(+), 40 deletions(-)
|
||||
|
||||
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
|
||||
index 4691355aa..5401a4ebd 100644
|
||||
--- a/gcc/cfgexpand.cc
|
||||
+++ b/gcc/cfgexpand.cc
|
||||
@@ -2873,6 +2873,7 @@ expand_asm_loc (tree string, int vol, location_t locus)
|
||||
auto_vec<rtx> input_rvec, output_rvec;
|
||||
auto_vec<machine_mode> input_mode;
|
||||
auto_vec<const char *> constraints;
|
||||
+ auto_vec<rtx> use_rvec;
|
||||
auto_vec<rtx> clobber_rvec;
|
||||
HARD_REG_SET clobbered_regs;
|
||||
CLEAR_HARD_REG_SET (clobbered_regs);
|
||||
@@ -2882,16 +2883,20 @@ expand_asm_loc (tree string, int vol, location_t locus)
|
||||
|
||||
if (targetm.md_asm_adjust)
|
||||
targetm.md_asm_adjust (output_rvec, input_rvec, input_mode,
|
||||
- constraints, clobber_rvec, clobbered_regs,
|
||||
- locus);
|
||||
+ constraints, use_rvec, clobber_rvec,
|
||||
+ clobbered_regs, locus);
|
||||
|
||||
asm_op = body;
|
||||
nclobbers = clobber_rvec.length ();
|
||||
- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nclobbers));
|
||||
+ auto nuses = use_rvec.length ();
|
||||
+ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nuses + nclobbers));
|
||||
|
||||
- XVECEXP (body, 0, 0) = asm_op;
|
||||
- for (i = 0; i < nclobbers; i++)
|
||||
- XVECEXP (body, 0, i + 1) = gen_rtx_CLOBBER (VOIDmode, clobber_rvec[i]);
|
||||
+ i = 0;
|
||||
+ XVECEXP (body, 0, i++) = asm_op;
|
||||
+ for (rtx use : use_rvec)
|
||||
+ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use);
|
||||
+ for (rtx clobber : clobber_rvec)
|
||||
+ XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, clobber);
|
||||
}
|
||||
|
||||
emit_insn (body);
|
||||
@@ -3443,11 +3448,12 @@ expand_asm_stmt (gasm *stmt)
|
||||
maintaining source-level compatibility means automatically clobbering
|
||||
the flags register. */
|
||||
rtx_insn *after_md_seq = NULL;
|
||||
+ auto_vec<rtx> use_rvec;
|
||||
if (targetm.md_asm_adjust)
|
||||
after_md_seq
|
||||
= targetm.md_asm_adjust (output_rvec, input_rvec, input_mode,
|
||||
- constraints, clobber_rvec, clobbered_regs,
|
||||
- locus);
|
||||
+ constraints, use_rvec, clobber_rvec,
|
||||
+ clobbered_regs, locus);
|
||||
|
||||
/* Do not allow the hook to change the output and input count,
|
||||
lest it mess up the operand numbering. */
|
||||
@@ -3455,7 +3461,8 @@ expand_asm_stmt (gasm *stmt)
|
||||
gcc_assert (input_rvec.length() == ninputs);
|
||||
gcc_assert (constraints.length() == noutputs + ninputs);
|
||||
|
||||
- /* But it certainly can adjust the clobbers. */
|
||||
+ /* But it certainly can adjust the uses and clobbers. */
|
||||
+ unsigned nuses = use_rvec.length ();
|
||||
unsigned nclobbers = clobber_rvec.length ();
|
||||
|
||||
/* Third pass checks for easy conflicts. */
|
||||
@@ -3527,7 +3534,7 @@ expand_asm_stmt (gasm *stmt)
|
||||
ARGVEC CONSTRAINTS OPNAMES))
|
||||
If there is more than one, put them inside a PARALLEL. */
|
||||
|
||||
- if (noutputs == 0 && nclobbers == 0)
|
||||
+ if (noutputs == 0 && nuses == 0 && nclobbers == 0)
|
||||
{
|
||||
/* No output operands: put in a raw ASM_OPERANDS rtx. */
|
||||
if (nlabels > 0)
|
||||
@@ -3535,7 +3542,7 @@ expand_asm_stmt (gasm *stmt)
|
||||
else
|
||||
emit_insn (body);
|
||||
}
|
||||
- else if (noutputs == 1 && nclobbers == 0)
|
||||
+ else if (noutputs == 1 && nuses == 0 && nclobbers == 0)
|
||||
{
|
||||
ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints[0];
|
||||
if (nlabels > 0)
|
||||
@@ -3551,7 +3558,8 @@ expand_asm_stmt (gasm *stmt)
|
||||
if (num == 0)
|
||||
num = 1;
|
||||
|
||||
- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers));
|
||||
+ body = gen_rtx_PARALLEL (VOIDmode,
|
||||
+ rtvec_alloc (num + nuses + nclobbers));
|
||||
|
||||
/* For each output operand, store a SET. */
|
||||
for (i = 0; i < noutputs; ++i)
|
||||
@@ -3578,6 +3586,11 @@ expand_asm_stmt (gasm *stmt)
|
||||
if (i == 0)
|
||||
XVECEXP (body, 0, i++) = obody;
|
||||
|
||||
+ /* Add the uses specified by the target hook. No checking should
|
||||
+ be needed since this doesn't come directly from user code. */
|
||||
+ for (rtx use : use_rvec)
|
||||
+ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use);
|
||||
+
|
||||
/* Store (clobber REG) for each clobbered register specified. */
|
||||
for (unsigned j = 0; j < nclobbers; ++j)
|
||||
{
|
||||
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
|
||||
index ae0465159..3b525c174 100644
|
||||
--- a/gcc/config/arm/aarch-common-protos.h
|
||||
+++ b/gcc/config/arm/aarch-common-protos.h
|
||||
@@ -149,7 +149,7 @@ struct cpu_cost_table
|
||||
|
||||
rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> &constraints,
|
||||
+ vec<const char *> &constraints, vec<rtx> &,
|
||||
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
|
||||
location_t loc);
|
||||
|
||||
diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc
|
||||
index 04a53d750..365cfc140 100644
|
||||
--- a/gcc/config/arm/aarch-common.cc
|
||||
+++ b/gcc/config/arm/aarch-common.cc
|
||||
@@ -533,7 +533,8 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
|
||||
rtx_insn *
|
||||
arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
|
||||
+ vec<const char *> &constraints,
|
||||
+ vec<rtx> & /*uses*/, vec<rtx> & /*clobbers*/,
|
||||
HARD_REG_SET & /*clobbered_regs*/, location_t loc)
|
||||
{
|
||||
bool saw_asm_flag = false;
|
||||
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
|
||||
index b700c23b8..c72e9c0b0 100644
|
||||
--- a/gcc/config/arm/arm.cc
|
||||
+++ b/gcc/config/arm/arm.cc
|
||||
@@ -325,7 +325,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
|
||||
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
|
||||
vec<machine_mode> &,
|
||||
vec<const char *> &, vec<rtx> &,
|
||||
- HARD_REG_SET &, location_t);
|
||||
+ vec<rtx> &, HARD_REG_SET &, location_t);
|
||||
static const char *arm_identify_fpu_from_isa (sbitmap);
|
||||
|
||||
/* Table of machine attributes. */
|
||||
@@ -34209,7 +34209,8 @@ arm_stack_protect_guard (void)
|
||||
rtx_insn *
|
||||
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
|
||||
+ vec<const char *> &constraints,
|
||||
+ vec<rtx> &, vec<rtx> & /*clobbers*/,
|
||||
HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
|
||||
{
|
||||
for (unsigned i = 0, n = outputs.length (); i < n; ++i)
|
||||
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
|
||||
index 4ed390e4c..1b5a95410 100644
|
||||
--- a/gcc/config/avr/avr.cc
|
||||
+++ b/gcc/config/avr/avr.cc
|
||||
@@ -14497,6 +14497,7 @@ static rtx_insn *
|
||||
avr_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
vec<const char *> &/*constraints*/,
|
||||
+ vec<rtx> &/*uses*/,
|
||||
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
|
||||
location_t /*loc*/)
|
||||
{
|
||||
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
|
||||
index f0017d630..3a1c85481 100644
|
||||
--- a/gcc/config/cris/cris.cc
|
||||
+++ b/gcc/config/cris/cris.cc
|
||||
@@ -151,7 +151,8 @@ static void cris_function_arg_advance (cumulative_args_t,
|
||||
const function_arg_info &);
|
||||
static rtx_insn *cris_md_asm_adjust (vec<rtx> &, vec<rtx> &,
|
||||
vec<machine_mode> &, vec<const char *> &,
|
||||
- vec<rtx> &, HARD_REG_SET &, location_t);
|
||||
+ vec<rtx> &, vec<rtx> &,
|
||||
+ HARD_REG_SET &, location_t);
|
||||
|
||||
static void cris_option_override (void);
|
||||
|
||||
@@ -3506,7 +3507,8 @@ cris_function_arg_advance (cumulative_args_t ca_v,
|
||||
static rtx_insn *
|
||||
cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> &constraints, vec<rtx> &clobbers,
|
||||
+ vec<const char *> &constraints,
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
|
||||
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
|
||||
{
|
||||
/* For the time being, all asms clobber condition codes.
|
||||
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
|
||||
index 593185fa6..83a0d8abb 100644
|
||||
--- a/gcc/config/i386/i386.cc
|
||||
+++ b/gcc/config/i386/i386.cc
|
||||
@@ -22252,8 +22252,9 @@ ix86_c_mode_for_suffix (char suffix)
|
||||
static rtx_insn *
|
||||
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> &constraints, vec<rtx> &clobbers,
|
||||
- HARD_REG_SET &clobbered_regs, location_t loc)
|
||||
+ vec<const char *> &constraints, vec<rtx> &/*uses*/,
|
||||
+ vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
|
||||
+ location_t loc)
|
||||
{
|
||||
bool saw_asm_flag = false;
|
||||
|
||||
diff --git a/gcc/config/mn10300/mn10300.cc b/gcc/config/mn10300/mn10300.cc
|
||||
index 2a58dd925..2ca2c769c 100644
|
||||
--- a/gcc/config/mn10300/mn10300.cc
|
||||
+++ b/gcc/config/mn10300/mn10300.cc
|
||||
@@ -2849,7 +2849,8 @@ mn10300_conditional_register_usage (void)
|
||||
static rtx_insn *
|
||||
mn10300_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
|
||||
+ vec<const char *> & /*constraints*/,
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
|
||||
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
|
||||
{
|
||||
clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG));
|
||||
diff --git a/gcc/config/nds32/nds32.cc b/gcc/config/nds32/nds32.cc
|
||||
index 71fe9e8bc..27530495f 100644
|
||||
--- a/gcc/config/nds32/nds32.cc
|
||||
+++ b/gcc/config/nds32/nds32.cc
|
||||
@@ -4199,8 +4199,8 @@ nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
|
||||
vec<rtx> &inputs ATTRIBUTE_UNUSED,
|
||||
vec<machine_mode> &input_modes ATTRIBUTE_UNUSED,
|
||||
vec<const char *> &constraints ATTRIBUTE_UNUSED,
|
||||
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
|
||||
- location_t /*loc*/)
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
|
||||
+ HARD_REG_SET &clobbered_regs, location_t /*loc*/)
|
||||
{
|
||||
if (!flag_inline_asm_r15)
|
||||
{
|
||||
diff --git a/gcc/config/pdp11/pdp11.cc b/gcc/config/pdp11/pdp11.cc
|
||||
index 380223439..25cf62cbc 100644
|
||||
--- a/gcc/config/pdp11/pdp11.cc
|
||||
+++ b/gcc/config/pdp11/pdp11.cc
|
||||
@@ -155,7 +155,8 @@ static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool);
|
||||
static int pdp11_insn_cost (rtx_insn *insn, bool speed);
|
||||
static rtx_insn *pdp11_md_asm_adjust (vec<rtx> &, vec<rtx> &,
|
||||
vec<machine_mode> &, vec<const char *> &,
|
||||
- vec<rtx> &, HARD_REG_SET &, location_t);
|
||||
+ vec<rtx> &, vec<rtx> &,
|
||||
+ HARD_REG_SET &, location_t);
|
||||
static bool pdp11_return_in_memory (const_tree, const_tree);
|
||||
static rtx pdp11_function_value (const_tree, const_tree, bool);
|
||||
static rtx pdp11_libcall_value (machine_mode, const_rtx);
|
||||
@@ -2137,7 +2138,8 @@ pdp11_cmp_length (rtx *operands, int words)
|
||||
static rtx_insn *
|
||||
pdp11_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
|
||||
+ vec<const char *> & /*constraints*/,
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
|
||||
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
|
||||
{
|
||||
clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM));
|
||||
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
|
||||
index 0b75861bb..55d4ce751 100644
|
||||
--- a/gcc/config/rs6000/rs6000.cc
|
||||
+++ b/gcc/config/rs6000/rs6000.cc
|
||||
@@ -3443,7 +3443,8 @@ rs6000_builtin_mask_calculate (void)
|
||||
static rtx_insn *
|
||||
rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
|
||||
+ vec<const char *> & /*constraints*/,
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
|
||||
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
|
||||
{
|
||||
clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
|
||||
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
|
||||
index ae0cf9ef5..f1599a5c5 100644
|
||||
--- a/gcc/config/s390/s390.cc
|
||||
+++ b/gcc/config/s390/s390.cc
|
||||
@@ -16994,7 +16994,8 @@ s390_hard_fp_reg_p (rtx x)
|
||||
static rtx_insn *
|
||||
s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
|
||||
vec<machine_mode> &input_modes,
|
||||
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
|
||||
+ vec<const char *> &constraints,
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &/*clobbers*/,
|
||||
HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
|
||||
{
|
||||
if (!TARGET_VXE)
|
||||
diff --git a/gcc/config/vax/vax.cc b/gcc/config/vax/vax.cc
|
||||
index 28c1af59a..7673a1428 100644
|
||||
--- a/gcc/config/vax/vax.cc
|
||||
+++ b/gcc/config/vax/vax.cc
|
||||
@@ -57,7 +57,8 @@ static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool);
|
||||
static machine_mode vax_cc_modes_compatible (machine_mode, machine_mode);
|
||||
static rtx_insn *vax_md_asm_adjust (vec<rtx> &, vec<rtx> &,
|
||||
vec<machine_mode> &, vec<const char *> &,
|
||||
- vec<rtx> &, HARD_REG_SET &, location_t);
|
||||
+ vec<rtx> &, vec<rtx> &, HARD_REG_SET &,
|
||||
+ location_t);
|
||||
static rtx vax_function_arg (cumulative_args_t, const function_arg_info &);
|
||||
static void vax_function_arg_advance (cumulative_args_t,
|
||||
const function_arg_info &);
|
||||
@@ -1179,6 +1180,7 @@ vax_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
|
||||
vec<rtx> &inputs ATTRIBUTE_UNUSED,
|
||||
vec<machine_mode> &input_modes ATTRIBUTE_UNUSED,
|
||||
vec<const char *> &constraints ATTRIBUTE_UNUSED,
|
||||
+ vec<rtx> &/*uses*/,
|
||||
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
|
||||
location_t /*loc*/)
|
||||
{
|
||||
diff --git a/gcc/config/visium/visium.cc b/gcc/config/visium/visium.cc
|
||||
index 03c1a33e1..35b46ced9 100644
|
||||
--- a/gcc/config/visium/visium.cc
|
||||
+++ b/gcc/config/visium/visium.cc
|
||||
@@ -190,7 +190,7 @@ static tree visium_build_builtin_va_list (void);
|
||||
static rtx_insn *visium_md_asm_adjust (vec<rtx> &, vec<rtx> &,
|
||||
vec<machine_mode> &,
|
||||
vec<const char *> &, vec<rtx> &,
|
||||
- HARD_REG_SET &, location_t);
|
||||
+ vec<rtx> &, HARD_REG_SET &, location_t);
|
||||
|
||||
static bool visium_legitimate_constant_p (machine_mode, rtx);
|
||||
|
||||
@@ -794,7 +794,8 @@ visium_conditional_register_usage (void)
|
||||
static rtx_insn *
|
||||
visium_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
|
||||
vec<machine_mode> & /*input_modes*/,
|
||||
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
|
||||
+ vec<const char *> & /*constraints*/,
|
||||
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
|
||||
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
|
||||
{
|
||||
clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM));
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index 357c29a4d..4f93facf7 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -11626,10 +11626,11 @@ from shared libraries (DLLs).
|
||||
You need not define this macro if it would always evaluate to zero.
|
||||
@end defmac
|
||||
|
||||
-@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<machine_mode>& @var{input_modes}, vec<const char *>& @var{constraints}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc})
|
||||
+@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<machine_mode>& @var{input_modes}, vec<const char *>& @var{constraints}, vec<rtx>& @var{usess}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc})
|
||||
This target hook may add @dfn{clobbers} to @var{clobbers} and
|
||||
@var{clobbered_regs} for any hard regs the port wishes to automatically
|
||||
-clobber for an asm. The @var{outputs} and @var{inputs} may be inspected
|
||||
+clobber for an asm. It can also add hard registers that are used by the
|
||||
+asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected
|
||||
to avoid clobbering a register that is already used by the asm. @var{loc}
|
||||
is the source location of the asm.
|
||||
|
||||
diff --git a/gcc/recog.cc b/gcc/recog.cc
|
||||
index cd2410ab2..5b81d5e21 100644
|
||||
--- a/gcc/recog.cc
|
||||
+++ b/gcc/recog.cc
|
||||
@@ -1977,13 +1977,17 @@ asm_noperands (const_rtx body)
|
||||
{
|
||||
/* Multiple output operands, or 1 output plus some clobbers:
|
||||
body is
|
||||
- [(set OUTPUT (asm_operands ...))... (clobber (reg ...))...]. */
|
||||
- /* Count backwards through CLOBBERs to determine number of SETs. */
|
||||
+ [(set OUTPUT (asm_operands ...))...
|
||||
+ (use (reg ...))...
|
||||
+ (clobber (reg ...))...]. */
|
||||
+ /* Count backwards through USEs and CLOBBERs to determine
|
||||
+ number of SETs. */
|
||||
for (i = XVECLEN (body, 0); i > 0; i--)
|
||||
{
|
||||
if (GET_CODE (XVECEXP (body, 0, i - 1)) == SET)
|
||||
break;
|
||||
- if (GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER)
|
||||
+ if (GET_CODE (XVECEXP (body, 0, i - 1)) != USE
|
||||
+ && GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER)
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -2010,10 +2014,13 @@ asm_noperands (const_rtx body)
|
||||
else
|
||||
{
|
||||
/* 0 outputs, but some clobbers:
|
||||
- body is [(asm_operands ...) (clobber (reg ...))...]. */
|
||||
+ body is [(asm_operands ...)
|
||||
+ (use (reg ...))...
|
||||
+ (clobber (reg ...))...]. */
|
||||
/* Make sure all the other parallel things really are clobbers. */
|
||||
for (i = XVECLEN (body, 0) - 1; i > 0; i--)
|
||||
- if (GET_CODE (XVECEXP (body, 0, i)) != CLOBBER)
|
||||
+ if (GET_CODE (XVECEXP (body, 0, i)) != USE
|
||||
+ && GET_CODE (XVECEXP (body, 0, i)) != CLOBBER)
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -2080,7 +2087,8 @@ decode_asm_operands (rtx body, rtx *operands, rtx **operand_locs,
|
||||
the SETs. Their constraints are in the ASM_OPERANDS itself. */
|
||||
for (i = 0; i < nparallel; i++)
|
||||
{
|
||||
- if (GET_CODE (XVECEXP (body, 0, i)) == CLOBBER)
|
||||
+ if (GET_CODE (XVECEXP (body, 0, i)) == USE
|
||||
+ || GET_CODE (XVECEXP (body, 0, i)) == CLOBBER)
|
||||
break; /* Past last SET */
|
||||
gcc_assert (GET_CODE (XVECEXP (body, 0, i)) == SET);
|
||||
if (operands)
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index a57e51b0d..60096c60c 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -4309,7 +4309,8 @@ DEFHOOK
|
||||
(md_asm_adjust,
|
||||
"This target hook may add @dfn{clobbers} to @var{clobbers} and\n\
|
||||
@var{clobbered_regs} for any hard regs the port wishes to automatically\n\
|
||||
-clobber for an asm. The @var{outputs} and @var{inputs} may be inspected\n\
|
||||
+clobber for an asm. It can also add hard registers that are used by the\n\
|
||||
+asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected\n\
|
||||
to avoid clobbering a register that is already used by the asm. @var{loc}\n\
|
||||
is the source location of the asm.\n\
|
||||
\n\
|
||||
@@ -4320,7 +4321,7 @@ changes to @var{inputs} must be accompanied by the corresponding changes\n\
|
||||
to @var{input_modes}.",
|
||||
rtx_insn *,
|
||||
(vec<rtx>& outputs, vec<rtx>& inputs, vec<machine_mode>& input_modes,
|
||||
- vec<const char *>& constraints, vec<rtx>& clobbers,
|
||||
+ vec<const char *>& constraints, vec<rtx>& usess, vec<rtx>& clobbers,
|
||||
HARD_REG_SET& clobbered_regs, location_t loc),
|
||||
NULL)
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
998
0145-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
Normal file
998
0145-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch
Normal file
@ -0,0 +1,998 @@
|
||||
From 763db5ed42e18cdddf979dda82056345e3af15ed Mon Sep 17 00:00:00 2001
|
||||
From: Tamar Christina <tamar.christina@arm.com>
|
||||
Date: Mon, 19 Jun 2023 15:47:46 +0100
|
||||
Subject: [PATCH 053/157] [Backport][SME] New compact syntax for insn and
|
||||
insn_split in Machine Descriptions.
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=957ae90406591739b68e95ad49a0232faeb74217
|
||||
|
||||
This patch adds support for a compact syntax for specifying constraints in
|
||||
instruction patterns. Credit for the idea goes to Richard Earnshaw.
|
||||
|
||||
With this new syntax we want a clean break from the current limitations to make
|
||||
something that is hopefully easier to use and maintain.
|
||||
|
||||
The idea behind this compact syntax is that often times it's quite hard to
|
||||
correlate the entries in the constraints list, attributes and instruction lists.
|
||||
|
||||
One has to count and this often is tedious. Additionally when changing a single
|
||||
line in the insn multiple lines in a diff change, making it harder to see what's
|
||||
going on.
|
||||
|
||||
This new syntax takes into account many of the common things that are done in MD
|
||||
files. It's also worth saying that this version is intended to deal with the
|
||||
common case of string-based alternatives. For C chunks we have some ideas
|
||||
but those are not intended to be addressed here.
|
||||
|
||||
It's easiest to explain with an example:
|
||||
|
||||
normal syntax:
|
||||
|
||||
(define_insn_and_split "*movsi_aarch64"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w")
|
||||
(match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
|
||||
"(register_operand (operands[0], SImode)
|
||||
|| aarch64_reg_or_zero (operands[1], SImode))"
|
||||
"@
|
||||
mov\\t%w0, %w1
|
||||
mov\\t%w0, %w1
|
||||
mov\\t%w0, %w1
|
||||
mov\\t%w0, %1
|
||||
#
|
||||
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
ldr\\t%w0, %1
|
||||
ldr\\t%s0, %1
|
||||
str\\t%w1, %0
|
||||
str\\t%s1, %0
|
||||
adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
|
||||
adr\\t%x0, %c1
|
||||
adrp\\t%x0, %A1
|
||||
fmov\\t%s0, %w1
|
||||
fmov\\t%w0, %s1
|
||||
fmov\\t%s0, %s1
|
||||
* return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
|
||||
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
|
||||
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
||||
[(const_int 0)]
|
||||
"{
|
||||
aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
DONE;
|
||||
}"
|
||||
;; The "mov_imm" type for CNT is just a placeholder.
|
||||
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
|
||||
load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
|
||||
(set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
|
||||
(set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
|
||||
]
|
||||
)
|
||||
|
||||
New syntax:
|
||||
|
||||
(define_insn_and_split "*movsi_aarch64"
|
||||
[(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
(match_operand:SI 1 "aarch64_mov_operand"))]
|
||||
"(register_operand (operands[0], SImode)
|
||||
|| aarch64_reg_or_zero (operands[1], SImode))"
|
||||
{@ [cons: =0, 1; attrs: type, arch, length]
|
||||
[r , r ; mov_reg , * , 4] mov\t%w0, %w1
|
||||
[k , r ; mov_reg , * , 4] ^
|
||||
[r , k ; mov_reg , * , 4] ^
|
||||
[r , M ; mov_imm , * , 4] mov\t%w0, %1
|
||||
[r , n ; mov_imm , * ,16] #
|
||||
/* The "mov_imm" type for CNT is just a placeholder. */
|
||||
[r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
[r , m ; load_4 , * , 4] ldr\t%w0, %1
|
||||
[w , m ; load_4 , fp , 4] ldr\t%s0, %1
|
||||
[m , rZ ; store_4 , * , 4] str\t%w1, %0
|
||||
[m , w ; store_4 , fp , 4] str\t%s1, %0
|
||||
[r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
|
||||
[r , Usa; adr , * , 4] adr\t%x0, %c1
|
||||
[r , Ush; adr , * , 4] adrp\t%x0, %A1
|
||||
[w , rZ ; f_mcr , fp , 4] fmov\t%s0, %w1
|
||||
[r , w ; f_mrc , fp , 4] fmov\t%w0, %s1
|
||||
[w , w ; fmov , fp , 4] fmov\t%s0, %s1
|
||||
[w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
|
||||
}
|
||||
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
|
||||
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
||||
[(const_int 0)]
|
||||
{
|
||||
aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
The main syntax rules are as follows (See docs for full rules):
|
||||
- Template must start with "{@" and end with "}" to use the new syntax.
|
||||
- "{@" is followed by a layout in square brackets which is "cons:" followed by
|
||||
a list of match_operand/match_scratch IDs, then a semicolon, then the
|
||||
same for attributes ("attrs:"). Both sections are optional (so you can
|
||||
use only cons, or only attrs, or both), and cons must come before attrs
|
||||
if present.
|
||||
- Each alternative begins with any amount of whitespace.
|
||||
- Following the whitespace is a comma-separated list of constraints and/or
|
||||
attributes within brackets [], with sections separated by a semicolon.
|
||||
- Following the closing ']' is any amount of whitespace, and then the actual
|
||||
asm output.
|
||||
- Spaces are allowed in the list (they will simply be removed).
|
||||
- All alternatives should be specified: a blank list should be
|
||||
"[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
|
||||
you leave certain attributes empty, I have found).
|
||||
- The actual constraint string in the match_operand or match_scratch, and
|
||||
the attribute string in the set_attr, must be blank or an empty string
|
||||
(you can't combine the old and new syntaxes).
|
||||
- The common idiom * return can be shortened by using <<.
|
||||
- Any unexpanded iterators left during processing will result in an error at
|
||||
compile time. If for some reason <> is needed in the output then these
|
||||
must be escaped using \.
|
||||
- Within an {@ block both multiline and singleline C comments are allowed, but
|
||||
when used outside of a C block they must be the only non-whitespace blocks on
|
||||
the line.
|
||||
- Inside an {@ block any unexpanded iterators will result in a compile time
|
||||
fault instead of incorrect assembly being generated at runtime. If the
|
||||
literal <> is needed in the output this needs to be escaped with \<\>.
|
||||
- This check is not performed inside C blocks (lines starting with *).
|
||||
- Instead of copying the previous instruction again in the next pattern, one
|
||||
can use ^ to refer to the previous asm string.
|
||||
|
||||
This patch works by blindly transforming the new syntax into the old syntax,
|
||||
so it doesn't do extensive checking. However, it does verify that:
|
||||
- The correct number of constraints/attributes are specified.
|
||||
- You haven't mixed old and new syntax.
|
||||
- The specified operand IDs/attribute names actually exist.
|
||||
- You don't have duplicate cons
|
||||
|
||||
If something goes wrong, it may write invalid constraints/attributes/template
|
||||
back into the rtx. But this shouldn't matter because error_at will cause the
|
||||
program to fail on exit anyway.
|
||||
|
||||
Because this transformation occurs as early as possible (before patterns are
|
||||
queued), the rest of the compiler can completely ignore the new syntax and
|
||||
assume that the old syntax will always be used.
|
||||
|
||||
This doesn't seem to have any measurable effect on the runtime of gen*
|
||||
programs.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* gensupport.cc (class conlist, add_constraints, add_attributes,
|
||||
skip_spaces, expect_char, preprocess_compact_syntax,
|
||||
parse_section_layout, parse_section, convert_syntax): New.
|
||||
(process_rtx): Check for conversion.
|
||||
* genoutput.cc (process_template): Check for unresolved iterators.
|
||||
(class data): Add compact_syntax_p.
|
||||
(gen_insn): Use it.
|
||||
* gensupport.h (compact_syntax): New.
|
||||
(hash-set.h): Include.
|
||||
* doc/md.texi: Document it.
|
||||
|
||||
Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com>
|
||||
---
|
||||
gcc/doc/md.texi | 163 +++++++++++++++
|
||||
gcc/genoutput.cc | 48 ++++-
|
||||
gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
gcc/gensupport.h | 3 +
|
||||
4 files changed, 709 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
|
||||
index 3b544358b..04ace8f7f 100644
|
||||
--- a/gcc/doc/md.texi
|
||||
+++ b/gcc/doc/md.texi
|
||||
@@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
|
||||
from such an insn.
|
||||
* Output Statement:: For more generality, write C code to output
|
||||
the assembler code.
|
||||
+* Compact Syntax:: Compact syntax for writing machine descriptors.
|
||||
* Predicates:: Controlling what kinds of operands can be used
|
||||
for an insn.
|
||||
* Constraints:: Fine-tuning operand selection.
|
||||
@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
|
||||
@end group
|
||||
@end smallexample
|
||||
|
||||
+@node Compact Syntax
|
||||
+@section Compact Syntax
|
||||
+@cindex compact syntax
|
||||
+
|
||||
+When a @code{define_insn} or @code{define_insn_and_split} has multiple
|
||||
+alternatives it may be beneficial to use the compact syntax when specifying
|
||||
+alternatives.
|
||||
+
|
||||
+This syntax puts the constraints and attributes on the same horizontal line as
|
||||
+the instruction assembly template.
|
||||
+
|
||||
+As an example
|
||||
+
|
||||
+@smallexample
|
||||
+@group
|
||||
+(define_insn_and_split ""
|
||||
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
|
||||
+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv"))]
|
||||
+ ""
|
||||
+ "@@
|
||||
+ mov\\t%w0, %w1
|
||||
+ mov\\t%w0, %w1
|
||||
+ mov\\t%w0, %w1
|
||||
+ mov\\t%w0, %1
|
||||
+ #
|
||||
+ * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
|
||||
+ "&& true"
|
||||
+ [(const_int 0)]
|
||||
+ @{
|
||||
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
+ DONE;
|
||||
+ @}
|
||||
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
|
||||
+ (set_attr "arch" "*,*,*,*,*,sve")
|
||||
+ (set_attr "length" "4,4,4,4,*, 4")
|
||||
+]
|
||||
+)
|
||||
+@end group
|
||||
+@end smallexample
|
||||
+
|
||||
+can be better expressed as:
|
||||
+
|
||||
+@smallexample
|
||||
+@group
|
||||
+(define_insn_and_split ""
|
||||
+ [(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
+ (match_operand:SI 1 "aarch64_mov_operand"))]
|
||||
+ ""
|
||||
+ @{@@ [cons: =0, 1; attrs: type, arch, length]
|
||||
+ [r , r ; mov_reg , * , 4] mov\t%w0, %w1
|
||||
+ [k , r ; mov_reg , * , 4] ^
|
||||
+ [r , k ; mov_reg , * , 4] ^
|
||||
+ [r , M ; mov_imm , * , 4] mov\t%w0, %1
|
||||
+ [r , n ; mov_imm , * , *] #
|
||||
+ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ @}
|
||||
+ "&& true"
|
||||
+ [(const_int 0)]
|
||||
+ @{
|
||||
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
+ DONE;
|
||||
+ @}
|
||||
+)
|
||||
+@end group
|
||||
+@end smallexample
|
||||
+
|
||||
+The syntax rules are as follows:
|
||||
+@itemize @bullet
|
||||
+@item
|
||||
+Templates must start with @samp{@{@@} to use the new syntax.
|
||||
+
|
||||
+@item
|
||||
+@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:}
|
||||
+followed by a comma-separated list of @code{match_operand}/@code{match_scratch}
|
||||
+operand numbers, then a semicolon, followed by the same for attributes
|
||||
+(@samp{attrs:}). Operand modifiers like @code{=} and @code{+} can be placed
|
||||
+before an operand number.
|
||||
+Both sections are optional (so you can use only @samp{cons}, or only
|
||||
+@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if
|
||||
+present.
|
||||
+
|
||||
+@item
|
||||
+Each alternative begins with any amount of whitespace.
|
||||
+
|
||||
+@item
|
||||
+Following the whitespace is a comma-separated list of "constraints" and/or
|
||||
+"attributes" within brackets @code{[]}, with sections separated by a semicolon.
|
||||
+
|
||||
+@item
|
||||
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
|
||||
+This allows less copy-pasting between alternatives and reduces the number of
|
||||
+lines to update on changes.
|
||||
+
|
||||
+@item
|
||||
+When using C functions for output, the idiom @samp{* return @var{function};}
|
||||
+can be replaced with the shorthand @samp{<< @var{function};}.
|
||||
+
|
||||
+@item
|
||||
+Following the closing @samp{]} is any amount of whitespace, and then the actual
|
||||
+asm output.
|
||||
+
|
||||
+@item
|
||||
+Spaces are allowed in the list (they will simply be removed).
|
||||
+
|
||||
+@item
|
||||
+All constraint alternatives should be specified. For example, a list of
|
||||
+three blank alternatives should be written @samp{[,,]} rather than
|
||||
+@samp{[]}.
|
||||
+
|
||||
+@item
|
||||
+All attribute alternatives should be non-empty, with @samp{*}
|
||||
+representing the default attribute value. For example, a list of three
|
||||
+default attribute values should be written @samp{[*,*,*]} rather than
|
||||
+@samp{[]}.
|
||||
+
|
||||
+@item
|
||||
+Within an @samp{@{@@} block both multiline and singleline C comments are
|
||||
+allowed, but when used outside of a C block they must be the only non-whitespace
|
||||
+blocks on the line.
|
||||
+
|
||||
+@item
|
||||
+Within an @samp{@{@@} block, any iterators that do not get expanded will result
|
||||
+in an error. If for some reason it is required to have @code{<} or @code{>} in
|
||||
+the output then these must be escaped using @backslashchar{}.
|
||||
+
|
||||
+@item
|
||||
+It is possible to use the @samp{attrs} list to specify some attributes and to
|
||||
+use the normal @code{set_attr} syntax to specify other attributes. There must
|
||||
+not be any overlap between the two lists.
|
||||
+
|
||||
+In other words, the following is valid:
|
||||
+@smallexample
|
||||
+@group
|
||||
+(define_insn_and_split ""
|
||||
+ [(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
+ (match_operand:SI 1 "aarch64_mov_operand"))]
|
||||
+ ""
|
||||
+ @{@@ [cons: 0, 1; attrs: type, arch, length]@}
|
||||
+ @dots{}
|
||||
+ [(set_attr "foo" "mov_imm")]
|
||||
+)
|
||||
+@end group
|
||||
+@end smallexample
|
||||
+
|
||||
+but this is not valid:
|
||||
+@smallexample
|
||||
+@group
|
||||
+(define_insn_and_split ""
|
||||
+ [(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
+ (match_operand:SI 1 "aarch64_mov_operand"))]
|
||||
+ ""
|
||||
+ @{@@ [cons: 0, 1; attrs: type, arch, length]@}
|
||||
+ @dots{}
|
||||
+ [(set_attr "arch" "bar")
|
||||
+ (set_attr "foo" "mov_imm")]
|
||||
+)
|
||||
+@end group
|
||||
+@end smallexample
|
||||
+
|
||||
+because it specifies @code{arch} twice.
|
||||
+@end itemize
|
||||
+
|
||||
@node Predicates
|
||||
@section Predicates
|
||||
@cindex predicates
|
||||
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
|
||||
index 6bb03e286..de5dafdbf 100644
|
||||
--- a/gcc/genoutput.cc
|
||||
+++ b/gcc/genoutput.cc
|
||||
@@ -157,6 +157,7 @@ public:
|
||||
int n_alternatives; /* Number of alternatives in each constraint */
|
||||
int operand_number; /* Operand index in the big array. */
|
||||
int output_format; /* INSN_OUTPUT_FORMAT_*. */
|
||||
+ bool compact_syntax_p;
|
||||
struct operand_data operand[MAX_MAX_OPERANDS];
|
||||
};
|
||||
|
||||
@@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code)
|
||||
if (sp != ep)
|
||||
message_at (d->loc, "trailing whitespace in output template");
|
||||
|
||||
- while (cp < sp)
|
||||
+ /* Check for any unexpanded iterators. */
|
||||
+ if (bp[0] != '*' && d->compact_syntax_p)
|
||||
{
|
||||
- putchar (*cp);
|
||||
- cp++;
|
||||
+ const char *p = cp;
|
||||
+ const char *last_bracket = nullptr;
|
||||
+ while (p < sp)
|
||||
+ {
|
||||
+ if (*p == '\\' && p + 1 < sp)
|
||||
+ {
|
||||
+ putchar (*p);
|
||||
+ putchar (*(p+1));
|
||||
+ p += 2;
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (*p == '>' && last_bracket && *last_bracket == '<')
|
||||
+ {
|
||||
+ int len = p - last_bracket;
|
||||
+ fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'",
|
||||
+ len - 1, last_bracket + 1, cp);
|
||||
+ }
|
||||
+ else if (*p == '<' || *p == '>')
|
||||
+ last_bracket = p;
|
||||
+
|
||||
+ putchar (*p);
|
||||
+ p += 1;
|
||||
+ }
|
||||
+
|
||||
+ if (last_bracket)
|
||||
+ {
|
||||
+ char *nl = strchr (const_cast<char*> (cp), '\n');
|
||||
+ if (nl)
|
||||
+ *nl = '\0';
|
||||
+ fatal_at (d->loc, "unmatched angle brackets, likely an "
|
||||
+ "error in iterator syntax in %s", cp);
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ while (cp < sp)
|
||||
+ putchar (*(cp++));
|
||||
}
|
||||
|
||||
+ cp = sp;
|
||||
+
|
||||
if (!found_star)
|
||||
puts ("\",");
|
||||
else if (*bp != '*')
|
||||
@@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info)
|
||||
else
|
||||
d->name = 0;
|
||||
|
||||
+ d->compact_syntax_p = compact_syntax.contains (insn);
|
||||
+
|
||||
/* Build up the list in the same order as the insns are seen
|
||||
in the machine description. */
|
||||
d->next = 0;
|
||||
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
|
||||
index 42680499d..23c61dcdd 100644
|
||||
--- a/gcc/gensupport.cc
|
||||
+++ b/gcc/gensupport.cc
|
||||
@@ -18,6 +18,8 @@
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "bconfig.h"
|
||||
+#define INCLUDE_STRING
|
||||
+#define INCLUDE_VECTOR
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
@@ -33,6 +35,8 @@
|
||||
static rtx operand_data[MAX_OPERANDS];
|
||||
static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
|
||||
static char used_operands_numbers[MAX_OPERANDS];
|
||||
+/* List of entries which are part of the new syntax. */
|
||||
+hash_set<rtx> compact_syntax;
|
||||
|
||||
|
||||
/* In case some macros used by files we include need it, define this here. */
|
||||
@@ -545,6 +549,497 @@ gen_rewrite_sequence (rtvec vec)
|
||||
return new_vec;
|
||||
}
|
||||
|
||||
+/* The following is for handling the compact syntax for constraints and
|
||||
+ attributes.
|
||||
+
|
||||
+ The normal syntax looks like this:
|
||||
+
|
||||
+ ...
|
||||
+ (match_operand: 0 "s_register_operand" "r,I,k")
|
||||
+ (match_operand: 2 "s_register_operand" "r,k,I")
|
||||
+ ...
|
||||
+ "@
|
||||
+ <asm>
|
||||
+ <asm>
|
||||
+ <asm>"
|
||||
+ ...
|
||||
+ (set_attr "length" "4,8,8")
|
||||
+
|
||||
+ The compact syntax looks like this:
|
||||
+
|
||||
+ ...
|
||||
+ (match_operand: 0 "s_register_operand")
|
||||
+ (match_operand: 2 "s_register_operand")
|
||||
+ ...
|
||||
+ {@ [cons: 0, 2; attrs: length]
|
||||
+ [r,r; 4] <asm>
|
||||
+ [I,k; 8] <asm>
|
||||
+ [k,I; 8] <asm>
|
||||
+ }
|
||||
+ ...
|
||||
+ [<other attributes>]
|
||||
+
|
||||
+ This is the only place where this syntax needs to be handled. Relevant
|
||||
+ patterns are transformed from compact to the normal syntax before they are
|
||||
+ queued, so none of the gen* programs need to know about this syntax at all.
|
||||
+
|
||||
+ Conversion process (convert_syntax):
|
||||
+
|
||||
+ 0) Check that pattern actually uses new syntax (check for {@ ... }).
|
||||
+
|
||||
+ 1) Get the "layout", i.e. the "[cons: 0 2; attrs: length]" from the above
|
||||
+ example. cons must come first; both are optional. Set up two vecs,
|
||||
+ convec and attrvec, for holding the results of the transformation.
|
||||
+
|
||||
+ 2) For each alternative: parse the list of constraints and/or attributes,
|
||||
+ and enqueue them in the relevant lists in convec and attrvec. By the end
|
||||
+ of this process, convec[N].con and attrvec[N].con should contain regular
|
||||
+ syntax constraint/attribute lists like "r,I,k". Copy the asm to a string
|
||||
+ as we go.
|
||||
+
|
||||
+ 3) Search the rtx and write the constraint and attribute lists into the
|
||||
+ correct places. Write the asm back into the template. */
|
||||
+
|
||||
+/* Helper class for shuffling constraints/attributes in convert_syntax and
|
||||
+ add_constraints/add_attributes. This includes commas but not whitespace. */
|
||||
+
|
||||
+class conlist {
|
||||
+private:
|
||||
+ std::string con;
|
||||
+
|
||||
+public:
|
||||
+ std::string name;
|
||||
+ int idx = -1;
|
||||
+
|
||||
+ conlist () = default;
|
||||
+
|
||||
+ /* [ns..ns + len) should be a string with the id of the rtx to match
|
||||
+ i.e. if rtx is the relevant match_operand or match_scratch then
|
||||
+ [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
|
||||
+ [ns..ns + len) should equal XSTR (rtx, 0). */
|
||||
+ conlist (const char *ns, unsigned int len, bool numeric)
|
||||
+ {
|
||||
+ /* Trim leading whitespaces. */
|
||||
+ while (ISBLANK (*ns))
|
||||
+ {
|
||||
+ ns++;
|
||||
+ len--;
|
||||
+ }
|
||||
+
|
||||
+ /* Trim trailing whitespace. */
|
||||
+ for (int i = len - 1; i >= 0; i--, len--)
|
||||
+ if (!ISBLANK (ns[i]))
|
||||
+ break;
|
||||
+
|
||||
+ /* Parse off any modifiers. */
|
||||
+ while (!ISALNUM (*ns))
|
||||
+ {
|
||||
+ con += *(ns++);
|
||||
+ len--;
|
||||
+ }
|
||||
+
|
||||
+ name.assign (ns, len);
|
||||
+ if (numeric)
|
||||
+ idx = std::stoi (name);
|
||||
+ }
|
||||
+
|
||||
+ /* Adds a character to the end of the string. */
|
||||
+ void add (char c)
|
||||
+ {
|
||||
+ con += c;
|
||||
+ }
|
||||
+
|
||||
+ /* Output the string in the form of a brand-new char *, then effectively
|
||||
+ clear the internal string by resetting len to 0. */
|
||||
+ char *out ()
|
||||
+ {
|
||||
+ /* Final character is always a trailing comma, so strip it out. */
|
||||
+ char *q = xstrndup (con.c_str (), con.size () - 1);
|
||||
+ con.clear ();
|
||||
+ return q;
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
+typedef std::vector<conlist> vec_conlist;
|
||||
+
|
||||
+/* Add constraints to an rtx. This function is similar to remove_constraints.
|
||||
+ Errors if adding the constraints would overwrite existing constraints. */
|
||||
+
|
||||
+static void
|
||||
+add_constraints (rtx part, file_location loc, vec_conlist &cons)
|
||||
+{
|
||||
+ const char *format_ptr;
|
||||
+
|
||||
+ if (part == NULL_RTX)
|
||||
+ return;
|
||||
+
|
||||
+ /* If match_op or match_scr, check if we have the right one, and if so, copy
|
||||
+ over the constraint list. */
|
||||
+ if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
|
||||
+ {
|
||||
+ int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
|
||||
+ unsigned id = XINT (part, 0);
|
||||
+
|
||||
+ if (id >= cons.size () || cons[id].idx == -1)
|
||||
+ return;
|
||||
+
|
||||
+ if (XSTR (part, field)[0] != '\0')
|
||||
+ {
|
||||
+ error_at (loc, "can't mix normal and compact constraint syntax");
|
||||
+ return;
|
||||
+ }
|
||||
+ XSTR (part, field) = cons[id].out ();
|
||||
+ cons[id].idx = -1;
|
||||
+ }
|
||||
+
|
||||
+ format_ptr = GET_RTX_FORMAT (GET_CODE (part));
|
||||
+
|
||||
+ /* Recursively search the rtx. */
|
||||
+ for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
|
||||
+ switch (*format_ptr++)
|
||||
+ {
|
||||
+ case 'e':
|
||||
+ case 'u':
|
||||
+ add_constraints (XEXP (part, i), loc, cons);
|
||||
+ break;
|
||||
+ case 'E':
|
||||
+ if (XVEC (part, i) != NULL)
|
||||
+ for (int j = 0; j < XVECLEN (part, i); j++)
|
||||
+ add_constraints (XVECEXP (part, i, j), loc, cons);
|
||||
+ break;
|
||||
+ default:
|
||||
+ continue;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Add ATTRS to definition X's attribute list. */
|
||||
+
|
||||
+static void
|
||||
+add_attributes (rtx x, vec_conlist &attrs)
|
||||
+{
|
||||
+ unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
|
||||
+ rtvec orig = XVEC (x, attr_index);
|
||||
+ if (orig)
|
||||
+ {
|
||||
+ size_t n_curr = XVECLEN (x, attr_index);
|
||||
+ rtvec copy = rtvec_alloc (n_curr + attrs.size ());
|
||||
+
|
||||
+ /* Create a shallow copy of existing entries. */
|
||||
+ memcpy (©->elem[attrs.size ()], &orig->elem[0],
|
||||
+ sizeof (rtx) * n_curr);
|
||||
+ XVEC (x, attr_index) = copy;
|
||||
+ }
|
||||
+ else
|
||||
+ XVEC (x, attr_index) = rtvec_alloc (attrs.size ());
|
||||
+
|
||||
+ /* Create the new elements. */
|
||||
+ for (unsigned i = 0; i < attrs.size (); i++)
|
||||
+ {
|
||||
+ rtx attr = rtx_alloc (SET_ATTR);
|
||||
+ XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
|
||||
+ XSTR (attr, 1) = attrs[i].out ();
|
||||
+ XVECEXP (x, attr_index, i) = attr;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Consumes spaces and tabs. */
|
||||
+
|
||||
+static inline void
|
||||
+skip_spaces (const char **str)
|
||||
+{
|
||||
+ while (ISBLANK (**str))
|
||||
+ (*str)++;
|
||||
+}
|
||||
+
|
||||
+/* Consumes the given character, if it's there. */
|
||||
+
|
||||
+static inline bool
|
||||
+expect_char (const char **str, char c)
|
||||
+{
|
||||
+ if (**str != c)
|
||||
+ return false;
|
||||
+ (*str)++;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
+/* Parses the section layout that follows a "{@" if using new syntax. Builds
|
||||
+ a vector for a single section. E.g. if we have "attrs: length, arch]..."
|
||||
+ then list will have two elements, the first for "length" and the second
|
||||
+ for "arch". */
|
||||
+
|
||||
+static void
|
||||
+parse_section_layout (file_location loc, const char **templ, const char *label,
|
||||
+ vec_conlist &list, bool numeric)
|
||||
+{
|
||||
+ const char *name_start;
|
||||
+ size_t label_len = strlen (label);
|
||||
+ if (strncmp (label, *templ, label_len) == 0)
|
||||
+ {
|
||||
+ *templ += label_len;
|
||||
+
|
||||
+ /* Gather the names. */
|
||||
+ while (**templ != ';' && **templ != ']')
|
||||
+ {
|
||||
+ skip_spaces (templ);
|
||||
+ name_start = *templ;
|
||||
+ int len = 0;
|
||||
+ char val = (*templ)[len];
|
||||
+ while (val != ',' && val != ';' && val != ']')
|
||||
+ {
|
||||
+ if (val == 0 || val == '\n')
|
||||
+ fatal_at (loc, "missing ']'");
|
||||
+ val = (*templ)[++len];
|
||||
+ }
|
||||
+ *templ += len;
|
||||
+ if (val == ',')
|
||||
+ (*templ)++;
|
||||
+ list.push_back (conlist (name_start, len, numeric));
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Parse a section, a section is defined as a named space separated list, e.g.
|
||||
+
|
||||
+ foo: a, b, c
|
||||
+
|
||||
+ is a section named "foo" with entries a, b and c. */
|
||||
+
|
||||
+static void
|
||||
+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
|
||||
+ vec_conlist &list, file_location loc, const char *name)
|
||||
+{
|
||||
+ unsigned int i;
|
||||
+
|
||||
+ /* Go through the list, one character at a time, adding said character
|
||||
+ to the correct string. */
|
||||
+ for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
|
||||
+ if (!ISBLANK (**templ))
|
||||
+ {
|
||||
+ if (**templ == 0 || **templ == '\n')
|
||||
+ fatal_at (loc, "missing ']'");
|
||||
+ list[i].add (**templ);
|
||||
+ if (**templ == ',')
|
||||
+ {
|
||||
+ ++i;
|
||||
+ if (i == n_elems)
|
||||
+ fatal_at (loc, "too many %ss in alternative %d: expected %d",
|
||||
+ name, alt_no, n_elems);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ if (i + 1 < n_elems)
|
||||
+ fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
|
||||
+ name, alt_no, n_elems, i);
|
||||
+
|
||||
+ list[i].add (',');
|
||||
+}
|
||||
+
|
||||
+/* The compact syntax has more convience syntaxes. As such we post process
|
||||
+ the lines to get them back to something the normal syntax understands. */
|
||||
+
|
||||
+static void
|
||||
+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
|
||||
+ std::string &last_line)
|
||||
+{
|
||||
+ /* Check if we're copying the last statement. */
|
||||
+ if (line.find ("^") == 0 && line.size () == 1)
|
||||
+ {
|
||||
+ if (last_line.empty ())
|
||||
+ fatal_at (loc, "found instruction to copy previous line (^) in"
|
||||
+ "alternative %d but no previous line to copy", alt_no);
|
||||
+ line = last_line;
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ std::string result;
|
||||
+ std::string buffer;
|
||||
+ /* Check if we have << which means return c statement. */
|
||||
+ if (line.find ("<<") == 0)
|
||||
+ {
|
||||
+ result.append ("* return ");
|
||||
+ const char *chunk = line.c_str () + 2;
|
||||
+ skip_spaces (&chunk);
|
||||
+ result.append (chunk);
|
||||
+ }
|
||||
+ else
|
||||
+ result.append (line);
|
||||
+
|
||||
+ line = result;
|
||||
+ return;
|
||||
+}
|
||||
+
|
||||
+/* Converts an rtx from compact syntax to normal syntax if possible. */
|
||||
+
|
||||
+static void
|
||||
+convert_syntax (rtx x, file_location loc)
|
||||
+{
|
||||
+ int alt_no;
|
||||
+ unsigned int templ_index;
|
||||
+ const char *templ;
|
||||
+ vec_conlist tconvec, convec, attrvec;
|
||||
+
|
||||
+ templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
|
||||
+
|
||||
+ templ = XTMPL (x, templ_index);
|
||||
+
|
||||
+ /* Templates with constraints start with "{@". */
|
||||
+ if (strncmp ("*{@", templ, 3))
|
||||
+ return;
|
||||
+
|
||||
+ /* Get the layout for the template. */
|
||||
+ templ += 3;
|
||||
+ skip_spaces (&templ);
|
||||
+
|
||||
+ if (!expect_char (&templ, '['))
|
||||
+ fatal_at (loc, "expecing `[' to begin section list");
|
||||
+
|
||||
+ parse_section_layout (loc, &templ, "cons:", tconvec, true);
|
||||
+
|
||||
+ /* Check for any duplicate cons entries and sort based on i. */
|
||||
+ for (auto e : tconvec)
|
||||
+ {
|
||||
+ unsigned idx = e.idx;
|
||||
+ if (idx >= convec.size ())
|
||||
+ convec.resize (idx + 1);
|
||||
+
|
||||
+ if (convec[idx].idx >= 0)
|
||||
+ fatal_at (loc, "duplicate cons number found: %d", idx);
|
||||
+ convec[idx] = e;
|
||||
+ }
|
||||
+ tconvec.clear ();
|
||||
+
|
||||
+ if (*templ != ']')
|
||||
+ {
|
||||
+ if (*templ == ';')
|
||||
+ skip_spaces (&(++templ));
|
||||
+ parse_section_layout (loc, &templ, "attrs:", attrvec, false);
|
||||
+ }
|
||||
+
|
||||
+ if (!expect_char (&templ, ']'))
|
||||
+ fatal_at (loc, "expecting `]` to end section list - section list must have "
|
||||
+ "cons first, attrs second");
|
||||
+
|
||||
+ /* We will write the un-constrainified template into new_templ. */
|
||||
+ std::string new_templ;
|
||||
+ new_templ.append ("@");
|
||||
+
|
||||
+ /* Skip to the first proper line. */
|
||||
+ skip_spaces (&templ);
|
||||
+ if (*templ == 0)
|
||||
+ fatal_at (loc, "'{@...}' blocks must have at least one alternative");
|
||||
+ if (*templ != '\n')
|
||||
+ fatal_at (loc, "unexpected character '%c' after ']'", *templ);
|
||||
+ templ++;
|
||||
+
|
||||
+ alt_no = 0;
|
||||
+ std::string last_line;
|
||||
+
|
||||
+ /* Process the alternatives. */
|
||||
+ while (*(templ - 1) != '\0')
|
||||
+ {
|
||||
+ /* Skip leading whitespace. */
|
||||
+ std::string buffer;
|
||||
+ skip_spaces (&templ);
|
||||
+
|
||||
+ /* Check if we're at the end. */
|
||||
+ if (templ[0] == '}' && templ[1] == '\0')
|
||||
+ break;
|
||||
+
|
||||
+ if (expect_char (&templ, '['))
|
||||
+ {
|
||||
+ new_templ += '\n';
|
||||
+ new_templ.append (buffer);
|
||||
+ /* Parse the constraint list, then the attribute list. */
|
||||
+ if (convec.size () > 0)
|
||||
+ parse_section (&templ, convec.size (), alt_no, convec, loc,
|
||||
+ "constraint");
|
||||
+
|
||||
+ if (attrvec.size () > 0)
|
||||
+ {
|
||||
+ if (convec.size () > 0 && !expect_char (&templ, ';'))
|
||||
+ fatal_at (loc, "expected `;' to separate constraints "
|
||||
+ "and attributes in alternative %d", alt_no);
|
||||
+
|
||||
+ parse_section (&templ, attrvec.size (), alt_no,
|
||||
+ attrvec, loc, "attribute");
|
||||
+ }
|
||||
+
|
||||
+ if (!expect_char (&templ, ']'))
|
||||
+ fatal_at (loc, "expected end of constraint/attribute list but "
|
||||
+ "missing an ending `]' in alternative %d", alt_no);
|
||||
+ }
|
||||
+ else if (templ[0] == '/' && templ[1] == '/')
|
||||
+ {
|
||||
+ templ += 2;
|
||||
+ /* Glob till newline or end of string. */
|
||||
+ while (*templ != '\n' || *templ != '\0')
|
||||
+ templ++;
|
||||
+
|
||||
+ /* Skip any newlines or whitespaces needed. */
|
||||
+ while (ISSPACE(*templ))
|
||||
+ templ++;
|
||||
+ continue;
|
||||
+ }
|
||||
+ else if (templ[0] == '/' && templ[1] == '*')
|
||||
+ {
|
||||
+ templ += 2;
|
||||
+ /* Glob till newline or end of multiline comment. */
|
||||
+ while (templ[0] != 0 && templ[0] != '*' && templ[1] != '/')
|
||||
+ templ++;
|
||||
+
|
||||
+ while (templ[0] != '*' || templ[1] != '/')
|
||||
+ {
|
||||
+ if (templ[0] == 0)
|
||||
+ fatal_at (loc, "unterminated '/*'");
|
||||
+ templ++;
|
||||
+ }
|
||||
+ templ += 2;
|
||||
+
|
||||
+ /* Skip any newlines or whitespaces needed. */
|
||||
+ while (ISSPACE(*templ))
|
||||
+ templ++;
|
||||
+ continue;
|
||||
+ }
|
||||
+ else
|
||||
+ fatal_at (loc, "expected constraint/attribute list at beginning of "
|
||||
+ "alternative %d but missing a starting `['", alt_no);
|
||||
+
|
||||
+ /* Skip whitespace between list and asm. */
|
||||
+ skip_spaces (&templ);
|
||||
+
|
||||
+ /* Copy asm to new template. */
|
||||
+ std::string line;
|
||||
+ while (*templ != '\n' && *templ != '\0')
|
||||
+ line += *templ++;
|
||||
+
|
||||
+ /* Apply any pre-processing needed to the line. */
|
||||
+ preprocess_compact_syntax (loc, alt_no, line, last_line);
|
||||
+ new_templ.append (line);
|
||||
+ last_line = line;
|
||||
+
|
||||
+ /* Normal "*..." syntax expects the closing quote to be on the final
|
||||
+ line of asm, whereas we allow the closing "}" to be on its own line.
|
||||
+ Postpone copying the '\n' until we know that there is another
|
||||
+ alternative in the list. */
|
||||
+ while (ISSPACE (*templ))
|
||||
+ templ++;
|
||||
+ ++alt_no;
|
||||
+ }
|
||||
+
|
||||
+ /* Write the constraints and attributes into their proper places. */
|
||||
+ if (convec.size () > 0)
|
||||
+ add_constraints (x, loc, convec);
|
||||
+
|
||||
+ if (attrvec.size () > 0)
|
||||
+ add_attributes (x, attrvec);
|
||||
+
|
||||
+ /* Copy over the new un-constrainified template. */
|
||||
+ XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
|
||||
+
|
||||
+ /* Register for later checks during iterator expansions. */
|
||||
+ compact_syntax.add (x);
|
||||
+}
|
||||
+
|
||||
/* Process a top level rtx in some way, queuing as appropriate. */
|
||||
|
||||
static void
|
||||
@@ -553,10 +1048,12 @@ process_rtx (rtx desc, file_location loc)
|
||||
switch (GET_CODE (desc))
|
||||
{
|
||||
case DEFINE_INSN:
|
||||
+ convert_syntax (desc, loc);
|
||||
queue_pattern (desc, &define_insn_tail, loc);
|
||||
break;
|
||||
|
||||
case DEFINE_COND_EXEC:
|
||||
+ convert_syntax (desc, loc);
|
||||
queue_pattern (desc, &define_cond_exec_tail, loc);
|
||||
break;
|
||||
|
||||
@@ -631,6 +1128,7 @@ process_rtx (rtx desc, file_location loc)
|
||||
attr = XVEC (desc, split_code + 1);
|
||||
PUT_CODE (desc, DEFINE_INSN);
|
||||
XVEC (desc, 4) = attr;
|
||||
+ convert_syntax (desc, loc);
|
||||
|
||||
/* Queue them. */
|
||||
insn_elem = queue_pattern (desc, &define_insn_tail, loc);
|
||||
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
|
||||
index 9a0fd7393..a19fc1319 100644
|
||||
--- a/gcc/gensupport.h
|
||||
+++ b/gcc/gensupport.h
|
||||
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#ifndef GCC_GENSUPPORT_H
|
||||
#define GCC_GENSUPPORT_H
|
||||
|
||||
+#include "hash-set.h"
|
||||
#include "read-md.h"
|
||||
|
||||
struct obstack;
|
||||
@@ -218,6 +219,8 @@ struct pattern_stats
|
||||
int num_operand_vars;
|
||||
};
|
||||
|
||||
+extern hash_set<rtx> compact_syntax;
|
||||
+
|
||||
extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
|
||||
extern void compute_test_codes (rtx, file_location, char *);
|
||||
extern file_location get_file_location (rtx);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
104
0146-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
Normal file
104
0146-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch
Normal file
@ -0,0 +1,104 @@
|
||||
From 35b64175c6fd622212d0bf936e7e98c635e1c618 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Wed, 13 Sep 2023 14:50:30 +0100
|
||||
Subject: [PATCH 054/157] [Backport][SME] recog: Improve parser for pattern new
|
||||
compact syntax
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd1091fe455c1ede5993b4cdf10d0f7c461b86d7
|
||||
|
||||
Hi all,
|
||||
|
||||
this is to add support to the new compact pattern syntax for the case
|
||||
where the constraints do appear unsorted like:
|
||||
|
||||
(define_insn "*<optab>si3_insn_uxtw"
|
||||
[(set (match_operand:DI 0 "register_operand")
|
||||
(zero_extend:DI (SHIFT_no_rotate:SI
|
||||
(match_operand:SI 1 "register_operand")
|
||||
(match_operand:QI 2 "aarch64_reg_or_shift_imm_si"))))]
|
||||
""
|
||||
{@ [cons: =0, 2, 1]
|
||||
[ r, Uss, r] <shift>\\t%w0, %w1, %2
|
||||
[ r, r, r] <shift>\\t%w0, %w1, %w2
|
||||
}
|
||||
[(set_attr "type" "bfx,shift_reg")]
|
||||
)
|
||||
|
||||
Best Regards
|
||||
|
||||
Andrea
|
||||
|
||||
gcc/Changelog
|
||||
|
||||
2023-09-20 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gensupport.cc (convert_syntax): Updated to support unordered
|
||||
constraints in compact syntax.
|
||||
---
|
||||
gcc/gensupport.cc | 32 ++++++++++++++++----------------
|
||||
1 file changed, 16 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
|
||||
index 23c61dcdd..97c614850 100644
|
||||
--- a/gcc/gensupport.cc
|
||||
+++ b/gcc/gensupport.cc
|
||||
@@ -895,19 +895,6 @@ convert_syntax (rtx x, file_location loc)
|
||||
|
||||
parse_section_layout (loc, &templ, "cons:", tconvec, true);
|
||||
|
||||
- /* Check for any duplicate cons entries and sort based on i. */
|
||||
- for (auto e : tconvec)
|
||||
- {
|
||||
- unsigned idx = e.idx;
|
||||
- if (idx >= convec.size ())
|
||||
- convec.resize (idx + 1);
|
||||
-
|
||||
- if (convec[idx].idx >= 0)
|
||||
- fatal_at (loc, "duplicate cons number found: %d", idx);
|
||||
- convec[idx] = e;
|
||||
- }
|
||||
- tconvec.clear ();
|
||||
-
|
||||
if (*templ != ']')
|
||||
{
|
||||
if (*templ == ';')
|
||||
@@ -950,13 +937,13 @@ convert_syntax (rtx x, file_location loc)
|
||||
new_templ += '\n';
|
||||
new_templ.append (buffer);
|
||||
/* Parse the constraint list, then the attribute list. */
|
||||
- if (convec.size () > 0)
|
||||
- parse_section (&templ, convec.size (), alt_no, convec, loc,
|
||||
+ if (tconvec.size () > 0)
|
||||
+ parse_section (&templ, tconvec.size (), alt_no, tconvec, loc,
|
||||
"constraint");
|
||||
|
||||
if (attrvec.size () > 0)
|
||||
{
|
||||
- if (convec.size () > 0 && !expect_char (&templ, ';'))
|
||||
+ if (tconvec.size () > 0 && !expect_char (&templ, ';'))
|
||||
fatal_at (loc, "expected `;' to separate constraints "
|
||||
"and attributes in alternative %d", alt_no);
|
||||
|
||||
@@ -1026,6 +1013,19 @@ convert_syntax (rtx x, file_location loc)
|
||||
++alt_no;
|
||||
}
|
||||
|
||||
+ /* Check for any duplicate cons entries and sort based on i. */
|
||||
+ for (auto e : tconvec)
|
||||
+ {
|
||||
+ unsigned idx = e.idx;
|
||||
+ if (idx >= convec.size ())
|
||||
+ convec.resize (idx + 1);
|
||||
+
|
||||
+ if (convec[idx].idx >= 0)
|
||||
+ fatal_at (loc, "duplicate cons number found: %d", idx);
|
||||
+ convec[idx] = e;
|
||||
+ }
|
||||
+ tconvec.clear ();
|
||||
+
|
||||
/* Write the constraints and attributes into their proper places. */
|
||||
if (convec.size () > 0)
|
||||
add_constraints (x, loc, convec);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
49
0147-Backport-SME-recog-Support-space-in-cons.patch
Normal file
49
0147-Backport-SME-recog-Support-space-in-cons.patch
Normal file
@ -0,0 +1,49 @@
|
||||
From e593ad216bd1f4f75d9875898f352e0e5f978159 Mon Sep 17 00:00:00 2001
|
||||
From: Andrea Corallo <andrea.corallo@arm.com>
|
||||
Date: Fri, 15 Sep 2023 10:23:02 +0200
|
||||
Subject: [PATCH 055/157] [Backport][SME] recog: Support space in "[ cons"
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d31045b21324166c3997d603961d99e3c4c357d
|
||||
|
||||
Hi all,
|
||||
|
||||
this is to allow for spaces before "cons:" in the definitions of
|
||||
patterns using the new compact syntax, ex:
|
||||
|
||||
(define_insn "aarch64_simd_dup<mode>"
|
||||
[(set (match_operand:VDQ_I 0 "register_operand")
|
||||
(vec_duplicate:VDQ_I
|
||||
(match_operand:<VEL> 1 "register_operand")))]
|
||||
"TARGET_SIMD"
|
||||
{@ [ cons: =0 , 1 ; attrs: type ]
|
||||
[ w , w ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
|
||||
[ w , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
|
||||
}
|
||||
)
|
||||
|
||||
gcc/Changelog
|
||||
|
||||
2023-09-20 Andrea Corallo <andrea.corallo@arm.com>
|
||||
|
||||
* gensupport.cc (convert_syntax): Skip spaces before "cons:"
|
||||
in new compact pattern syntax.
|
||||
---
|
||||
gcc/gensupport.cc | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
|
||||
index 97c614850..3d7a6d4fd 100644
|
||||
--- a/gcc/gensupport.cc
|
||||
+++ b/gcc/gensupport.cc
|
||||
@@ -893,6 +893,8 @@ convert_syntax (rtx x, file_location loc)
|
||||
if (!expect_char (&templ, '['))
|
||||
fatal_at (loc, "expecing `[' to begin section list");
|
||||
|
||||
+ skip_spaces (&templ);
|
||||
+
|
||||
parse_section_layout (loc, &templ, "cons:", tconvec, true);
|
||||
|
||||
if (*templ != ']')
|
||||
--
|
||||
2.33.0
|
||||
|
||||
164
0148-Backport-SME-aarch64-Generalise-require_immediate_la.patch
Normal file
164
0148-Backport-SME-aarch64-Generalise-require_immediate_la.patch
Normal file
@ -0,0 +1,164 @@
|
||||
From cb6d55f6bc7c490f72a43dd87543ab7a7ea582a8 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:18 +0000
|
||||
Subject: [PATCH 056/157] [Backport][SME] aarch64: Generalise
|
||||
require_immediate_lane_index
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c0cf2c893d54420b0c19fee7bd41ae40017d0106
|
||||
|
||||
require_immediate_lane_index previously hard-coded the assumption
|
||||
that the group size is determined by the argument immediately before
|
||||
the index. However, for SME, there are cases where it should be
|
||||
determined by an earlier argument instead.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.h:
|
||||
(function_checker::require_immediate_lane_index): Add an argument
|
||||
for the index of the indexed vector argument.
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(function_checker::require_immediate_lane_index): Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
(ternary_bfloat_lane_base::check): Update accordingly.
|
||||
(ternary_qq_lane_base::check): Likewise.
|
||||
(binary_lane_def::check): Likewise.
|
||||
(binary_long_lane_def::check): Likewise.
|
||||
(ternary_lane_def::check): Likewise.
|
||||
(ternary_lane_rotate_def::check): Likewise.
|
||||
(ternary_long_lane_def::check): Likewise.
|
||||
(ternary_qq_lane_rotate_def::check): Likewise.
|
||||
---
|
||||
.../aarch64/aarch64-sve-builtins-shapes.cc | 16 ++++++++--------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 18 ++++++++++++------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 3 ++-
|
||||
3 files changed, 22 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
index f57f92698..4fa4181b9 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
@@ -941,7 +941,7 @@ struct ternary_bfloat_lane_base
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return c.require_immediate_lane_index (3, N);
|
||||
+ return c.require_immediate_lane_index (3, 2, N);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -956,7 +956,7 @@ struct ternary_qq_lane_base
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return c.require_immediate_lane_index (3, 4);
|
||||
+ return c.require_immediate_lane_index (3, 0);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1123,7 +1123,7 @@ struct binary_lane_def : public overloaded_base<0>
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return c.require_immediate_lane_index (2);
|
||||
+ return c.require_immediate_lane_index (2, 1);
|
||||
}
|
||||
};
|
||||
SHAPE (binary_lane)
|
||||
@@ -1162,7 +1162,7 @@ struct binary_long_lane_def : public overloaded_base<0>
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return c.require_immediate_lane_index (2);
|
||||
+ return c.require_immediate_lane_index (2, 1);
|
||||
}
|
||||
};
|
||||
SHAPE (binary_long_lane)
|
||||
@@ -2817,7 +2817,7 @@ struct ternary_lane_def : public overloaded_base<0>
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return c.require_immediate_lane_index (3);
|
||||
+ return c.require_immediate_lane_index (3, 2);
|
||||
}
|
||||
};
|
||||
SHAPE (ternary_lane)
|
||||
@@ -2845,7 +2845,7 @@ struct ternary_lane_rotate_def : public overloaded_base<0>
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return (c.require_immediate_lane_index (3, 2)
|
||||
+ return (c.require_immediate_lane_index (3, 2, 2)
|
||||
&& c.require_immediate_one_of (4, 0, 90, 180, 270));
|
||||
}
|
||||
};
|
||||
@@ -2868,7 +2868,7 @@ struct ternary_long_lane_def
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return c.require_immediate_lane_index (3);
|
||||
+ return c.require_immediate_lane_index (3, 2);
|
||||
}
|
||||
};
|
||||
SHAPE (ternary_long_lane)
|
||||
@@ -2965,7 +2965,7 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0>
|
||||
bool
|
||||
check (function_checker &c) const OVERRIDE
|
||||
{
|
||||
- return (c.require_immediate_lane_index (3, 4)
|
||||
+ return (c.require_immediate_lane_index (3, 0)
|
||||
&& c.require_immediate_one_of (4, 0, 90, 180, 270));
|
||||
}
|
||||
};
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index 91af96687..7924cdf0f 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -2440,20 +2440,26 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type)
|
||||
return false;
|
||||
}
|
||||
|
||||
-/* Check that argument REL_ARGNO is suitable for indexing argument
|
||||
- REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts
|
||||
- from the end of the predication arguments. */
|
||||
+/* The intrinsic conceptually divides vector argument REL_VEC_ARGNO into
|
||||
+ groups of GROUP_SIZE elements. Return true if argument REL_ARGNO is
|
||||
+ a suitable constant index for selecting one of these groups. The
|
||||
+ selection happens within a 128-bit quadword, rather than the whole vector.
|
||||
+
|
||||
+ REL_ARGNO and REL_VEC_ARGNO count from the end of the predication
|
||||
+ arguments. */
|
||||
bool
|
||||
function_checker::require_immediate_lane_index (unsigned int rel_argno,
|
||||
+ unsigned int rel_vec_argno,
|
||||
unsigned int group_size)
|
||||
{
|
||||
unsigned int argno = m_base_arg + rel_argno;
|
||||
if (!argument_exists_p (argno))
|
||||
return true;
|
||||
|
||||
- /* Get the type of the previous argument. tree_argument_type wants a
|
||||
- 1-based number, whereas ARGNO is 0-based. */
|
||||
- machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno));
|
||||
+ /* Get the type of the vector argument. tree_argument_type wants a
|
||||
+ 1-based number, whereas VEC_ARGNO is 0-based. */
|
||||
+ unsigned int vec_argno = m_base_arg + rel_vec_argno;
|
||||
+ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, vec_argno + 1));
|
||||
gcc_assert (VECTOR_MODE_P (mode));
|
||||
unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode));
|
||||
return require_immediate_range (rel_argno, 0, nlanes - 1);
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 52994cde0..824c31cd7 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -463,7 +463,8 @@ public:
|
||||
bool require_immediate_either_or (unsigned int, HOST_WIDE_INT,
|
||||
HOST_WIDE_INT);
|
||||
bool require_immediate_enum (unsigned int, tree);
|
||||
- bool require_immediate_lane_index (unsigned int, unsigned int = 1);
|
||||
+ bool require_immediate_lane_index (unsigned int, unsigned int,
|
||||
+ unsigned int = 1);
|
||||
bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT,
|
||||
HOST_WIDE_INT, HOST_WIDE_INT);
|
||||
bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
469
0149-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
Normal file
469
0149-Backport-SME-aarch64-Add-backend-support-for-DFP.patch
Normal file
@ -0,0 +1,469 @@
|
||||
From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001
|
||||
From: Christophe Lyon <christophe.lyon@arm.com>
|
||||
Date: Fri, 11 Mar 2022 16:21:02 +0000
|
||||
Subject: [PATCH 057/157] [Backport][SME] aarch64: Add backend support for DFP
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22
|
||||
|
||||
This patch updates the aarch64 backend as needed to support DFP modes
|
||||
(SD, DD and TD).
|
||||
|
||||
Changes v1->v2:
|
||||
|
||||
* Drop support for DFP modes in
|
||||
aarch64_gen_{load||store}[wb]_pair as these are only used in
|
||||
prologue/epilogue where DFP modes are not used. Drop the
|
||||
changes to the corresponding patterns in aarch64.md, and
|
||||
useless GPF_PAIR iterator.
|
||||
|
||||
* In aarch64_reinterpret_float_as_int, handle DDmode the same way
|
||||
as DFmode (needed in case the representation of the
|
||||
floating-point value can be loaded using mov/movk).
|
||||
|
||||
* In aarch64_float_const_zero_rtx_p, reject constants with DFP
|
||||
mode: when X is zero, the callers want to emit either '0' or
|
||||
'zr' depending on the context, which is not the way 0.0 is
|
||||
represented in DFP mode (in particular fmov d0, #0 is not right
|
||||
for DFP).
|
||||
|
||||
* In aarch64_legitimate_constant_p, accept DFP
|
||||
|
||||
2022-03-31 Christophe Lyon <christophe.lyon@arm.com>
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc
|
||||
(aarch64_split_128bit_move): Handle DFP modes.
|
||||
(aarch64_mode_valid_for_sched_fusion_p): Likewise.
|
||||
(aarch64_classify_address): Likewise.
|
||||
(aarch64_legitimize_address_displacement): Likewise.
|
||||
(aarch64_reinterpret_float_as_int): Likewise.
|
||||
(aarch64_float_const_zero_rtx_p): Likewise.
|
||||
(aarch64_can_const_movi_rtx_p): Likewise.
|
||||
(aarch64_anchor_offset): Likewise.
|
||||
(aarch64_secondary_reload): Likewise.
|
||||
(aarch64_rtx_costs): Likewise.
|
||||
(aarch64_legitimate_constant_p): Likewise.
|
||||
(aarch64_gimplify_va_arg_expr): Likewise.
|
||||
(aapcs_vfp_sub_candidate): Likewise.
|
||||
(aarch64_vfp_is_call_or_return_candidate): Likewise.
|
||||
(aarch64_output_scalar_simd_mov_immediate): Likewise.
|
||||
(aarch64_gen_adjusted_ldpstp): Likewise.
|
||||
(aarch64_scalar_mode_supported_p): Accept DFP modes if enabled.
|
||||
* config/aarch64/aarch64.md
|
||||
(movsf_aarch64): Use SFD iterator and rename into
|
||||
mov<mode>_aarch64.
|
||||
(movdf_aarch64): Use DFD iterator and rename into
|
||||
mov<mode>_aarch64.
|
||||
(movtf_aarch64): Use TFD iterator and rename into
|
||||
mov<mode>_aarch64.
|
||||
(split pattern for move TF mode): Use TFD iterator.
|
||||
* config/aarch64/iterators.md
|
||||
(GPF_TF_F16_MOV): Add DFP modes.
|
||||
(SFD, DFD, TFD): New iterators.
|
||||
(GPF_TF): Add DFP modes.
|
||||
(TX, DX, DX2): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++-----------
|
||||
gcc/config/aarch64/aarch64.md | 34 +++++++-------
|
||||
gcc/config/aarch64/iterators.md | 24 +++++++---
|
||||
3 files changed, 89 insertions(+), 51 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 055b436b1..02210ed13 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src)
|
||||
|
||||
machine_mode mode = GET_MODE (dst);
|
||||
|
||||
- gcc_assert (mode == TImode || mode == TFmode);
|
||||
+ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode);
|
||||
gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
|
||||
gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
|
||||
|
||||
@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
|
||||
{
|
||||
return mode == SImode || mode == DImode
|
||||
|| mode == SFmode || mode == DFmode
|
||||
+ || mode == SDmode || mode == DDmode
|
||||
|| (aarch64_vector_mode_supported_p (mode)
|
||||
&& (known_eq (GET_MODE_SIZE (mode), 8)
|
||||
|| (known_eq (GET_MODE_SIZE (mode), 16)
|
||||
@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||||
vec_flags &= ~VEC_PARTIAL;
|
||||
|
||||
/* On BE, we use load/store pair for all large int mode load/stores.
|
||||
- TI/TFmode may also use a load/store pair. */
|
||||
+ TI/TF/TDmode may also use a load/store pair. */
|
||||
bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
|
||||
bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
|
||||
|| type == ADDR_QUERY_LDP_STP_N
|
||||
|| mode == TImode
|
||||
|| mode == TFmode
|
||||
+ || mode == TDmode
|
||||
|| (BYTES_BIG_ENDIAN && advsimd_struct_p));
|
||||
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
|
||||
corresponds to the actual size of the memory being loaded/stored and the
|
||||
@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||||
info->offset = op1;
|
||||
info->const_offset = offset;
|
||||
|
||||
- /* TImode and TFmode values are allowed in both pairs of X
|
||||
+ /* TImode, TFmode and TDmode values are allowed in both pairs of X
|
||||
registers and individual Q registers. The available
|
||||
address modes are:
|
||||
X,X: 7-bit signed scaled offset
|
||||
@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||||
When performing the check for pairs of X registers i.e. LDP/STP
|
||||
pass down DImode since that is the natural size of the LDP/STP
|
||||
instruction memory accesses. */
|
||||
- if (mode == TImode || mode == TFmode)
|
||||
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||||
return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
|
||||
&& (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
|
||||
|| offset_12bit_unsigned_scaled_p (mode, offset)));
|
||||
@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||||
info->offset = XEXP (XEXP (x, 1), 1);
|
||||
info->const_offset = offset;
|
||||
|
||||
- /* TImode and TFmode values are allowed in both pairs of X
|
||||
+ /* TImode, TFmode and TDmode values are allowed in both pairs of X
|
||||
registers and individual Q registers. The available
|
||||
address modes are:
|
||||
X,X: 7-bit signed scaled offset
|
||||
Q: 9-bit signed offset
|
||||
We conservatively require an offset representable in either mode.
|
||||
*/
|
||||
- if (mode == TImode || mode == TFmode)
|
||||
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||||
return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
|
||||
&& aarch64_offset_9bit_signed_unscaled_p (mode, offset));
|
||||
|
||||
@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
|
||||
offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
|
||||
range otherwise to increase opportunities for sharing the base
|
||||
address of different sizes. Unaligned accesses use the signed
|
||||
- 9-bit range, TImode/TFmode use the intersection of signed
|
||||
+ 9-bit range, TImode/TFmode/TDmode use the intersection of signed
|
||||
scaled 7-bit and signed 9-bit offset. */
|
||||
- if (mode == TImode || mode == TFmode)
|
||||
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||||
second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
|
||||
else if ((const_offset & (size - 1)) != 0)
|
||||
second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
|
||||
@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
|
||||
CONST_DOUBLE_REAL_VALUE (value),
|
||||
REAL_MODE_FORMAT (mode));
|
||||
|
||||
- if (mode == DFmode)
|
||||
+ if (mode == DFmode || mode == DDmode)
|
||||
{
|
||||
int order = BYTES_BIG_ENDIAN ? 1 : 0;
|
||||
ival = zext_hwi (res[order], 32);
|
||||
@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x)
|
||||
return false;
|
||||
}
|
||||
|
||||
-/* Return TRUE if rtx X is immediate constant 0.0 */
|
||||
+/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal
|
||||
+ Floating Point). */
|
||||
bool
|
||||
aarch64_float_const_zero_rtx_p (rtx x)
|
||||
{
|
||||
- if (GET_MODE (x) == VOIDmode)
|
||||
+ /* 0.0 in Decimal Floating Point cannot be represented by #0 or
|
||||
+ zr as our callers expect, so no need to check the actual
|
||||
+ value if X is of Decimal Floating Point type. */
|
||||
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT)
|
||||
return false;
|
||||
|
||||
if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
|
||||
@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
|
||||
else
|
||||
return false;
|
||||
|
||||
- /* use a 64 bit mode for everything except for DI/DF mode, where we use
|
||||
+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
|
||||
a 128 bit vector mode. */
|
||||
int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
|
||||
|
||||
@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
|
||||
if (IN_RANGE (offset, -256, 0))
|
||||
return 0;
|
||||
|
||||
- if (mode == TImode || mode == TFmode)
|
||||
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||||
return (offset + 0x100) & ~0x1ff;
|
||||
|
||||
/* Use 12-bit offset by access size. */
|
||||
@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||||
|
||||
/* Without the TARGET_SIMD instructions we cannot move a Q register
|
||||
to a Q register directly. We need a scratch. */
|
||||
- if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
|
||||
+ if (REG_P (x)
|
||||
+ && (mode == TFmode || mode == TImode || mode == TDmode)
|
||||
+ && mode == GET_MODE (x)
|
||||
&& FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
|
||||
&& reg_class_subset_p (rclass, FP_REGS))
|
||||
{
|
||||
@@ -12745,14 +12753,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||||
return NO_REGS;
|
||||
}
|
||||
|
||||
- /* A TFmode or TImode memory access should be handled via an FP_REGS
|
||||
+ /* A TFmode, TImode or TDmode memory access should be handled via an FP_REGS
|
||||
because AArch64 has richer addressing modes for LDR/STR instructions
|
||||
than LDP/STP instructions. */
|
||||
if (TARGET_FLOAT && rclass == GENERAL_REGS
|
||||
&& known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
|
||||
return FP_REGS;
|
||||
|
||||
- if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
|
||||
+ if (rclass == FP_REGS
|
||||
+ && (mode == TImode || mode == TFmode || mode == TDmode)
|
||||
+ && CONSTANT_P(x))
|
||||
return GENERAL_REGS;
|
||||
|
||||
return NO_REGS;
|
||||
@@ -13883,9 +13893,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||||
*cost += extra_cost->ldst.storev;
|
||||
else if (GET_MODE_CLASS (mode) == MODE_INT)
|
||||
*cost += extra_cost->ldst.store;
|
||||
- else if (mode == SFmode)
|
||||
+ else if (mode == SFmode || mode == SDmode)
|
||||
*cost += extra_cost->ldst.storef;
|
||||
- else if (mode == DFmode)
|
||||
+ else if (mode == DFmode || mode == DDmode)
|
||||
*cost += extra_cost->ldst.stored;
|
||||
|
||||
*cost +=
|
||||
@@ -14009,11 +14019,11 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||||
/* mov[df,sf]_aarch64. */
|
||||
if (aarch64_float_const_representable_p (x))
|
||||
/* FMOV (scalar immediate). */
|
||||
- *cost += extra_cost->fp[mode == DFmode].fpconst;
|
||||
+ *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
|
||||
else if (!aarch64_float_const_zero_rtx_p (x))
|
||||
{
|
||||
/* This will be a load from memory. */
|
||||
- if (mode == DFmode)
|
||||
+ if (mode == DFmode || mode == DDmode)
|
||||
*cost += extra_cost->ldst.loadd;
|
||||
else
|
||||
*cost += extra_cost->ldst.loadf;
|
||||
@@ -14039,9 +14049,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||||
*cost += extra_cost->ldst.loadv;
|
||||
else if (GET_MODE_CLASS (mode) == MODE_INT)
|
||||
*cost += extra_cost->ldst.load;
|
||||
- else if (mode == SFmode)
|
||||
+ else if (mode == SFmode || mode == SDmode)
|
||||
*cost += extra_cost->ldst.loadf;
|
||||
- else if (mode == DFmode)
|
||||
+ else if (mode == DFmode || mode == DDmode)
|
||||
*cost += extra_cost->ldst.loadd;
|
||||
|
||||
*cost +=
|
||||
@@ -19623,7 +19633,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
|
||||
{
|
||||
/* Support CSE and rematerialization of common constants. */
|
||||
if (CONST_INT_P (x)
|
||||
- || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT))
|
||||
+ || CONST_DOUBLE_P (x))
|
||||
return true;
|
||||
|
||||
/* Only accept variable-length vector constants if they can be
|
||||
@@ -20064,6 +20074,18 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
|
||||
field_t = long_double_type_node;
|
||||
field_ptr_t = long_double_ptr_type_node;
|
||||
break;
|
||||
+ case SDmode:
|
||||
+ field_t = dfloat32_type_node;
|
||||
+ field_ptr_t = build_pointer_type (dfloat32_type_node);
|
||||
+ break;
|
||||
+ case DDmode:
|
||||
+ field_t = dfloat64_type_node;
|
||||
+ field_ptr_t = build_pointer_type (dfloat64_type_node);
|
||||
+ break;
|
||||
+ case TDmode:
|
||||
+ field_t = dfloat128_type_node;
|
||||
+ field_ptr_t = build_pointer_type (dfloat128_type_node);
|
||||
+ break;
|
||||
case E_HFmode:
|
||||
field_t = aarch64_fp16_type_node;
|
||||
field_ptr_t = aarch64_fp16_ptr_type_node;
|
||||
@@ -20315,7 +20337,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
|
||||
case REAL_TYPE:
|
||||
mode = TYPE_MODE (type);
|
||||
if (mode != DFmode && mode != SFmode
|
||||
- && mode != TFmode && mode != HFmode)
|
||||
+ && mode != TFmode && mode != HFmode
|
||||
+ && mode != SDmode && mode != DDmode && mode != TDmode)
|
||||
return -1;
|
||||
|
||||
if (*modep == VOIDmode)
|
||||
@@ -20631,7 +20654,9 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
|
||||
machine_mode new_mode = VOIDmode;
|
||||
bool composite_p = aarch64_composite_type_p (type, mode);
|
||||
|
||||
- if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||||
+ if ((!composite_p
|
||||
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||||
+ || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT))
|
||||
|| aarch64_short_vector_p (type, mode))
|
||||
{
|
||||
*count = 1;
|
||||
@@ -23565,7 +23590,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
|
||||
}
|
||||
|
||||
machine_mode vmode;
|
||||
- /* use a 64 bit mode for everything except for DI/DF mode, where we use
|
||||
+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
|
||||
a 128 bit vector mode. */
|
||||
int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
|
||||
|
||||
@@ -26417,7 +26442,7 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
|
||||
base_off = (off_val_1 + off_val_3) / 2;
|
||||
else
|
||||
/* However, due to issues with negative LDP/STP offset generation for
|
||||
- larger modes, for DF, DI and vector modes. we must not use negative
|
||||
+ larger modes, for DF, DD, DI and vector modes. we must not use negative
|
||||
addresses smaller than 9 signed unadjusted bits can store. This
|
||||
provides the most range in this case. */
|
||||
base_off = off_val_1;
|
||||
@@ -26695,6 +26720,9 @@ aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
|
||||
static bool
|
||||
aarch64_scalar_mode_supported_p (scalar_mode mode)
|
||||
{
|
||||
+ if (DECIMAL_FLOAT_MODE_P (mode))
|
||||
+ return default_decimal_float_supported_p ();
|
||||
+
|
||||
return (mode == HFmode
|
||||
? true
|
||||
: default_scalar_mode_supported_p (mode));
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index a78476c8a..8757a962f 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -1476,11 +1476,11 @@
|
||||
(set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
|
||||
)
|
||||
|
||||
-(define_insn "*movsf_aarch64"
|
||||
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||||
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
|
||||
- "TARGET_FLOAT && (register_operand (operands[0], SFmode)
|
||||
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
|
||||
+(define_insn "*mov<mode>_aarch64"
|
||||
+ [(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||||
+ (match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
|
||||
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||||
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||||
"@
|
||||
movi\\t%0.2s, #0
|
||||
fmov\\t%s0, %w1
|
||||
@@ -1500,11 +1500,11 @@
|
||||
(set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")]
|
||||
)
|
||||
|
||||
-(define_insn "*movdf_aarch64"
|
||||
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||||
- (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
|
||||
- "TARGET_FLOAT && (register_operand (operands[0], DFmode)
|
||||
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
|
||||
+(define_insn "*mov<mode>_aarch64"
|
||||
+ [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||||
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
|
||||
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||||
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||||
"@
|
||||
movi\\t%d0, #0
|
||||
fmov\\t%d0, %x1
|
||||
@@ -1545,13 +1545,13 @@
|
||||
}
|
||||
)
|
||||
|
||||
-(define_insn "*movtf_aarch64"
|
||||
- [(set (match_operand:TF 0
|
||||
+(define_insn "*mov<mode>_aarch64"
|
||||
+ [(set (match_operand:TFD 0
|
||||
"nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
|
||||
- (match_operand:TF 1
|
||||
+ (match_operand:TFD 1
|
||||
"general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
|
||||
- "TARGET_FLOAT && (register_operand (operands[0], TFmode)
|
||||
- || aarch64_reg_or_fp_zero (operands[1], TFmode))"
|
||||
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||||
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||||
"@
|
||||
mov\\t%0.16b, %1.16b
|
||||
#
|
||||
@@ -1571,8 +1571,8 @@
|
||||
)
|
||||
|
||||
(define_split
|
||||
- [(set (match_operand:TF 0 "register_operand" "")
|
||||
- (match_operand:TF 1 "nonmemory_operand" ""))]
|
||||
+ [(set (match_operand:TFD 0 "register_operand" "")
|
||||
+ (match_operand:TFD 1 "nonmemory_operand" ""))]
|
||||
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
|
||||
[(const_int 0)]
|
||||
{
|
||||
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
||||
index 967e6b0b1..d0cd1b788 100644
|
||||
--- a/gcc/config/aarch64/iterators.md
|
||||
+++ b/gcc/config/aarch64/iterators.md
|
||||
@@ -67,14 +67,24 @@
|
||||
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
|
||||
|
||||
;; Iterator for all scalar floating point modes suitable for moving, including
|
||||
-;; special BF type (HF, SF, DF, TF and BF)
|
||||
-(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
|
||||
+;; special BF type and decimal floating point types (HF, SF, DF, TF, BF,
|
||||
+;; SD, DD and TD)
|
||||
+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF SD DD TD])
|
||||
+
|
||||
+;; Iterator for scalar 32bit fp modes (SF, SD)
|
||||
+(define_mode_iterator SFD [SD SF])
|
||||
+
|
||||
+;; Iterator for scalar 64bit fp modes (DF, DD)
|
||||
+(define_mode_iterator DFD [DD DF])
|
||||
+
|
||||
+;; Iterator for scalar 128bit fp modes (TF, TD)
|
||||
+(define_mode_iterator TFD [TD TF])
|
||||
|
||||
;; Double vector modes.
|
||||
(define_mode_iterator VDF [V2SF V4HF])
|
||||
|
||||
-;; Iterator for all scalar floating point modes (SF, DF and TF)
|
||||
-(define_mode_iterator GPF_TF [SF DF TF])
|
||||
+;; Iterator for all scalar floating point modes (SF, DF, TF, SD, DD, and TD)
|
||||
+(define_mode_iterator GPF_TF [SF DF TF SD DD TD])
|
||||
|
||||
;; Integer Advanced SIMD modes.
|
||||
(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
|
||||
@@ -301,7 +311,7 @@
|
||||
;; 2 and 4 lane SI modes.
|
||||
(define_mode_iterator VS [V2SI V4SI])
|
||||
|
||||
-(define_mode_iterator TX [TI TF])
|
||||
+(define_mode_iterator TX [TI TF TD])
|
||||
|
||||
;; Advanced SIMD opaque structure modes.
|
||||
(define_mode_iterator VSTRUCT [OI CI XI])
|
||||
@@ -403,10 +413,10 @@
|
||||
V4x8HF V4x4SF V4x2DF V4x8BF])
|
||||
|
||||
;; Double scalar modes
|
||||
-(define_mode_iterator DX [DI DF])
|
||||
+(define_mode_iterator DX [DI DF DD])
|
||||
|
||||
;; Duplicate of the above
|
||||
-(define_mode_iterator DX2 [DI DF])
|
||||
+(define_mode_iterator DX2 [DI DF DD])
|
||||
|
||||
;; Single scalar modes
|
||||
(define_mode_iterator SX [SI SF])
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1824
0150-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
Normal file
1824
0150-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch
Normal file
File diff suppressed because it is too large
Load Diff
213
0151-Backport-SME-aarch64-Simplify-output-template-emissi.patch
Normal file
213
0151-Backport-SME-aarch64-Simplify-output-template-emissi.patch
Normal file
@ -0,0 +1,213 @@
|
||||
From b51d3b1af24758534e5a8f3a52a56106b935c485 Mon Sep 17 00:00:00 2001
|
||||
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
Date: Wed, 31 May 2023 11:23:23 +0100
|
||||
Subject: [PATCH 059/157] [Backport][SME] aarch64: Simplify output template
|
||||
emission code for a few patterns
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11bd9b1f8133fc07c267e6d1aee8b01e06c7a750
|
||||
|
||||
If the output code for a define_insn just does a switch (which_alternative) with no other computation we can almost always
|
||||
replace it with more compact MD syntax for each alternative in a multi-alternative '@' block.
|
||||
This patch cleans up some such patterns in the aarch64 backend, making them shorter and more concise.
|
||||
No behavioural change intended.
|
||||
|
||||
Bootstrapped and tested on aarch64-none-linux-gnu.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): Rewrite
|
||||
output template to avoid explicit switch on which_alternative.
|
||||
(*aarch64_simd_mov<VQMOV:mode>): Likewise.
|
||||
(and<mode>3): Likewise.
|
||||
(ior<mode>3): Likewise.
|
||||
* config/aarch64/aarch64.md (*mov<mode>_aarch64): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 97 +++++++++---------------------
|
||||
gcc/config/aarch64/aarch64.md | 42 ++++---------
|
||||
2 files changed, 40 insertions(+), 99 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index ef7fc4ecb..2d688edf5 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -122,28 +122,16 @@
|
||||
"TARGET_FLOAT
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
||||
-{
|
||||
- switch (which_alternative)
|
||||
- {
|
||||
- case 0: return "ldr\t%d0, %1";
|
||||
- case 1: return "str\txzr, %0";
|
||||
- case 2: return "str\t%d1, %0";
|
||||
- case 3:
|
||||
- if (TARGET_SIMD)
|
||||
- return "mov\t%0.<Vbtype>, %1.<Vbtype>";
|
||||
- return "fmov\t%d0, %d1";
|
||||
- case 4:
|
||||
- if (TARGET_SIMD)
|
||||
- return "umov\t%0, %1.d[0]";
|
||||
- return "fmov\t%x0, %d1";
|
||||
- case 5: return "fmov\t%d0, %1";
|
||||
- case 6: return "mov\t%0, %1";
|
||||
- case 7:
|
||||
- return aarch64_output_simd_mov_immediate (operands[1], 64);
|
||||
- case 8: return "fmov\t%d0, xzr";
|
||||
- default: gcc_unreachable ();
|
||||
- }
|
||||
-}
|
||||
+ "@
|
||||
+ ldr\t%d0, %1
|
||||
+ str\txzr, %0
|
||||
+ str\t%d1, %0
|
||||
+ * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
|
||||
+ * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
|
||||
+ fmov\t%d0, %1
|
||||
+ mov\t%0, %1
|
||||
+ * return aarch64_output_simd_mov_immediate (operands[1], 64);
|
||||
+ fmov\t%d0, xzr"
|
||||
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
|
||||
neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
||||
mov_reg, neon_move<q>, f_mcr")
|
||||
@@ -158,29 +146,16 @@
|
||||
"TARGET_FLOAT
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
||||
-{
|
||||
- switch (which_alternative)
|
||||
- {
|
||||
- case 0:
|
||||
- return "ldr\t%q0, %1";
|
||||
- case 1:
|
||||
- return "stp\txzr, xzr, %0";
|
||||
- case 2:
|
||||
- return "str\t%q1, %0";
|
||||
- case 3:
|
||||
- return "mov\t%0.<Vbtype>, %1.<Vbtype>";
|
||||
- case 4:
|
||||
- case 5:
|
||||
- case 6:
|
||||
- return "#";
|
||||
- case 7:
|
||||
- return aarch64_output_simd_mov_immediate (operands[1], 128);
|
||||
- case 8:
|
||||
- return "fmov\t%d0, xzr";
|
||||
- default:
|
||||
- gcc_unreachable ();
|
||||
- }
|
||||
-}
|
||||
+ "@
|
||||
+ ldr\t%q0, %1
|
||||
+ stp\txzr, xzr, %0
|
||||
+ str\t%q1, %0
|
||||
+ mov\t%0.<Vbtype>, %1.<Vbtype>
|
||||
+ #
|
||||
+ #
|
||||
+ #
|
||||
+ * return aarch64_output_simd_mov_immediate (operands[1], 128);
|
||||
+ fmov\t%d0, xzr"
|
||||
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
|
||||
neon_logic<q>, multiple, multiple,\
|
||||
multiple, neon_move<q>, fmov")
|
||||
@@ -1004,18 +979,10 @@
|
||||
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
|
||||
(match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
|
||||
"TARGET_SIMD"
|
||||
- {
|
||||
- switch (which_alternative)
|
||||
- {
|
||||
- case 0:
|
||||
- return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
|
||||
- case 1:
|
||||
- return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
|
||||
- AARCH64_CHECK_BIC);
|
||||
- default:
|
||||
- gcc_unreachable ();
|
||||
- }
|
||||
- }
|
||||
+ "@
|
||||
+ and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
|
||||
+ * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
|
||||
+ AARCH64_CHECK_BIC);"
|
||||
[(set_attr "type" "neon_logic<q>")]
|
||||
)
|
||||
|
||||
@@ -1025,18 +992,10 @@
|
||||
(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
|
||||
(match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
|
||||
"TARGET_SIMD"
|
||||
- {
|
||||
- switch (which_alternative)
|
||||
- {
|
||||
- case 0:
|
||||
- return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
|
||||
- case 1:
|
||||
- return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
|
||||
- AARCH64_CHECK_ORR);
|
||||
- default:
|
||||
- gcc_unreachable ();
|
||||
- }
|
||||
- }
|
||||
+ "@
|
||||
+ orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
|
||||
+ * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
|
||||
+ AARCH64_CHECK_ORR);"
|
||||
[(set_attr "type" "neon_logic<q>")]
|
||||
)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index c0cc91756..7454a5c77 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -1198,36 +1198,18 @@
|
||||
(match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
|
||||
"(register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
|
||||
-{
|
||||
- switch (which_alternative)
|
||||
- {
|
||||
- case 0:
|
||||
- return "mov\t%w0, %w1";
|
||||
- case 1:
|
||||
- return "mov\t%w0, %1";
|
||||
- case 2:
|
||||
- return aarch64_output_scalar_simd_mov_immediate (operands[1],
|
||||
- <MODE>mode);
|
||||
- case 3:
|
||||
- return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
- case 4:
|
||||
- return "ldr<size>\t%w0, %1";
|
||||
- case 5:
|
||||
- return "ldr\t%<size>0, %1";
|
||||
- case 6:
|
||||
- return "str<size>\t%w1, %0";
|
||||
- case 7:
|
||||
- return "str\t%<size>1, %0";
|
||||
- case 8:
|
||||
- return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
|
||||
- case 9:
|
||||
- return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
|
||||
- case 10:
|
||||
- return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
|
||||
- default:
|
||||
- gcc_unreachable ();
|
||||
- }
|
||||
-}
|
||||
+ "@
|
||||
+ mov\t%w0, %w1
|
||||
+ mov\t%w0, %1
|
||||
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
|
||||
+ * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
+ ldr<size>\t%w0, %1
|
||||
+ ldr\t%<size>0, %1
|
||||
+ str<size>\t%w1, %0
|
||||
+ str\t%<size>1, %0
|
||||
+ * return TARGET_SIMD ? \"umov\t%w0, %1.<v>[0]\" : \"fmov\t%w0, %s1\";
|
||||
+ * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\";
|
||||
+ * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>[0]\" : \"fmov\t%s0, %s1\";"
|
||||
;; The "mov_imm" type for CNT is just a placeholder.
|
||||
[(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
|
||||
store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
|
||||
--
|
||||
2.33.0
|
||||
|
||||
631
0152-Backport-SME-Improve-immediate-expansion-PR106583.patch
Normal file
631
0152-Backport-SME-Improve-immediate-expansion-PR106583.patch
Normal file
@ -0,0 +1,631 @@
|
||||
From d5293e2a8db54245553e01ad5d791b7492ad6101 Mon Sep 17 00:00:00 2001
|
||||
From: Wilco Dijkstra <wdijkstr@arm.com>
|
||||
Date: Mon, 24 Oct 2022 15:14:14 +0100
|
||||
Subject: [PATCH 060/157] [Backport][SME] Improve immediate expansion
|
||||
[PR106583]
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a096036589d82175a0f729c2dab73c9a527d075d
|
||||
|
||||
Improve immediate expansion of immediates which can be created from a
|
||||
bitmask immediate and 2 MOVKs. Simplify, refactor and improve efficiency
|
||||
of bitmask checks. Move various immediate handling functions together
|
||||
to avoid forward declarations.
|
||||
|
||||
This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%.
|
||||
|
||||
gcc/
|
||||
|
||||
PR target/106583
|
||||
* config/aarch64/aarch64.cc (aarch64_internal_mov_immediate):
|
||||
Add support for a bitmask immediate with 2 MOVKs.
|
||||
(aarch64_check_bitmask): New function after refactorization.
|
||||
(aarch64_bitmask_imm): Simplify replication of small modes.
|
||||
Split function into 64-bit only version for efficiency.
|
||||
(aarch64_move_imm): Move near other immediate functions.
|
||||
(aarch64_uimm12_shift): Likewise.
|
||||
(aarch64_clamp_to_uimm12_shift): Likewise.
|
||||
(aarch64_movk_shift): Likewise.
|
||||
(aarch64_replicate_bitmask_imm): Remove.
|
||||
(aarch64_and_split_imm1): Likewise.
|
||||
(aarch64_and_split_imm2): Likewise.
|
||||
(aarch64_and_bitmask_imm): Likewise.
|
||||
(aarch64_movw_imm): Likewise.
|
||||
|
||||
gcc/testsuite/
|
||||
PR target/106583
|
||||
* gcc.target/aarch64/pr106583.c: Add new test.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 485 +++++++++++---------
|
||||
gcc/testsuite/gcc.target/aarch64/pr106583.c | 41 ++
|
||||
2 files changed, 301 insertions(+), 225 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106583.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index b4b646fa0..cf7736994 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
|
||||
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
|
||||
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
|
||||
aarch64_addr_query_type);
|
||||
-static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);
|
||||
|
||||
/* The processor for which instructions should be scheduled. */
|
||||
enum aarch64_processor aarch64_tune = cortexa53;
|
||||
@@ -5756,6 +5755,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
|
||||
factor, nelts_per_vq);
|
||||
}
|
||||
|
||||
+/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
|
||||
+
|
||||
+static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
|
||||
+ {
|
||||
+ 0x0000000100000001ull,
|
||||
+ 0x0001000100010001ull,
|
||||
+ 0x0101010101010101ull,
|
||||
+ 0x1111111111111111ull,
|
||||
+ 0x5555555555555555ull,
|
||||
+ };
|
||||
+
|
||||
+
|
||||
+
|
||||
+/* Return true if 64-bit VAL is a valid bitmask immediate. */
|
||||
+static bool
|
||||
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
|
||||
+{
|
||||
+ unsigned HOST_WIDE_INT tmp, mask, first_one, next_one;
|
||||
+ int bits;
|
||||
+
|
||||
+ /* Check for a single sequence of one bits and return quickly if so.
|
||||
+ The special cases of all ones and all zeroes returns false. */
|
||||
+ tmp = val + (val & -val);
|
||||
+
|
||||
+ if (tmp == (tmp & -tmp))
|
||||
+ return (val + 1) > 1;
|
||||
+
|
||||
+ /* Invert if the immediate doesn't start with a zero bit - this means we
|
||||
+ only need to search for sequences of one bits. */
|
||||
+ if (val & 1)
|
||||
+ val = ~val;
|
||||
+
|
||||
+ /* Find the first set bit and set tmp to val with the first sequence of one
|
||||
+ bits removed. Return success if there is a single sequence of ones. */
|
||||
+ first_one = val & -val;
|
||||
+ tmp = val & (val + first_one);
|
||||
+
|
||||
+ if (tmp == 0)
|
||||
+ return true;
|
||||
+
|
||||
+ /* Find the next set bit and compute the difference in bit position. */
|
||||
+ next_one = tmp & -tmp;
|
||||
+ bits = clz_hwi (first_one) - clz_hwi (next_one);
|
||||
+ mask = val ^ tmp;
|
||||
+
|
||||
+ /* Check the bit position difference is a power of 2, and that the first
|
||||
+ sequence of one bits fits within 'bits' bits. */
|
||||
+ if ((mask >> bits) != 0 || bits != (bits & -bits))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Check the sequence of one bits is repeated 64/bits times. */
|
||||
+ return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* Return true if VAL is a valid bitmask immediate for MODE. */
|
||||
+bool
|
||||
+aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
|
||||
+{
|
||||
+ if (mode == DImode)
|
||||
+ return aarch64_bitmask_imm (val_in);
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT val = val_in;
|
||||
+
|
||||
+ if (mode == SImode)
|
||||
+ return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
|
||||
+
|
||||
+ /* Replicate small immediates to fit 64 bits. */
|
||||
+ int size = GET_MODE_UNIT_PRECISION (mode);
|
||||
+ val &= (HOST_WIDE_INT_1U << size) - 1;
|
||||
+ val *= bitmask_imm_mul[__builtin_clz (size) - 26];
|
||||
+
|
||||
+ return aarch64_bitmask_imm (val);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* Return true if the immediate VAL can be a bitfield immediate
|
||||
+ by changing the given MASK bits in VAL to zeroes, ones or bits
|
||||
+ from the other half of VAL. Return the new immediate in VAL2. */
|
||||
+static inline bool
|
||||
+aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
|
||||
+ unsigned HOST_WIDE_INT &val2,
|
||||
+ unsigned HOST_WIDE_INT mask)
|
||||
+{
|
||||
+ val2 = val & ~mask;
|
||||
+ if (val2 != val && aarch64_bitmask_imm (val2))
|
||||
+ return true;
|
||||
+ val2 = val | mask;
|
||||
+ if (val2 != val && aarch64_bitmask_imm (val2))
|
||||
+ return true;
|
||||
+ val = val & ~mask;
|
||||
+ val2 = val | (((val >> 32) | (val << 32)) & mask);
|
||||
+ if (val2 != val && aarch64_bitmask_imm (val2))
|
||||
+ return true;
|
||||
+ val2 = val | (((val >> 16) | (val << 48)) & mask);
|
||||
+ if (val2 != val && aarch64_bitmask_imm (val2))
|
||||
+ return true;
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* Return true if val is an immediate that can be loaded into a
|
||||
+ register by a MOVZ instruction. */
|
||||
+static bool
|
||||
+aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
|
||||
+{
|
||||
+ if (GET_MODE_SIZE (mode) > 4)
|
||||
+ {
|
||||
+ if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
|
||||
+ || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
|
||||
+ return 1;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Ignore sign extension. */
|
||||
+ val &= (HOST_WIDE_INT) 0xffffffff;
|
||||
+ }
|
||||
+ return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
|
||||
+ || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* Return true if VAL is an immediate that can be loaded into a
|
||||
+ register in a single instruction. */
|
||||
+bool
|
||||
+aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
|
||||
+{
|
||||
+ scalar_int_mode int_mode;
|
||||
+ if (!is_a <scalar_int_mode> (mode, &int_mode))
|
||||
+ return false;
|
||||
+
|
||||
+ if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
|
||||
+ return 1;
|
||||
+ return aarch64_bitmask_imm (val, int_mode);
|
||||
+}
|
||||
+
|
||||
+
|
||||
static int
|
||||
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
|
||||
scalar_int_mode mode)
|
||||
@@ -5786,7 +5922,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
|
||||
emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
|
||||
/* Check if we have to emit a second instruction by checking to see
|
||||
- if any of the upper 32 bits of the original DI mode value is set. */
|
||||
+ if any of the upper 32 bits of the original DI mode value is set. */
|
||||
if (val == val2)
|
||||
return 1;
|
||||
|
||||
@@ -5822,36 +5958,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
|
||||
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
|
||||
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
|
||||
|
||||
- if (zero_match != 2 && one_match != 2)
|
||||
+ if (zero_match < 2 && one_match < 2)
|
||||
{
|
||||
/* Try emitting a bitmask immediate with a movk replacing 16 bits.
|
||||
For a 64-bit bitmask try whether changing 16 bits to all ones or
|
||||
zeroes creates a valid bitmask. To check any repeated bitmask,
|
||||
try using 16 bits from the other 32-bit half of val. */
|
||||
|
||||
- for (i = 0; i < 64; i += 16, mask <<= 16)
|
||||
- {
|
||||
- val2 = val & ~mask;
|
||||
- if (val2 != val && aarch64_bitmask_imm (val2, mode))
|
||||
- break;
|
||||
- val2 = val | mask;
|
||||
- if (val2 != val && aarch64_bitmask_imm (val2, mode))
|
||||
- break;
|
||||
- val2 = val2 & ~mask;
|
||||
- val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
|
||||
- if (val2 != val && aarch64_bitmask_imm (val2, mode))
|
||||
- break;
|
||||
- }
|
||||
- if (i != 64)
|
||||
- {
|
||||
- if (generate)
|
||||
+ for (i = 0; i < 64; i += 16)
|
||||
+ if (aarch64_check_bitmask (val, val2, mask << i))
|
||||
+ {
|
||||
+ if (generate)
|
||||
+ {
|
||||
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
|
||||
+ GEN_INT ((val >> i) & 0xffff)));
|
||||
+ }
|
||||
+ return 2;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
|
||||
+ if (zero_match + one_match == 0)
|
||||
+ {
|
||||
+ for (i = 0; i < 48; i += 16)
|
||||
+ for (int j = i + 16; j < 64; j += 16)
|
||||
+ if (aarch64_check_bitmask (val, val2, (mask << i) | (mask << j)))
|
||||
{
|
||||
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
|
||||
- GEN_INT ((val >> i) & 0xffff)));
|
||||
+ if (generate)
|
||||
+ {
|
||||
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
|
||||
+ GEN_INT ((val >> i) & 0xffff)));
|
||||
+ emit_insn (gen_insv_immdi (dest, GEN_INT (j),
|
||||
+ GEN_INT ((val >> j) & 0xffff)));
|
||||
+ }
|
||||
+ return 3;
|
||||
}
|
||||
- return 2;
|
||||
- }
|
||||
}
|
||||
|
||||
/* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
|
||||
@@ -5898,6 +6041,99 @@ aarch64_mov128_immediate (rtx imm)
|
||||
}
|
||||
|
||||
|
||||
+/* Return true if val can be encoded as a 12-bit unsigned immediate with
|
||||
+ a left shift of 0 or 12 bits. */
|
||||
+bool
|
||||
+aarch64_uimm12_shift (HOST_WIDE_INT val)
|
||||
+{
|
||||
+ return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
|
||||
+ || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
|
||||
+ );
|
||||
+}
|
||||
+
|
||||
+/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
|
||||
+ that can be created with a left shift of 0 or 12. */
|
||||
+static HOST_WIDE_INT
|
||||
+aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
|
||||
+{
|
||||
+ /* Check to see if the value fits in 24 bits, as that is the maximum we can
|
||||
+ handle correctly. */
|
||||
+ gcc_assert ((val & 0xffffff) == val);
|
||||
+
|
||||
+ if (((val & 0xfff) << 0) == val)
|
||||
+ return val;
|
||||
+
|
||||
+ return val & (0xfff << 12);
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* Test whether:
|
||||
+
|
||||
+ X = (X & AND_VAL) | IOR_VAL;
|
||||
+
|
||||
+ can be implemented using:
|
||||
+
|
||||
+ MOVK X, #(IOR_VAL >> shift), LSL #shift
|
||||
+
|
||||
+ Return the shift if so, otherwise return -1. */
|
||||
+int
|
||||
+aarch64_movk_shift (const wide_int_ref &and_val,
|
||||
+ const wide_int_ref &ior_val)
|
||||
+{
|
||||
+ unsigned int precision = and_val.get_precision ();
|
||||
+ unsigned HOST_WIDE_INT mask = 0xffff;
|
||||
+ for (unsigned int shift = 0; shift < precision; shift += 16)
|
||||
+ {
|
||||
+ if (and_val == ~mask && (ior_val & mask) == ior_val)
|
||||
+ return shift;
|
||||
+ mask <<= 16;
|
||||
+ }
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
|
||||
+ Assumed precondition: VAL_IN Is not zero. */
|
||||
+
|
||||
+unsigned HOST_WIDE_INT
|
||||
+aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
|
||||
+{
|
||||
+ int lowest_bit_set = ctz_hwi (val_in);
|
||||
+ int highest_bit_set = floor_log2 (val_in);
|
||||
+ gcc_assert (val_in != 0);
|
||||
+
|
||||
+ return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
|
||||
+ (HOST_WIDE_INT_1U << lowest_bit_set));
|
||||
+}
|
||||
+
|
||||
+/* Create constant where bits outside of lowest bit set to highest bit set
|
||||
+ are set to 1. */
|
||||
+
|
||||
+unsigned HOST_WIDE_INT
|
||||
+aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
|
||||
+{
|
||||
+ return val_in | ~aarch64_and_split_imm1 (val_in);
|
||||
+}
|
||||
+
|
||||
+/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
|
||||
+
|
||||
+bool
|
||||
+aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
|
||||
+{
|
||||
+ scalar_int_mode int_mode;
|
||||
+ if (!is_a <scalar_int_mode> (mode, &int_mode))
|
||||
+ return false;
|
||||
+
|
||||
+ if (aarch64_bitmask_imm (val_in, int_mode))
|
||||
+ return false;
|
||||
+
|
||||
+ if (aarch64_move_imm (val_in, int_mode))
|
||||
+ return false;
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
|
||||
+
|
||||
+ return aarch64_bitmask_imm (imm2, int_mode);
|
||||
+}
|
||||
+
|
||||
/* Return the number of temporary registers that aarch64_add_offset_1
|
||||
would need to add OFFSET to a register. */
|
||||
|
||||
@@ -10379,207 +10615,6 @@ aarch64_tls_referenced_p (rtx x)
|
||||
}
|
||||
|
||||
|
||||
-/* Return true if val can be encoded as a 12-bit unsigned immediate with
|
||||
- a left shift of 0 or 12 bits. */
|
||||
-bool
|
||||
-aarch64_uimm12_shift (HOST_WIDE_INT val)
|
||||
-{
|
||||
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
|
||||
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
|
||||
- );
|
||||
-}
|
||||
-
|
||||
-/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
|
||||
- that can be created with a left shift of 0 or 12. */
|
||||
-static HOST_WIDE_INT
|
||||
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
|
||||
-{
|
||||
- /* Check to see if the value fits in 24 bits, as that is the maximum we can
|
||||
- handle correctly. */
|
||||
- gcc_assert ((val & 0xffffff) == val);
|
||||
-
|
||||
- if (((val & 0xfff) << 0) == val)
|
||||
- return val;
|
||||
-
|
||||
- return val & (0xfff << 12);
|
||||
-}
|
||||
-
|
||||
-/* Return true if val is an immediate that can be loaded into a
|
||||
- register by a MOVZ instruction. */
|
||||
-static bool
|
||||
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
|
||||
-{
|
||||
- if (GET_MODE_SIZE (mode) > 4)
|
||||
- {
|
||||
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
|
||||
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
|
||||
- return 1;
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- /* Ignore sign extension. */
|
||||
- val &= (HOST_WIDE_INT) 0xffffffff;
|
||||
- }
|
||||
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
|
||||
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
|
||||
-}
|
||||
-
|
||||
-/* Test whether:
|
||||
-
|
||||
- X = (X & AND_VAL) | IOR_VAL;
|
||||
-
|
||||
- can be implemented using:
|
||||
-
|
||||
- MOVK X, #(IOR_VAL >> shift), LSL #shift
|
||||
-
|
||||
- Return the shift if so, otherwise return -1. */
|
||||
-int
|
||||
-aarch64_movk_shift (const wide_int_ref &and_val,
|
||||
- const wide_int_ref &ior_val)
|
||||
-{
|
||||
- unsigned int precision = and_val.get_precision ();
|
||||
- unsigned HOST_WIDE_INT mask = 0xffff;
|
||||
- for (unsigned int shift = 0; shift < precision; shift += 16)
|
||||
- {
|
||||
- if (and_val == ~mask && (ior_val & mask) == ior_val)
|
||||
- return shift;
|
||||
- mask <<= 16;
|
||||
- }
|
||||
- return -1;
|
||||
-}
|
||||
-
|
||||
-/* VAL is a value with the inner mode of MODE. Replicate it to fill a
|
||||
- 64-bit (DImode) integer. */
|
||||
-
|
||||
-static unsigned HOST_WIDE_INT
|
||||
-aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
|
||||
-{
|
||||
- unsigned int size = GET_MODE_UNIT_PRECISION (mode);
|
||||
- while (size < 64)
|
||||
- {
|
||||
- val &= (HOST_WIDE_INT_1U << size) - 1;
|
||||
- val |= val << size;
|
||||
- size *= 2;
|
||||
- }
|
||||
- return val;
|
||||
-}
|
||||
-
|
||||
-/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
|
||||
-
|
||||
-static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
|
||||
- {
|
||||
- 0x0000000100000001ull,
|
||||
- 0x0001000100010001ull,
|
||||
- 0x0101010101010101ull,
|
||||
- 0x1111111111111111ull,
|
||||
- 0x5555555555555555ull,
|
||||
- };
|
||||
-
|
||||
-
|
||||
-/* Return true if val is a valid bitmask immediate. */
|
||||
-
|
||||
-bool
|
||||
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
|
||||
-{
|
||||
- unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
|
||||
- int bits;
|
||||
-
|
||||
- /* Check for a single sequence of one bits and return quickly if so.
|
||||
- The special cases of all ones and all zeroes returns false. */
|
||||
- val = aarch64_replicate_bitmask_imm (val_in, mode);
|
||||
- tmp = val + (val & -val);
|
||||
-
|
||||
- if (tmp == (tmp & -tmp))
|
||||
- return (val + 1) > 1;
|
||||
-
|
||||
- /* Replicate 32-bit immediates so we can treat them as 64-bit. */
|
||||
- if (mode == SImode)
|
||||
- val = (val << 32) | (val & 0xffffffff);
|
||||
-
|
||||
- /* Invert if the immediate doesn't start with a zero bit - this means we
|
||||
- only need to search for sequences of one bits. */
|
||||
- if (val & 1)
|
||||
- val = ~val;
|
||||
-
|
||||
- /* Find the first set bit and set tmp to val with the first sequence of one
|
||||
- bits removed. Return success if there is a single sequence of ones. */
|
||||
- first_one = val & -val;
|
||||
- tmp = val & (val + first_one);
|
||||
-
|
||||
- if (tmp == 0)
|
||||
- return true;
|
||||
-
|
||||
- /* Find the next set bit and compute the difference in bit position. */
|
||||
- next_one = tmp & -tmp;
|
||||
- bits = clz_hwi (first_one) - clz_hwi (next_one);
|
||||
- mask = val ^ tmp;
|
||||
-
|
||||
- /* Check the bit position difference is a power of 2, and that the first
|
||||
- sequence of one bits fits within 'bits' bits. */
|
||||
- if ((mask >> bits) != 0 || bits != (bits & -bits))
|
||||
- return false;
|
||||
-
|
||||
- /* Check the sequence of one bits is repeated 64/bits times. */
|
||||
- return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
|
||||
-}
|
||||
-
|
||||
-/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
|
||||
- Assumed precondition: VAL_IN Is not zero. */
|
||||
-
|
||||
-unsigned HOST_WIDE_INT
|
||||
-aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
|
||||
-{
|
||||
- int lowest_bit_set = ctz_hwi (val_in);
|
||||
- int highest_bit_set = floor_log2 (val_in);
|
||||
- gcc_assert (val_in != 0);
|
||||
-
|
||||
- return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
|
||||
- (HOST_WIDE_INT_1U << lowest_bit_set));
|
||||
-}
|
||||
-
|
||||
-/* Create constant where bits outside of lowest bit set to highest bit set
|
||||
- are set to 1. */
|
||||
-
|
||||
-unsigned HOST_WIDE_INT
|
||||
-aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
|
||||
-{
|
||||
- return val_in | ~aarch64_and_split_imm1 (val_in);
|
||||
-}
|
||||
-
|
||||
-/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
|
||||
-
|
||||
-bool
|
||||
-aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
|
||||
-{
|
||||
- scalar_int_mode int_mode;
|
||||
- if (!is_a <scalar_int_mode> (mode, &int_mode))
|
||||
- return false;
|
||||
-
|
||||
- if (aarch64_bitmask_imm (val_in, int_mode))
|
||||
- return false;
|
||||
-
|
||||
- if (aarch64_move_imm (val_in, int_mode))
|
||||
- return false;
|
||||
-
|
||||
- unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
|
||||
-
|
||||
- return aarch64_bitmask_imm (imm2, int_mode);
|
||||
-}
|
||||
-
|
||||
-/* Return true if val is an immediate that can be loaded into a
|
||||
- register in a single instruction. */
|
||||
-bool
|
||||
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
|
||||
-{
|
||||
- scalar_int_mode int_mode;
|
||||
- if (!is_a <scalar_int_mode> (mode, &int_mode))
|
||||
- return false;
|
||||
-
|
||||
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
|
||||
- return 1;
|
||||
- return aarch64_bitmask_imm (val, int_mode);
|
||||
-}
|
||||
-
|
||||
static bool
|
||||
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
|
||||
{
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c
|
||||
new file mode 100644
|
||||
index 000000000..0f9315808
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c
|
||||
@@ -0,0 +1,41 @@
|
||||
+/* { dg-do assemble } */
|
||||
+/* { dg-options "-O2 --save-temps" } */
|
||||
+
|
||||
+long f1 (void)
|
||||
+{
|
||||
+ return 0x7efefefefefefeff;
|
||||
+}
|
||||
+
|
||||
+long f2 (void)
|
||||
+{
|
||||
+ return 0x12345678aaaaaaaa;
|
||||
+}
|
||||
+
|
||||
+long f3 (void)
|
||||
+{
|
||||
+ return 0x1234cccccccc5678;
|
||||
+}
|
||||
+
|
||||
+long f4 (void)
|
||||
+{
|
||||
+ return 0x7777123456787777;
|
||||
+}
|
||||
+
|
||||
+long f5 (void)
|
||||
+{
|
||||
+ return 0x5555555512345678;
|
||||
+}
|
||||
+
|
||||
+long f6 (void)
|
||||
+{
|
||||
+ return 0x1234bbbb5678bbbb;
|
||||
+}
|
||||
+
|
||||
+long f7 (void)
|
||||
+{
|
||||
+ return 0x4444123444445678;
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {\tmovk\t} 14 } } */
|
||||
+/* { dg-final { scan-assembler-times {\tmov\t} 7 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
410
0153-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
Normal file
410
0153-Backport-SME-AArch64-Cleanup-move-immediate-code.patch
Normal file
@ -0,0 +1,410 @@
|
||||
From d76be4acadc0641cc8e795cd6b8a1c3c83b4fdb2 Mon Sep 17 00:00:00 2001
|
||||
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
|
||||
Date: Mon, 5 Dec 2022 10:49:25 +0000
|
||||
Subject: [PATCH 061/157] [Backport][SME] AArch64: Cleanup move immediate code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ba1536dac780f3f92c5eab999fda6931f6247fc1
|
||||
|
||||
Simplify, refactor and improve various move immediate functions.
|
||||
Allow 32-bit MOVN/I as a valid 64-bit immediate which removes special
|
||||
cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
|
||||
pattern only needs a single alternative for move immediate.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
|
||||
(aarch64_is_mov_xn_imm): New function.
|
||||
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
|
||||
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
|
||||
Simplify special cases.
|
||||
(aarch64_uimm12_shift): Simplify code.
|
||||
(aarch64_clamp_to_uimm12_shift): Likewise.
|
||||
(aarch64_movw_imm): Rename to aarch64_is_movz.
|
||||
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
|
||||
aarch64_internal_mov_immediate.
|
||||
(aarch64_rtx_costs): Likewise.
|
||||
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
|
||||
constraints into single 'O'.
|
||||
(mov<mode>_aarch64): Likewise.
|
||||
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
|
||||
(aarch64_bitmask_imm): Likewise.
|
||||
(aarch64_uimm12_shift): Likewise.
|
||||
(aarch64_is_mov_xn_imm): New prototype.
|
||||
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
|
||||
limit 'N' to 64-bit only moves.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-protos.h | 7 +-
|
||||
gcc/config/aarch64/aarch64.cc | 158 ++++++++++++----------------
|
||||
gcc/config/aarch64/aarch64.md | 17 ++-
|
||||
gcc/config/aarch64/constraints.md | 5 +
|
||||
4 files changed, 85 insertions(+), 102 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index 97984f3ab..3ff1a0163 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
|
||||
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
|
||||
int aarch64_get_condition_code (rtx);
|
||||
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
|
||||
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
|
||||
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
|
||||
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
|
||||
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
|
||||
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
|
||||
@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT,
|
||||
unsigned HOST_WIDE_INT);
|
||||
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
|
||||
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
|
||||
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
|
||||
machine_mode aarch64_sve_int_mode (machine_mode);
|
||||
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
|
||||
machine_mode aarch64_sve_pred_mode (machine_mode);
|
||||
@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
|
||||
bool aarch64_sve_float_mul_immediate_p (rtx);
|
||||
bool aarch64_split_dimode_const_store (rtx, rtx);
|
||||
bool aarch64_symbolic_address_p (rtx);
|
||||
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
|
||||
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
|
||||
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
|
||||
+bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT);
|
||||
bool aarch64_use_return_insn_p (void);
|
||||
const char *aarch64_output_casesi (rtx *);
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index cf7736994..acb659f53 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -5812,12 +5812,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
|
||||
|
||||
/* Return true if VAL is a valid bitmask immediate for MODE. */
|
||||
bool
|
||||
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
|
||||
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
|
||||
{
|
||||
if (mode == DImode)
|
||||
- return aarch64_bitmask_imm (val_in);
|
||||
-
|
||||
- unsigned HOST_WIDE_INT val = val_in;
|
||||
+ return aarch64_bitmask_imm (val);
|
||||
|
||||
if (mode == SImode)
|
||||
return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
|
||||
@@ -5856,51 +5854,55 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
|
||||
}
|
||||
|
||||
|
||||
-/* Return true if val is an immediate that can be loaded into a
|
||||
- register by a MOVZ instruction. */
|
||||
-static bool
|
||||
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
|
||||
+/* Return true if VAL is a valid MOVZ immediate. */
|
||||
+static inline bool
|
||||
+aarch64_is_movz (unsigned HOST_WIDE_INT val)
|
||||
{
|
||||
- if (GET_MODE_SIZE (mode) > 4)
|
||||
- {
|
||||
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
|
||||
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
|
||||
- return 1;
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- /* Ignore sign extension. */
|
||||
- val &= (HOST_WIDE_INT) 0xffffffff;
|
||||
- }
|
||||
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
|
||||
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
|
||||
+ return (val >> (ctz_hwi (val) & 48)) < 65536;
|
||||
}
|
||||
|
||||
|
||||
-/* Return true if VAL is an immediate that can be loaded into a
|
||||
- register in a single instruction. */
|
||||
+/* Return true if immediate VAL can be created by a 64-bit MOVI/MOVN/MOVZ. */
|
||||
bool
|
||||
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
|
||||
+aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT val)
|
||||
{
|
||||
- scalar_int_mode int_mode;
|
||||
- if (!is_a <scalar_int_mode> (mode, &int_mode))
|
||||
- return false;
|
||||
+ return aarch64_is_movz (val) || aarch64_is_movz (~val)
|
||||
+ || aarch64_bitmask_imm (val);
|
||||
+}
|
||||
|
||||
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
|
||||
- return 1;
|
||||
- return aarch64_bitmask_imm (val, int_mode);
|
||||
+
|
||||
+/* Return true if VAL is an immediate that can be created by a single
|
||||
+ MOV instruction. */
|
||||
+bool
|
||||
+aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
|
||||
+{
|
||||
+ gcc_assert (mode == SImode || mode == DImode);
|
||||
+
|
||||
+ if (val < 65536)
|
||||
+ return true;
|
||||
+
|
||||
+ unsigned HOST_WIDE_INT mask =
|
||||
+ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U;
|
||||
+
|
||||
+ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask))
|
||||
+ return true;
|
||||
+
|
||||
+ val = (val & mask) | ((val << 32) & ~mask);
|
||||
+ return aarch64_bitmask_imm (val);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
|
||||
- scalar_int_mode mode)
|
||||
+ machine_mode mode)
|
||||
{
|
||||
int i;
|
||||
unsigned HOST_WIDE_INT val, val2, mask;
|
||||
int one_match, zero_match;
|
||||
int num_insns;
|
||||
|
||||
+ gcc_assert (mode == SImode || mode == DImode);
|
||||
+
|
||||
val = INTVAL (imm);
|
||||
|
||||
if (aarch64_move_imm (val, mode))
|
||||
@@ -5910,31 +5912,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
|
||||
return 1;
|
||||
}
|
||||
|
||||
- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
|
||||
- (with XXXX non-zero). In that case check to see if the move can be done in
|
||||
- a smaller mode. */
|
||||
- val2 = val & 0xffffffff;
|
||||
- if (mode == DImode
|
||||
- && aarch64_move_imm (val2, SImode)
|
||||
- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
|
||||
- {
|
||||
- if (generate)
|
||||
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
-
|
||||
- /* Check if we have to emit a second instruction by checking to see
|
||||
- if any of the upper 32 bits of the original DI mode value is set. */
|
||||
- if (val == val2)
|
||||
- return 1;
|
||||
-
|
||||
- i = (val >> 48) ? 48 : 32;
|
||||
-
|
||||
- if (generate)
|
||||
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
|
||||
- GEN_INT ((val >> i) & 0xffff)));
|
||||
-
|
||||
- return 2;
|
||||
- }
|
||||
-
|
||||
if ((val >> 32) == 0 || mode == SImode)
|
||||
{
|
||||
if (generate)
|
||||
@@ -5958,24 +5935,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
|
||||
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
|
||||
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
|
||||
|
||||
+ /* Try a bitmask immediate and a movk to generate the immediate
|
||||
+ in 2 instructions. */
|
||||
+
|
||||
if (zero_match < 2 && one_match < 2)
|
||||
{
|
||||
- /* Try emitting a bitmask immediate with a movk replacing 16 bits.
|
||||
- For a 64-bit bitmask try whether changing 16 bits to all ones or
|
||||
- zeroes creates a valid bitmask. To check any repeated bitmask,
|
||||
- try using 16 bits from the other 32-bit half of val. */
|
||||
-
|
||||
for (i = 0; i < 64; i += 16)
|
||||
- if (aarch64_check_bitmask (val, val2, mask << i))
|
||||
- {
|
||||
- if (generate)
|
||||
- {
|
||||
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
|
||||
- GEN_INT ((val >> i) & 0xffff)));
|
||||
- }
|
||||
- return 2;
|
||||
- }
|
||||
+ {
|
||||
+ if (aarch64_check_bitmask (val, val2, mask << i))
|
||||
+ break;
|
||||
+
|
||||
+ val2 = val & ~(mask << i);
|
||||
+ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
|
||||
+ break;
|
||||
+ }
|
||||
+
|
||||
+ if (i != 64)
|
||||
+ {
|
||||
+ if (generate)
|
||||
+ {
|
||||
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
|
||||
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
|
||||
+ GEN_INT ((val >> i) & 0xffff)));
|
||||
+ }
|
||||
+ return 2;
|
||||
+ }
|
||||
}
|
||||
|
||||
/* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
|
||||
@@ -6044,26 +6028,24 @@ aarch64_mov128_immediate (rtx imm)
|
||||
/* Return true if val can be encoded as a 12-bit unsigned immediate with
|
||||
a left shift of 0 or 12 bits. */
|
||||
bool
|
||||
-aarch64_uimm12_shift (HOST_WIDE_INT val)
|
||||
+aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
|
||||
{
|
||||
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
|
||||
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
|
||||
- );
|
||||
+ return val < 4096 || (val & 0xfff000) == val;
|
||||
}
|
||||
|
||||
/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
|
||||
that can be created with a left shift of 0 or 12. */
|
||||
static HOST_WIDE_INT
|
||||
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
|
||||
+aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
|
||||
{
|
||||
/* Check to see if the value fits in 24 bits, as that is the maximum we can
|
||||
handle correctly. */
|
||||
- gcc_assert ((val & 0xffffff) == val);
|
||||
+ gcc_assert (val < 0x1000000);
|
||||
|
||||
- if (((val & 0xfff) << 0) == val)
|
||||
+ if (val < 4096)
|
||||
return val;
|
||||
|
||||
- return val & (0xfff << 12);
|
||||
+ return val & 0xfff000;
|
||||
}
|
||||
|
||||
|
||||
@@ -7211,8 +7193,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
|
||||
return;
|
||||
}
|
||||
|
||||
- aarch64_internal_mov_immediate (dest, imm, true,
|
||||
- as_a <scalar_int_mode> (mode));
|
||||
+ aarch64_internal_mov_immediate (dest, imm, true, mode);
|
||||
}
|
||||
|
||||
/* Return the MEM rtx that provides the canary value that should be used
|
||||
@@ -11410,9 +11391,7 @@ aarch64_float_const_rtx_p (rtx x)
|
||||
&& SCALAR_FLOAT_MODE_P (mode)
|
||||
&& aarch64_reinterpret_float_as_int (x, &ival))
|
||||
{
|
||||
- scalar_int_mode imode = (mode == HFmode
|
||||
- ? SImode
|
||||
- : int_mode_for_mode (mode).require ());
|
||||
+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ? DImode : SImode;
|
||||
int num_instr = aarch64_internal_mov_immediate
|
||||
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
|
||||
return num_instr < 3;
|
||||
@@ -14049,10 +14028,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||||
proportionally expensive to the number of instructions
|
||||
required to build that constant. This is true whether we
|
||||
are compiling for SPEED or otherwise. */
|
||||
- if (!is_a <scalar_int_mode> (mode, &int_mode))
|
||||
- int_mode = word_mode;
|
||||
+ machine_mode imode = known_le (GET_MODE_SIZE (mode), 4)
|
||||
+ ? SImode : DImode;
|
||||
*cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
|
||||
- (NULL_RTX, x, false, int_mode));
|
||||
+ (NULL_RTX, x, false, imode));
|
||||
}
|
||||
return true;
|
||||
|
||||
@@ -14068,9 +14047,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||||
bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
|
||||
gcc_assert (succeed);
|
||||
|
||||
- scalar_int_mode imode = (mode == HFmode
|
||||
- ? SImode
|
||||
- : int_mode_for_mode (mode).require ());
|
||||
+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8)
|
||||
+ ? DImode : SImode;
|
||||
int ncost = aarch64_internal_mov_immediate
|
||||
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
|
||||
*cost += COSTS_N_INSNS (ncost);
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 7454a5c77..ea94152bf 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -1288,16 +1288,15 @@
|
||||
)
|
||||
|
||||
(define_insn_and_split "*movdi_aarch64"
|
||||
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
|
||||
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
|
||||
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
|
||||
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
|
||||
"(register_operand (operands[0], DImode)
|
||||
|| aarch64_reg_or_zero (operands[1], DImode))"
|
||||
"@
|
||||
mov\\t%x0, %x1
|
||||
mov\\t%0, %x1
|
||||
mov\\t%x0, %1
|
||||
- mov\\t%x0, %1
|
||||
- mov\\t%w0, %1
|
||||
+ * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
|
||||
#
|
||||
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
ldr\\t%x0, %1
|
||||
@@ -1319,11 +1318,11 @@
|
||||
DONE;
|
||||
}"
|
||||
;; The "mov_imm" type for CNTD is just a placeholder.
|
||||
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
|
||||
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
|
||||
load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
|
||||
fmov,neon_move")
|
||||
- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
|
||||
- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
|
||||
+ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
|
||||
+ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
|
||||
)
|
||||
|
||||
(define_insn "insv_imm<mode>"
|
||||
@@ -1487,7 +1486,7 @@
|
||||
|
||||
(define_insn "*mov<mode>_aarch64"
|
||||
[(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||||
- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
|
||||
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
|
||||
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||||
"@
|
||||
@@ -1502,7 +1501,7 @@
|
||||
ldr\\t%x0, %1
|
||||
str\\t%x1, %0
|
||||
mov\\t%x0, %x1
|
||||
- mov\\t%x0, %1"
|
||||
+ * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";"
|
||||
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
|
||||
f_loadd,f_stored,load_8,store_8,mov_reg,\
|
||||
fconstd")
|
||||
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
|
||||
index ee7587cca..750a42fb1 100644
|
||||
--- a/gcc/config/aarch64/constraints.md
|
||||
+++ b/gcc/config/aarch64/constraints.md
|
||||
@@ -106,6 +106,11 @@
|
||||
|
||||
(define_constraint "N"
|
||||
"A constant that can be used with a 64-bit MOV immediate operation."
|
||||
+ (and (match_code "const_int")
|
||||
+ (match_test "aarch64_is_mov_xn_imm (ival)")))
|
||||
+
|
||||
+(define_constraint "O"
|
||||
+ "A constant that can be used with a 32 or 64-bit MOV immediate operation."
|
||||
(and (match_code "const_int")
|
||||
(match_test "aarch64_move_imm (ival, DImode)")))
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
229
0154-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
Normal file
229
0154-Backport-SME-AArch64-convert-some-patterns-to-compac.patch
Normal file
@ -0,0 +1,229 @@
|
||||
From 5db3e7b68d5a443e908011b8d53de625ae462f82 Mon Sep 17 00:00:00 2001
|
||||
From: Tamar Christina <tamar.christina@arm.com>
|
||||
Date: Mon, 19 Jun 2023 15:55:28 +0100
|
||||
Subject: [PATCH 062/157] [Backport][SME] AArch64: convert some patterns to
|
||||
compact MD syntax
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c72a7b849853716d94e8d313be5dce3c22850113
|
||||
|
||||
Hi All,
|
||||
|
||||
This converts some patterns in the AArch64 backend to use the new
|
||||
compact syntax.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/aarch64/aarch64.md (arches): Add nosimd.
|
||||
(*mov<mode>_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to
|
||||
compact syntax.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.md | 161 ++++++++++++++++------------------
|
||||
1 file changed, 78 insertions(+), 83 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index ea94152bf..5d02da42f 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -378,7 +378,7 @@
|
||||
;; As a convenience, "fp_q" means "fp" + the ability to move between
|
||||
;; Q registers and is equivalent to "simd".
|
||||
|
||||
-(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16])
|
||||
+(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16])
|
||||
|
||||
(define_enum_attr "arch" "arches" (const_string "any"))
|
||||
|
||||
@@ -409,6 +409,9 @@
|
||||
(and (eq_attr "arch" "fp_q, simd")
|
||||
(match_test "TARGET_SIMD"))
|
||||
|
||||
+ (and (eq_attr "arch" "nosimd")
|
||||
+ (match_test "!TARGET_SIMD"))
|
||||
+
|
||||
(and (eq_attr "arch" "fp16")
|
||||
(match_test "TARGET_FP_F16INST"))
|
||||
|
||||
@@ -1194,26 +1197,27 @@
|
||||
)
|
||||
|
||||
(define_insn "*mov<mode>_aarch64"
|
||||
- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w")
|
||||
- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
|
||||
+ [(set (match_operand:SHORT 0 "nonimmediate_operand")
|
||||
+ (match_operand:SHORT 1 "aarch64_mov_operand"))]
|
||||
"(register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
|
||||
- "@
|
||||
- mov\t%w0, %w1
|
||||
- mov\t%w0, %1
|
||||
- * return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
|
||||
- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
- ldr<size>\t%w0, %1
|
||||
- ldr\t%<size>0, %1
|
||||
- str<size>\t%w1, %0
|
||||
- str\t%<size>1, %0
|
||||
- * return TARGET_SIMD ? \"umov\t%w0, %1.<v>[0]\" : \"fmov\t%w0, %s1\";
|
||||
- * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\";
|
||||
- * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>[0]\" : \"fmov\t%s0, %s1\";"
|
||||
- ;; The "mov_imm" type for CNT is just a placeholder.
|
||||
- [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
|
||||
- store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
|
||||
- (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
|
||||
+ {@ [cons: =0, 1; attrs: type, arch]
|
||||
+ [r, r ; mov_reg , * ] mov\t%w0, %w1
|
||||
+ [r, M ; mov_imm , * ] mov\t%w0, %1
|
||||
+ [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
|
||||
+ /* The "mov_imm" type for CNT is just a placeholder. */
|
||||
+ [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ [r, m ; load_4 , * ] ldr<size>\t%w0, %1
|
||||
+ [w, m ; load_4 , * ] ldr\t%<size>0, %1
|
||||
+ [m, r Z ; store_4 , * ] str<size>\\t%w1, %0
|
||||
+ [m, w ; store_4 , * ] str\t%<size>1, %0
|
||||
+ [r, w ; neon_to_gp<q> , simd ] umov\t%w0, %1.<v>[0]
|
||||
+ [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1 /*foo */
|
||||
+ [w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1
|
||||
+ [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
|
||||
+ [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0]
|
||||
+ [w, w ; neon_dup , nosimd] fmov\t%s0, %s1
|
||||
+ }
|
||||
)
|
||||
|
||||
(define_expand "mov<mode>"
|
||||
@@ -1250,79 +1254,70 @@
|
||||
)
|
||||
|
||||
(define_insn_and_split "*movsi_aarch64"
|
||||
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w")
|
||||
- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
|
||||
+ [(set (match_operand:SI 0 "nonimmediate_operand")
|
||||
+ (match_operand:SI 1 "aarch64_mov_operand"))]
|
||||
"(register_operand (operands[0], SImode)
|
||||
|| aarch64_reg_or_zero (operands[1], SImode))"
|
||||
- "@
|
||||
- mov\\t%w0, %w1
|
||||
- mov\\t%w0, %w1
|
||||
- mov\\t%w0, %w1
|
||||
- mov\\t%w0, %1
|
||||
- #
|
||||
- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
- ldr\\t%w0, %1
|
||||
- ldr\\t%s0, %1
|
||||
- str\\t%w1, %0
|
||||
- str\\t%s1, %0
|
||||
- adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
|
||||
- adr\\t%x0, %c1
|
||||
- adrp\\t%x0, %A1
|
||||
- fmov\\t%s0, %w1
|
||||
- fmov\\t%w0, %s1
|
||||
- fmov\\t%s0, %s1
|
||||
- * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
|
||||
+ {@ [cons: =0, 1; attrs: type, arch, length]
|
||||
+ [r k, r ; mov_reg , * , 4] mov\t%w0, %w1
|
||||
+ [r , k ; mov_reg , * , 4] ^
|
||||
+ [r , M ; mov_imm , * , 4] mov\t%w0, %1
|
||||
+ [r , n ; mov_imm , * ,16] #
|
||||
+ /* The "mov_imm" type for CNT is just a placeholder. */
|
||||
+ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ [r , m ; load_4 , * , 4] ldr\t%w0, %1
|
||||
+ [w , m ; load_4 , fp , 4] ldr\t%s0, %1
|
||||
+ [m , r Z; store_4 , * , 4] str\t%w1, %0
|
||||
+ [m , w ; store_4 , fp , 4] str\t%s1, %0
|
||||
+ [r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
|
||||
+ [r , Usa; adr , * , 4] adr\t%x0, %c1
|
||||
+ [r , Ush; adr , * , 4] adrp\t%x0, %A1
|
||||
+ [w , r Z; f_mcr , fp , 4] fmov\t%s0, %w1
|
||||
+ [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1
|
||||
+ [w , w ; fmov , fp , 4] fmov\t%s0, %s1
|
||||
+ [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
|
||||
+ }
|
||||
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
|
||||
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
||||
- [(const_int 0)]
|
||||
- "{
|
||||
- aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
- DONE;
|
||||
- }"
|
||||
- ;; The "mov_imm" type for CNT is just a placeholder.
|
||||
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
|
||||
- load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
|
||||
- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
|
||||
- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
|
||||
-]
|
||||
+ [(const_int 0)]
|
||||
+ {
|
||||
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
+ DONE;
|
||||
+ }
|
||||
)
|
||||
|
||||
(define_insn_and_split "*movdi_aarch64"
|
||||
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
|
||||
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
|
||||
+ [(set (match_operand:DI 0 "nonimmediate_operand")
|
||||
+ (match_operand:DI 1 "aarch64_mov_operand"))]
|
||||
"(register_operand (operands[0], DImode)
|
||||
|| aarch64_reg_or_zero (operands[1], DImode))"
|
||||
- "@
|
||||
- mov\\t%x0, %x1
|
||||
- mov\\t%0, %x1
|
||||
- mov\\t%x0, %1
|
||||
- * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
|
||||
- #
|
||||
- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
|
||||
- ldr\\t%x0, %1
|
||||
- ldr\\t%d0, %1
|
||||
- str\\t%x1, %0
|
||||
- str\\t%d1, %0
|
||||
- * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
|
||||
- adr\\t%x0, %c1
|
||||
- adrp\\t%x0, %A1
|
||||
- fmov\\t%d0, %x1
|
||||
- fmov\\t%x0, %d1
|
||||
- fmov\\t%d0, %d1
|
||||
- * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
|
||||
- "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
|
||||
- && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
||||
- [(const_int 0)]
|
||||
- "{
|
||||
- aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
- DONE;
|
||||
- }"
|
||||
- ;; The "mov_imm" type for CNTD is just a placeholder.
|
||||
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
|
||||
- load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
|
||||
- fmov,neon_move")
|
||||
- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
|
||||
- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
|
||||
+ {@ [cons: =0, 1; attrs: type, arch, length]
|
||||
+ [r, r ; mov_reg , * , 4] mov\t%x0, %x1
|
||||
+ [k, r ; mov_reg , * , 4] mov\t%0, %x1
|
||||
+ [r, k ; mov_reg , * , 4] mov\t%x0, %1
|
||||
+ [r, O ; mov_imm , * , 4] << aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? "mov\t%x0, %1" : "mov\t%w0, %1";
|
||||
+ [r, n ; mov_imm , * ,16] #
|
||||
+ /* The "mov_imm" type for CNT is just a placeholder. */
|
||||
+ [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ [r, m ; load_8 , * , 4] ldr\t%x0, %1
|
||||
+ [w, m ; load_8 , fp , 4] ldr\t%d0, %1
|
||||
+ [m, r Z; store_8 , * , 4] str\t%x1, %0
|
||||
+ [m, w ; store_8 , fp , 4] str\t%d1, %0
|
||||
+ [r, Usw; load_8 , * , 8] << TARGET_ILP32 ? "adrp\t%0, %A1;ldr\t%w0, [%0, %L1]" : "adrp\t%0, %A1;ldr\t%0, [%0, %L1]";
|
||||
+ [r, Usa; adr , * , 4] adr\t%x0, %c1
|
||||
+ [r, Ush; adr , * , 4] adrp\t%x0, %A1
|
||||
+ [w, r Z; f_mcr , fp , 4] fmov\t%d0, %x1
|
||||
+ [r, w ; f_mrc , fp , 4] fmov\t%x0, %d1
|
||||
+ [w, w ; fmov , fp , 4] fmov\t%d0, %d1
|
||||
+ [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
|
||||
+ }
|
||||
+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
|
||||
+ && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
|
||||
+ [(const_int 0)]
|
||||
+ {
|
||||
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
|
||||
+ DONE;
|
||||
+ }
|
||||
)
|
||||
|
||||
(define_insn "insv_imm<mode>"
|
||||
--
|
||||
2.33.0
|
||||
|
||||
792
0155-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
Normal file
792
0155-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch
Normal file
@ -0,0 +1,792 @@
|
||||
From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:18 +0000
|
||||
Subject: [PATCH 063/157] [Backport][SME] aarch64: Use SVE's RDVL instruction
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0
|
||||
|
||||
We didn't previously use SVE's RDVL instruction, since the CNT*
|
||||
forms are preferred and provide most of the range. However,
|
||||
there are some cases that RDVL can handle and CNT* can't,
|
||||
and using RDVL-like instructions becomes important for SME.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p)
|
||||
(aarch64_output_sve_rdvl): Declare.
|
||||
* config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New
|
||||
function, split out from...
|
||||
(aarch64_sve_cnt_immediate_p): ...here.
|
||||
(aarch64_sve_rdvl_factor_p): New function.
|
||||
(aarch64_sve_rdvl_immediate_p): Likewise.
|
||||
(aarch64_output_sve_rdvl): Likewise.
|
||||
(aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL
|
||||
for some cases.
|
||||
(aarch64_expand_mov_immediate): Handle RDVL immediates.
|
||||
(aarch64_mov_operand_p): Likewise.
|
||||
* config/aarch64/constraints.md (Usr): New constraint.
|
||||
* config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Add an RDVL
|
||||
alternative.
|
||||
(*movsi_aarch64, *movdi_aarch64): Likewise.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output.
|
||||
* gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise.
|
||||
* gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used
|
||||
to calculate the -17 and 17 factors.
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-protos.h | 2 +
|
||||
gcc/config/aarch64/aarch64.cc | 191 ++++++++++++------
|
||||
gcc/config/aarch64/aarch64.md | 3 +
|
||||
gcc/config/aarch64/constraints.md | 6 +
|
||||
.../gcc.target/aarch64/sve/acle/asm/cntb.c | 71 +++++--
|
||||
.../gcc.target/aarch64/sve/acle/asm/cntd.c | 12 +-
|
||||
.../gcc.target/aarch64/sve/acle/asm/cnth.c | 20 +-
|
||||
.../gcc.target/aarch64/sve/acle/asm/cntw.c | 16 +-
|
||||
.../gcc.target/aarch64/sve/acle/asm/prfb.c | 6 +-
|
||||
.../gcc.target/aarch64/sve/acle/asm/prfd.c | 4 +-
|
||||
.../gcc.target/aarch64/sve/acle/asm/prfh.c | 4 +-
|
||||
.../gcc.target/aarch64/sve/acle/asm/prfw.c | 4 +-
|
||||
.../gcc.target/aarch64/sve/loop_add_4.c | 6 +-
|
||||
.../aarch64/sve/pcs/stack_clash_1.c | 3 +-
|
||||
14 files changed, 225 insertions(+), 123 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index 3ff1a0163..14a568140 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode);
|
||||
HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int);
|
||||
bool aarch64_sve_cnt_immediate_p (rtx);
|
||||
bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
|
||||
+bool aarch64_sve_rdvl_immediate_p (rtx);
|
||||
bool aarch64_sve_addvl_addpl_immediate_p (rtx);
|
||||
bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
|
||||
int aarch64_add_offset_temporaries (rtx);
|
||||
@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *);
|
||||
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
|
||||
char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *);
|
||||
char *aarch64_output_sve_scalar_inc_dec (rtx);
|
||||
+char *aarch64_output_sve_rdvl (rtx);
|
||||
char *aarch64_output_sve_addvl_addpl (rtx);
|
||||
char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
|
||||
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index acb659f53..4194dfc70 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq)
|
||||
return -1;
|
||||
}
|
||||
|
||||
+/* Return true if a single CNT[BHWD] instruction can multiply FACTOR
|
||||
+ by the number of 128-bit quadwords in an SVE vector. */
|
||||
+
|
||||
+static bool
|
||||
+aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor)
|
||||
+{
|
||||
+ /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
|
||||
+ return (IN_RANGE (factor, 2, 16 * 16)
|
||||
+ && (factor & 1) == 0
|
||||
+ && factor <= 16 * (factor & -factor));
|
||||
+}
|
||||
+
|
||||
/* Return true if we can move VALUE into a register using a single
|
||||
CNT[BHWD] instruction. */
|
||||
|
||||
@@ -5527,11 +5539,7 @@ static bool
|
||||
aarch64_sve_cnt_immediate_p (poly_int64 value)
|
||||
{
|
||||
HOST_WIDE_INT factor = value.coeffs[0];
|
||||
- /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
|
||||
- return (value.coeffs[1] == factor
|
||||
- && IN_RANGE (factor, 2, 16 * 16)
|
||||
- && (factor & 1) == 0
|
||||
- && factor <= 16 * (factor & -factor));
|
||||
+ return value.coeffs[1] == factor && aarch64_sve_cnt_factor_p (factor);
|
||||
}
|
||||
|
||||
/* Likewise for rtx X. */
|
||||
@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset)
|
||||
-offset_value.coeffs[1], 0);
|
||||
}
|
||||
|
||||
+/* Return true if a single RDVL instruction can multiply FACTOR by the
|
||||
+ number of 128-bit quadwords in an SVE vector. */
|
||||
+
|
||||
+static bool
|
||||
+aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
|
||||
+{
|
||||
+ return (multiple_p (factor, 16)
|
||||
+ && IN_RANGE (factor, -32 * 16, 31 * 16));
|
||||
+}
|
||||
+
|
||||
+/* Return true if we can move VALUE into a register using a single
|
||||
+ RDVL instruction. */
|
||||
+
|
||||
+static bool
|
||||
+aarch64_sve_rdvl_immediate_p (poly_int64 value)
|
||||
+{
|
||||
+ HOST_WIDE_INT factor = value.coeffs[0];
|
||||
+ return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
|
||||
+}
|
||||
+
|
||||
+/* Likewise for rtx X. */
|
||||
+
|
||||
+bool
|
||||
+aarch64_sve_rdvl_immediate_p (rtx x)
|
||||
+{
|
||||
+ poly_int64 value;
|
||||
+ return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value);
|
||||
+}
|
||||
+
|
||||
+/* Return the asm string for moving RDVL immediate OFFSET into register
|
||||
+ operand 0. */
|
||||
+
|
||||
+char *
|
||||
+aarch64_output_sve_rdvl (rtx offset)
|
||||
+{
|
||||
+ static char buffer[sizeof ("rdvl\t%x0, #-") + 3 * sizeof (int)];
|
||||
+ poly_int64 offset_value = rtx_to_poly_int64 (offset);
|
||||
+ gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value));
|
||||
+
|
||||
+ int factor = offset_value.coeffs[1];
|
||||
+ snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16);
|
||||
+ return buffer;
|
||||
+}
|
||||
+
|
||||
/* Return true if we can add VALUE to a register using a single ADDVL
|
||||
or ADDPL instruction. */
|
||||
|
||||
@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset)
|
||||
count += 1;
|
||||
else if (factor != 0)
|
||||
{
|
||||
- factor = abs (factor);
|
||||
- if (factor > 16 * (factor & -factor))
|
||||
- /* Need one register for the CNT result and one for the multiplication
|
||||
- factor. If necessary, the second temporary can be reused for the
|
||||
- constant part of the offset. */
|
||||
+ factor /= (HOST_WIDE_INT) least_bit_hwi (factor);
|
||||
+ if (!IN_RANGE (factor, -32, 31))
|
||||
+ /* Need one register for the CNT or RDVL result and one for the
|
||||
+ multiplication factor. If necessary, the second temporary
|
||||
+ can be reused for the constant part of the offset. */
|
||||
return 2;
|
||||
- /* Need one register for the CNT result (which might then
|
||||
+ /* Need one register for the CNT or RDVL result (which might then
|
||||
be shifted). */
|
||||
count += 1;
|
||||
}
|
||||
@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
|
||||
/* Otherwise use a CNT-based sequence. */
|
||||
else if (factor != 0)
|
||||
{
|
||||
- /* Use a subtraction if we have a negative factor. */
|
||||
- rtx_code code = PLUS;
|
||||
- if (factor < 0)
|
||||
- {
|
||||
- factor = -factor;
|
||||
- code = MINUS;
|
||||
- }
|
||||
+ /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT,
|
||||
+ with negative shifts indicating a shift right. */
|
||||
+ HOST_WIDE_INT low_bit = least_bit_hwi (factor);
|
||||
+ HOST_WIDE_INT rel_factor = factor / low_bit;
|
||||
+ int shift = exact_log2 (low_bit) - 4;
|
||||
+ gcc_assert (shift >= -4 && (rel_factor & 1) != 0);
|
||||
+
|
||||
+ /* Set CODE, VAL and SHIFT so that [+-] VAL * 2**SHIFT is
|
||||
+ equal to CNTB * FACTOR / 16, with CODE being the [+-].
|
||||
|
||||
- /* Calculate CNTD * FACTOR / 2. First try to fold the division
|
||||
- into the multiplication. */
|
||||
+ We can avoid a multiplication if REL_FACTOR is in the range
|
||||
+ of RDVL, although there are then various optimizations that
|
||||
+ we can try on top. */
|
||||
+ rtx_code code = PLUS;
|
||||
rtx val;
|
||||
- int shift = 0;
|
||||
- if (factor & 1)
|
||||
- /* Use a right shift by 1. */
|
||||
- shift = -1;
|
||||
- else
|
||||
- factor /= 2;
|
||||
- HOST_WIDE_INT low_bit = factor & -factor;
|
||||
- if (factor <= 16 * low_bit)
|
||||
+ if (IN_RANGE (rel_factor, -32, 31))
|
||||
{
|
||||
- if (factor > 16 * 8)
|
||||
+ /* Try to use an unshifted CNT[BHWD] or RDVL. */
|
||||
+ if (aarch64_sve_cnt_factor_p (factor)
|
||||
+ || aarch64_sve_rdvl_factor_p (factor))
|
||||
+ {
|
||||
+ val = gen_int_mode (poly_int64 (factor, factor), mode);
|
||||
+ shift = 0;
|
||||
+ }
|
||||
+ /* Try to subtract an unshifted CNT[BHWD]. */
|
||||
+ else if (aarch64_sve_cnt_factor_p (-factor))
|
||||
{
|
||||
- /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
|
||||
- the value with the minimum multiplier and shift it into
|
||||
- position. */
|
||||
- int extra_shift = exact_log2 (low_bit);
|
||||
- shift += extra_shift;
|
||||
- factor >>= extra_shift;
|
||||
+ code = MINUS;
|
||||
+ val = gen_int_mode (poly_int64 (-factor, -factor), mode);
|
||||
+ shift = 0;
|
||||
}
|
||||
- val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
|
||||
+ /* If subtraction is free, prefer to load a positive constant.
|
||||
+ In the best case this will fit a shifted CNTB. */
|
||||
+ else if (src != const0_rtx && rel_factor < 0)
|
||||
+ {
|
||||
+ code = MINUS;
|
||||
+ val = gen_int_mode (-rel_factor * BYTES_PER_SVE_VECTOR, mode);
|
||||
+ }
|
||||
+ /* Otherwise use a shifted RDVL or CNT[BHWD]. */
|
||||
+ else
|
||||
+ val = gen_int_mode (rel_factor * BYTES_PER_SVE_VECTOR, mode);
|
||||
}
|
||||
else
|
||||
{
|
||||
- /* Base the factor on LOW_BIT if we can calculate LOW_BIT
|
||||
- directly, since that should increase the chances of being
|
||||
- able to use a shift and add sequence. If LOW_BIT itself
|
||||
- is out of range, just use CNTD. */
|
||||
- if (low_bit <= 16 * 8)
|
||||
- factor /= low_bit;
|
||||
+ /* If we can calculate CNTB << SHIFT directly, prefer to do that,
|
||||
+ since it should increase the chances of being able to use
|
||||
+ a shift and add sequence for the multiplication.
|
||||
+ If CNTB << SHIFT is out of range, stick with the current
|
||||
+ shift factor. */
|
||||
+ if (IN_RANGE (low_bit, 2, 16 * 16))
|
||||
+ {
|
||||
+ val = gen_int_mode (poly_int64 (low_bit, low_bit), mode);
|
||||
+ shift = 0;
|
||||
+ }
|
||||
else
|
||||
- low_bit = 1;
|
||||
+ val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode);
|
||||
|
||||
- val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
|
||||
val = aarch64_force_temporary (mode, temp1, val);
|
||||
|
||||
+ /* Prefer to multiply by a positive factor and subtract rather
|
||||
+ than multiply by a negative factor and add, since positive
|
||||
+ values are usually easier to move. */
|
||||
+ if (rel_factor < 0 && src != const0_rtx)
|
||||
+ {
|
||||
+ rel_factor = -rel_factor;
|
||||
+ code = MINUS;
|
||||
+ }
|
||||
+
|
||||
if (can_create_pseudo_p ())
|
||||
{
|
||||
- rtx coeff1 = gen_int_mode (factor, mode);
|
||||
+ rtx coeff1 = gen_int_mode (rel_factor, mode);
|
||||
val = expand_mult (mode, val, coeff1, NULL_RTX, true, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
- /* Go back to using a negative multiplication factor if we have
|
||||
- no register from which to subtract. */
|
||||
- if (code == MINUS && src == const0_rtx)
|
||||
- {
|
||||
- factor = -factor;
|
||||
- code = PLUS;
|
||||
- }
|
||||
- rtx coeff1 = gen_int_mode (factor, mode);
|
||||
+ rtx coeff1 = gen_int_mode (rel_factor, mode);
|
||||
coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
|
||||
val = gen_rtx_MULT (mode, val, coeff1);
|
||||
}
|
||||
}
|
||||
|
||||
+ /* Multiply by 2 ** SHIFT. */
|
||||
if (shift > 0)
|
||||
{
|
||||
- /* Multiply by 1 << SHIFT. */
|
||||
val = aarch64_force_temporary (mode, temp1, val);
|
||||
val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
|
||||
}
|
||||
- else if (shift == -1)
|
||||
+ else if (shift < 0)
|
||||
{
|
||||
- /* Divide by 2. */
|
||||
val = aarch64_force_temporary (mode, temp1, val);
|
||||
- val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
|
||||
+ val = gen_rtx_ASHIFTRT (mode, val, GEN_INT (-shift));
|
||||
}
|
||||
|
||||
- /* Calculate SRC +/- CNTD * FACTOR / 2. */
|
||||
+ /* Add the result to SRC or subtract the result from SRC. */
|
||||
if (src != const0_rtx)
|
||||
{
|
||||
val = aarch64_force_temporary (mode, temp1, val);
|
||||
@@ -7045,7 +7112,9 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
|
||||
aarch64_report_sve_required ();
|
||||
return;
|
||||
}
|
||||
- if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
|
||||
+ if (base == const0_rtx
|
||||
+ && (aarch64_sve_cnt_immediate_p (offset)
|
||||
+ || aarch64_sve_rdvl_immediate_p (offset)))
|
||||
emit_insn (gen_rtx_SET (dest, imm));
|
||||
else
|
||||
{
|
||||
@@ -21751,7 +21820,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
|
||||
if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
|
||||
return true;
|
||||
|
||||
- if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x))
|
||||
+ if (TARGET_SVE
|
||||
+ && (aarch64_sve_cnt_immediate_p (x)
|
||||
+ || aarch64_sve_rdvl_immediate_p (x)))
|
||||
return true;
|
||||
|
||||
return aarch64_classify_symbolic_expression (x)
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 5d02da42f..c0977a3da 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -1207,6 +1207,7 @@
|
||||
[w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
|
||||
/* The "mov_imm" type for CNT is just a placeholder. */
|
||||
[r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]);
|
||||
[r, m ; load_4 , * ] ldr<size>\t%w0, %1
|
||||
[w, m ; load_4 , * ] ldr\t%<size>0, %1
|
||||
[m, r Z ; store_4 , * ] str<size>\\t%w1, %0
|
||||
@@ -1265,6 +1266,7 @@
|
||||
[r , n ; mov_imm , * ,16] #
|
||||
/* The "mov_imm" type for CNT is just a placeholder. */
|
||||
[r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ [r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
|
||||
[r , m ; load_4 , * , 4] ldr\t%w0, %1
|
||||
[w , m ; load_4 , fp , 4] ldr\t%s0, %1
|
||||
[m , r Z; store_4 , * , 4] str\t%w1, %0
|
||||
@@ -1299,6 +1301,7 @@
|
||||
[r, n ; mov_imm , * ,16] #
|
||||
/* The "mov_imm" type for CNT is just a placeholder. */
|
||||
[r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
|
||||
+ [r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
|
||||
[r, m ; load_8 , * , 4] ldr\t%x0, %1
|
||||
[w, m ; load_8 , fp , 4] ldr\t%d0, %1
|
||||
[m, r Z; store_8 , * , 4] str\t%x1, %0
|
||||
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
|
||||
index 750a42fb1..212a73416 100644
|
||||
--- a/gcc/config/aarch64/constraints.md
|
||||
+++ b/gcc/config/aarch64/constraints.md
|
||||
@@ -214,6 +214,12 @@
|
||||
(and (match_code "const_int")
|
||||
(match_test "aarch64_high_bits_all_ones_p (ival)")))
|
||||
|
||||
+(define_constraint "Usr"
|
||||
+ "@internal
|
||||
+ A constraint that matches a value produced by RDVL."
|
||||
+ (and (match_code "const_poly_int")
|
||||
+ (match_test "aarch64_sve_rdvl_immediate_p (op)")))
|
||||
+
|
||||
(define_constraint "Usv"
|
||||
"@internal
|
||||
A constraint that matches a VG-based constant that can be loaded by
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
|
||||
index 8b8fe8e4f..a22d8a28d 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
|
||||
@@ -51,19 +51,24 @@ PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; }
|
||||
*/
|
||||
PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; }
|
||||
|
||||
-/* Other sequences would be OK. */
|
||||
/*
|
||||
** cntb_17:
|
||||
-** cntb x0, all, mul #16
|
||||
-** incb x0
|
||||
+** rdvl x0, #17
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; }
|
||||
|
||||
+/*
|
||||
+** cntb_31:
|
||||
+** rdvl x0, #31
|
||||
+** ret
|
||||
+*/
|
||||
+PROTO (cntb_31, uint64_t, ()) { return svcntb () * 31; }
|
||||
+
|
||||
/*
|
||||
** cntb_32:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl x0, \1, 8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl x0, \1, 5
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; }
|
||||
@@ -80,16 +85,16 @@ PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; }
|
||||
|
||||
/*
|
||||
** cntb_64:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl x0, \1, 9
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl x0, \1, 6
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; }
|
||||
|
||||
/*
|
||||
** cntb_128:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl x0, \1, 10
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl x0, \1, 7
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; }
|
||||
@@ -106,46 +111,70 @@ PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; }
|
||||
|
||||
/*
|
||||
** cntb_m1:
|
||||
-** cntb (x[0-9]+)
|
||||
-** neg x0, \1
|
||||
+** rdvl x0, #-1
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); }
|
||||
|
||||
/*
|
||||
** cntb_m13:
|
||||
-** cntb (x[0-9]+), all, mul #13
|
||||
-** neg x0, \1
|
||||
+** rdvl x0, #-13
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; }
|
||||
|
||||
/*
|
||||
** cntb_m15:
|
||||
-** cntb (x[0-9]+), all, mul #15
|
||||
-** neg x0, \1
|
||||
+** rdvl x0, #-15
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; }
|
||||
|
||||
/*
|
||||
** cntb_m16:
|
||||
-** cntb (x[0-9]+), all, mul #16
|
||||
-** neg x0, \1
|
||||
+** rdvl x0, #-16
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; }
|
||||
|
||||
-/* Other sequences would be OK. */
|
||||
/*
|
||||
** cntb_m17:
|
||||
-** cntb x0, all, mul #16
|
||||
-** incb x0
|
||||
-** neg x0, x0
|
||||
+** rdvl x0, #-17
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; }
|
||||
|
||||
+/*
|
||||
+** cntb_m32:
|
||||
+** rdvl x0, #-32
|
||||
+** ret
|
||||
+*/
|
||||
+PROTO (cntb_m32, uint64_t, ()) { return -svcntb () * 32; }
|
||||
+
|
||||
+/*
|
||||
+** cntb_m33:
|
||||
+** rdvl x0, #-32
|
||||
+** decb x0
|
||||
+** ret
|
||||
+*/
|
||||
+PROTO (cntb_m33, uint64_t, ()) { return -svcntb () * 33; }
|
||||
+
|
||||
+/*
|
||||
+** cntb_m34:
|
||||
+** rdvl (x[0-9]+), #-17
|
||||
+** lsl x0, \1, #?1
|
||||
+** ret
|
||||
+*/
|
||||
+PROTO (cntb_m34, uint64_t, ()) { return -svcntb () * 34; }
|
||||
+
|
||||
+/*
|
||||
+** cntb_m64:
|
||||
+** rdvl (x[0-9]+), #-1
|
||||
+** lsl x0, \1, #?6
|
||||
+** ret
|
||||
+*/
|
||||
+PROTO (cntb_m64, uint64_t, ()) { return -svcntb () * 64; }
|
||||
+
|
||||
/*
|
||||
** incb_1:
|
||||
** incb x0
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
|
||||
index 0d0ed4849..090a643b4 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
|
||||
@@ -54,8 +54,8 @@ PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; }
|
||||
/* Other sequences would be OK. */
|
||||
/*
|
||||
** cntd_17:
|
||||
-** cntb x0, all, mul #2
|
||||
-** incd x0
|
||||
+** rdvl (x[0-9]+), #17
|
||||
+** asr x0, \1, 3
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; }
|
||||
@@ -107,8 +107,7 @@ PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; }
|
||||
|
||||
/*
|
||||
** cntd_m16:
|
||||
-** cntb (x[0-9]+), all, mul #2
|
||||
-** neg x0, \1
|
||||
+** rdvl x0, #-2
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
|
||||
@@ -116,9 +115,8 @@ PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
|
||||
/* Other sequences would be OK. */
|
||||
/*
|
||||
** cntd_m17:
|
||||
-** cntb x0, all, mul #2
|
||||
-** incd x0
|
||||
-** neg x0, x0
|
||||
+** rdvl (x[0-9]+), #-17
|
||||
+** asr x0, \1, 3
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; }
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
|
||||
index c29930f15..1a4e7dc0e 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
|
||||
@@ -54,8 +54,8 @@ PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; }
|
||||
/* Other sequences would be OK. */
|
||||
/*
|
||||
** cnth_17:
|
||||
-** cntb x0, all, mul #8
|
||||
-** inch x0
|
||||
+** rdvl (x[0-9]+), #17
|
||||
+** asr x0, \1, 1
|
||||
** ret
|
||||
*/
|
||||
PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; }
|
||||
@@ -69,16 +69,16 @@ PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; }
|
||||
|
||||
/*
|
||||
** cnth_64:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl x0, \1, 8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl x0, \1, 5
|
||||
** ret
|
||||
*/
|
||||
PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; }
|
||||
|
||||
/*
|
||||
** cnth_128:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl x0, \1, 9
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl x0, \1, 6
|
||||
** ret
|
||||
*/
|
||||
PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; }
|
||||
@@ -109,8 +109,7 @@ PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; }
|
||||
|
||||
/*
|
||||
** cnth_m16:
|
||||
-** cntb (x[0-9]+), all, mul #8
|
||||
-** neg x0, \1
|
||||
+** rdvl x0, #-8
|
||||
** ret
|
||||
*/
|
||||
PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
|
||||
@@ -118,9 +117,8 @@ PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
|
||||
/* Other sequences would be OK. */
|
||||
/*
|
||||
** cnth_m17:
|
||||
-** cntb x0, all, mul #8
|
||||
-** inch x0
|
||||
-** neg x0, x0
|
||||
+** rdvl (x[0-9]+), #-17
|
||||
+** asr x0, \1, 1
|
||||
** ret
|
||||
*/
|
||||
PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; }
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
|
||||
index e26cc67a4..9d1697690 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
|
||||
@@ -54,8 +54,8 @@ PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; }
|
||||
/* Other sequences would be OK. */
|
||||
/*
|
||||
** cntw_17:
|
||||
-** cntb x0, all, mul #4
|
||||
-** incw x0
|
||||
+** rdvl (x[0-9]+), #17
|
||||
+** asr x0, \1, 2
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; }
|
||||
@@ -76,8 +76,8 @@ PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; }
|
||||
|
||||
/*
|
||||
** cntw_128:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl x0, \1, 8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl x0, \1, 5
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; }
|
||||
@@ -108,8 +108,7 @@ PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; }
|
||||
|
||||
/*
|
||||
** cntw_m16:
|
||||
-** cntb (x[0-9]+), all, mul #4
|
||||
-** neg x0, \1
|
||||
+** rdvl (x[0-9]+), #-4
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
|
||||
@@ -117,9 +116,8 @@ PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
|
||||
/* Other sequences would be OK. */
|
||||
/*
|
||||
** cntw_m17:
|
||||
-** cntb x0, all, mul #4
|
||||
-** incw x0
|
||||
-** neg x0, x0
|
||||
+** rdvl (x[0-9]+), #-17
|
||||
+** asr x0, \1, 2
|
||||
** ret
|
||||
*/
|
||||
PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; }
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
|
||||
index c90730a03..94cd3a066 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
|
||||
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfb_vnum_31, uint16_t,
|
||||
|
||||
/*
|
||||
** prfb_vnum_32:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl (x[0-9]+), \1, #?8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl (x[0-9]+), \1, #?5
|
||||
** add (x[0-9]+), (\2, x0|x0, \2)
|
||||
** prfb pldl1keep, p0, \[\3\]
|
||||
** ret
|
||||
@@ -240,7 +240,7 @@ TEST_PREFETCH (prfb_vnum_m32, uint16_t,
|
||||
/*
|
||||
** prfb_vnum_m33:
|
||||
** ...
|
||||
-** prfb pldl1keep, p0, \[x[0-9]+\]
|
||||
+** prfb pldl1keep, p0, \[x[0-9]+(, x[0-9]+)?\]
|
||||
** ret
|
||||
*/
|
||||
TEST_PREFETCH (prfb_vnum_m33, uint16_t,
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
|
||||
index 869ef3d3e..b7a116cf0 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
|
||||
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfd_vnum_31, uint16_t,
|
||||
|
||||
/*
|
||||
** prfd_vnum_32:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl (x[0-9]+), \1, #?8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl (x[0-9]+), \1, #?5
|
||||
** add (x[0-9]+), (\2, x0|x0, \2)
|
||||
** prfd pldl1keep, p0, \[\3\]
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
|
||||
index 45a735eae..9d3df6bd3 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
|
||||
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfh_vnum_31, uint16_t,
|
||||
|
||||
/*
|
||||
** prfh_vnum_32:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl (x[0-9]+), \1, #?8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl (x[0-9]+), \1, #?5
|
||||
** add (x[0-9]+), (\2, x0|x0, \2)
|
||||
** prfh pldl1keep, p0, \[\3\]
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
|
||||
index 444187f45..6962abab6 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
|
||||
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfw_vnum_31, uint16_t,
|
||||
|
||||
/*
|
||||
** prfw_vnum_32:
|
||||
-** cntd (x[0-9]+)
|
||||
-** lsl (x[0-9]+), \1, #?8
|
||||
+** cntb (x[0-9]+)
|
||||
+** lsl (x[0-9]+), \1, #?5
|
||||
** add (x[0-9]+), (\2, x0|x0, \2)
|
||||
** prfw pldl1keep, p0, \[\3\]
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
|
||||
index 9ead9c21b..7f02497e8 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
|
||||
@@ -68,8 +68,7 @@ TEST_ALL (LOOP)
|
||||
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
|
||||
-/* 2 for the calculations of -17 and 17. */
|
||||
-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */
|
||||
+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */
|
||||
@@ -86,8 +85,7 @@ TEST_ALL (LOOP)
|
||||
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
|
||||
-/* 2 for the calculations of -17 and 17. */
|
||||
-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */
|
||||
+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
|
||||
index 110947a6c..5de34fc61 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
|
||||
@@ -6,8 +6,7 @@
|
||||
|
||||
/*
|
||||
** test_1:
|
||||
-** cntd x12, all, mul #9
|
||||
-** lsl x12, x12, #?4
|
||||
+** rdvl x12, #18
|
||||
** mov x11, sp
|
||||
** ...
|
||||
** sub sp, sp, x12
|
||||
--
|
||||
2.33.0
|
||||
|
||||
137
0156-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
Normal file
137
0156-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch
Normal file
@ -0,0 +1,137 @@
|
||||
From c0badff223a1f5ea5a0f75df72f5d0138d94d8e6 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:19 +0000
|
||||
Subject: [PATCH 064/157] [Backport][SME] aarch64: Make AARCH64_FL_SVE
|
||||
requirements explicit
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd7aaef62a43efe52eece525eea4d7d252b0c148
|
||||
|
||||
So far, all intrinsics covered by the aarch64-sve-builtins*
|
||||
framework have (naturally enough) required at least SVE.
|
||||
However, arm_sme.h defines a couple of intrinsics that can
|
||||
be called by any code. It's therefore necessary to make
|
||||
the implicit SVE requirement explicit.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.cc (function_groups): Remove
|
||||
implied requirement on SVE.
|
||||
* config/aarch64/aarch64-sve-builtins-base.def: Explicitly require SVE.
|
||||
* config/aarch64/aarch64-sve-builtins-sve2.def: Likewise.
|
||||
---
|
||||
.../aarch64/aarch64-sve-builtins-base.def | 10 +++++-----
|
||||
.../aarch64/aarch64-sve-builtins-sve2.def | 18 +++++++++++++-----
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
|
||||
3 files changed, 19 insertions(+), 11 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
|
||||
index ffdf7cb4c..3a58f76c3 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
|
||||
@@ -17,7 +17,7 @@
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
-#define REQUIRED_EXTENSIONS 0
|
||||
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE
|
||||
DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz)
|
||||
DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz)
|
||||
DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit)
|
||||
@@ -318,7 +318,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
|
||||
DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS AARCH64_FL_BF16
|
||||
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16
|
||||
DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
|
||||
DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
|
||||
DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
|
||||
@@ -330,7 +330,7 @@ DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
|
||||
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
|
||||
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM
|
||||
DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
|
||||
DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
|
||||
DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none)
|
||||
@@ -339,11 +339,11 @@ DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none)
|
||||
DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM
|
||||
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM
|
||||
DEF_SVE_FUNCTION (svmmla, mmla, s_float, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM
|
||||
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM
|
||||
DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
|
||||
DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
|
||||
DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none)
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
|
||||
index 635089ffc..d5f23a887 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
|
||||
@@ -17,7 +17,7 @@
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2
|
||||
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2
|
||||
DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none)
|
||||
DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none)
|
||||
DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none)
|
||||
@@ -189,7 +189,9 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
|
||||
DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES)
|
||||
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
|
||||
+ | AARCH64_FL_SVE2 \
|
||||
+ | AARCH64_FL_SVE2_AES)
|
||||
DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none)
|
||||
DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none)
|
||||
DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none)
|
||||
@@ -198,17 +200,23 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none)
|
||||
DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM)
|
||||
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
|
||||
+ | AARCH64_FL_SVE2 \
|
||||
+ | AARCH64_FL_SVE2_BITPERM)
|
||||
DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none)
|
||||
DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none)
|
||||
DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3)
|
||||
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
|
||||
+ | AARCH64_FL_SVE2 \
|
||||
+ | AARCH64_FL_SVE2_SHA3)
|
||||
DEF_SVE_FUNCTION (svrax1, binary, d_integer, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
|
||||
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4)
|
||||
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
|
||||
+ | AARCH64_FL_SVE2 \
|
||||
+ | AARCH64_FL_SVE2_SM4)
|
||||
DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
|
||||
DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
|
||||
#undef REQUIRED_EXTENSIONS
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index 7924cdf0f..dde01f676 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -525,7 +525,7 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
|
||||
static CONSTEXPR const function_group_info function_groups[] = {
|
||||
#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
|
||||
{ #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
|
||||
- REQUIRED_EXTENSIONS | AARCH64_FL_SVE },
|
||||
+ REQUIRED_EXTENSIONS },
|
||||
#include "aarch64-sve-builtins.def"
|
||||
};
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
562
0157-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
Normal file
562
0157-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch
Normal file
@ -0,0 +1,562 @@
|
||||
From e99332e15895156632949f3b6c3080fc9d994b13 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:19 +0000
|
||||
Subject: [PATCH 065/157] [Backport][SME] aarch64: Add group suffixes to SVE
|
||||
intrinsics
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b607f197967e052d7d7e29f6b41eded18f8c65d
|
||||
|
||||
The SME2 ACLE adds a new "group" suffix component to the naming
|
||||
convention for SVE intrinsics. This is also used in the new tuple
|
||||
forms of the svreinterpret intrinsics.
|
||||
|
||||
This patch adds support for group suffixes and defines the
|
||||
x2, x3 and x4 suffixes that are needed for the svreinterprets.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Take
|
||||
a group suffix index parameter.
|
||||
(build_32_64, build_all): Update accordingly. Iterate over all
|
||||
group suffixes.
|
||||
* config/aarch64/aarch64-sve-builtins-sve2.cc (svqrshl_impl::fold)
|
||||
(svqshl_impl::fold, svrshl_impl::fold): Update function_instance
|
||||
constructors.
|
||||
* config/aarch64/aarch64-sve-builtins.cc (group_suffixes): New array.
|
||||
(groups_none): New constant.
|
||||
(function_groups): Initialize the groups field.
|
||||
(function_instance::hash): Hash the group index.
|
||||
(function_builder::get_name): Add the group suffix.
|
||||
(function_builder::add_overloaded_functions): Iterate over all
|
||||
group suffixes.
|
||||
(function_resolver::lookup_form): Take a group suffix parameter.
|
||||
(function_resolver::resolve_to): Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins.def (DEF_SVE_GROUP_SUFFIX): New
|
||||
macro.
|
||||
(x2, x3, x4): New group suffixes.
|
||||
* config/aarch64/aarch64-sve-builtins.h (group_suffix_index): New enum.
|
||||
(group_suffix_info): New structure.
|
||||
(function_group_info::groups): New member variable.
|
||||
(function_instance::group_suffix_id): Likewise.
|
||||
(group_suffixes): New array.
|
||||
(function_instance::operator==): Compare the group suffixes.
|
||||
(function_instance::group_suffix): New function.
|
||||
---
|
||||
.../aarch64/aarch64-sve-builtins-shapes.cc | 53 ++++++------
|
||||
.../aarch64/aarch64-sve-builtins-sve2.cc | 10 +--
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 84 +++++++++++++------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.def | 9 ++
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 81 ++++++++++++++----
|
||||
5 files changed, 165 insertions(+), 72 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
index 4fa4181b9..3ecef026c 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
@@ -275,18 +275,20 @@ parse_signature (const function_instance &instance, const char *format,
|
||||
}
|
||||
|
||||
/* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID,
|
||||
- the type suffixes at index TI and the predication suffix at index PI.
|
||||
- The other arguments are as for build_all. */
|
||||
+ the type suffixes at index TI, the group suffixes at index GI, and the
|
||||
+ predication suffix at index PI. The other arguments are as for
|
||||
+ build_all. */
|
||||
static void
|
||||
build_one (function_builder &b, const char *signature,
|
||||
const function_group_info &group, mode_suffix_index mode_suffix_id,
|
||||
- unsigned int ti, unsigned int pi, bool force_direct_overloads)
|
||||
+ unsigned int ti, unsigned int gi, unsigned int pi,
|
||||
+ bool force_direct_overloads)
|
||||
{
|
||||
/* Byte forms of svdupq take 16 arguments. */
|
||||
auto_vec<tree, 16> argument_types;
|
||||
function_instance instance (group.base_name, *group.base, *group.shape,
|
||||
mode_suffix_id, group.types[ti],
|
||||
- group.preds[pi]);
|
||||
+ group.groups[gi], group.preds[pi]);
|
||||
tree return_type = parse_signature (instance, signature, argument_types);
|
||||
apply_predication (instance, return_type, argument_types);
|
||||
b.add_unique_function (instance, return_type, argument_types,
|
||||
@@ -312,24 +314,26 @@ build_32_64 (function_builder &b, const char *signature,
|
||||
mode_suffix_index mode64, bool force_direct_overloads = false)
|
||||
{
|
||||
for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
|
||||
- if (group.types[0][0] == NUM_TYPE_SUFFIXES)
|
||||
- {
|
||||
- gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
|
||||
- build_one (b, signature, group, mode32, 0, pi,
|
||||
- force_direct_overloads);
|
||||
- build_one (b, signature, group, mode64, 0, pi,
|
||||
- force_direct_overloads);
|
||||
- }
|
||||
- else
|
||||
- for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
|
||||
+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
|
||||
+ if (group.types[0][0] == NUM_TYPE_SUFFIXES)
|
||||
{
|
||||
- unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
|
||||
- gcc_assert (bits == 32 || bits == 64);
|
||||
- mode_suffix_index mode = bits == 32 ? mode32 : mode64;
|
||||
- if (mode != MODE_none)
|
||||
- build_one (b, signature, group, mode, ti, pi,
|
||||
- force_direct_overloads);
|
||||
+ gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
|
||||
+ build_one (b, signature, group, mode32, 0, gi, pi,
|
||||
+ force_direct_overloads);
|
||||
+ build_one (b, signature, group, mode64, 0, gi, pi,
|
||||
+ force_direct_overloads);
|
||||
}
|
||||
+ else
|
||||
+ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
|
||||
+ ++ti)
|
||||
+ {
|
||||
+ unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
|
||||
+ gcc_assert (bits == 32 || bits == 64);
|
||||
+ mode_suffix_index mode = bits == 32 ? mode32 : mode64;
|
||||
+ if (mode != MODE_none)
|
||||
+ build_one (b, signature, group, mode, ti, gi, pi,
|
||||
+ force_direct_overloads);
|
||||
+ }
|
||||
}
|
||||
|
||||
/* For every type and predicate combination in GROUP, add one function
|
||||
@@ -423,10 +427,11 @@ build_all (function_builder &b, const char *signature,
|
||||
bool force_direct_overloads = false)
|
||||
{
|
||||
for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
|
||||
- for (unsigned int ti = 0;
|
||||
- ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
|
||||
- build_one (b, signature, group, mode_suffix_id, ti, pi,
|
||||
- force_direct_overloads);
|
||||
+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
|
||||
+ for (unsigned int ti = 0;
|
||||
+ ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
|
||||
+ build_one (b, signature, group, mode_suffix_id, ti, gi, pi,
|
||||
+ force_direct_overloads);
|
||||
}
|
||||
|
||||
/* TYPE is the largest type suffix associated with the arguments of R,
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
|
||||
index e066f096d..a94e5e269 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
|
||||
@@ -252,7 +252,7 @@ public:
|
||||
that we can use for sensible shift amounts. */
|
||||
function_instance instance ("svqshl", functions::svqshl,
|
||||
shapes::binary_int_opt_n, MODE_n,
|
||||
- f.type_suffix_ids, f.pred);
|
||||
+ f.type_suffix_ids, GROUP_none, f.pred);
|
||||
return f.redirect_call (instance);
|
||||
}
|
||||
else
|
||||
@@ -261,7 +261,7 @@ public:
|
||||
that we can use for sensible shift amounts. */
|
||||
function_instance instance ("svrshl", functions::svrshl,
|
||||
shapes::binary_int_opt_n, MODE_n,
|
||||
- f.type_suffix_ids, f.pred);
|
||||
+ f.type_suffix_ids, GROUP_none, f.pred);
|
||||
return f.redirect_call (instance);
|
||||
}
|
||||
}
|
||||
@@ -290,7 +290,7 @@ public:
|
||||
-wi::to_wide (amount));
|
||||
function_instance instance ("svasr", functions::svasr,
|
||||
shapes::binary_uint_opt_n, MODE_n,
|
||||
- f.type_suffix_ids, f.pred);
|
||||
+ f.type_suffix_ids, GROUP_none, f.pred);
|
||||
if (f.type_suffix (0).unsigned_p)
|
||||
{
|
||||
instance.base_name = "svlsr";
|
||||
@@ -322,7 +322,7 @@ public:
|
||||
that we can use for sensible shift amounts. */
|
||||
function_instance instance ("svlsl", functions::svlsl,
|
||||
shapes::binary_uint_opt_n, MODE_n,
|
||||
- f.type_suffix_ids, f.pred);
|
||||
+ f.type_suffix_ids, GROUP_none, f.pred);
|
||||
gcall *call = as_a <gcall *> (f.redirect_call (instance));
|
||||
gimple_call_set_arg (call, 2, amount);
|
||||
return call;
|
||||
@@ -335,7 +335,7 @@ public:
|
||||
-wi::to_wide (amount));
|
||||
function_instance instance ("svrshr", functions::svrshr,
|
||||
shapes::shift_right_imm, MODE_n,
|
||||
- f.type_suffix_ids, f.pred);
|
||||
+ f.type_suffix_ids, GROUP_none, f.pred);
|
||||
gcall *call = as_a <gcall *> (f.redirect_call (instance));
|
||||
gimple_call_set_arg (call, 2, amount);
|
||||
return call;
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index dde01f676..dc3fd80da 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -144,6 +144,13 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
|
||||
0, VOIDmode }
|
||||
};
|
||||
|
||||
+CONSTEXPR const group_suffix_info group_suffixes[] = {
|
||||
+#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) \
|
||||
+ { "_" #NAME, VG, VECTORS_PER_TUPLE },
|
||||
+#include "aarch64-sve-builtins.def"
|
||||
+ { "", 0, 1 }
|
||||
+};
|
||||
+
|
||||
/* Define a TYPES_<combination> macro for each combination of type
|
||||
suffixes that an ACLE function can have, where <combination> is the
|
||||
name used in DEF_SVE_FUNCTION entries.
|
||||
@@ -483,6 +490,10 @@ DEF_SVE_TYPES_ARRAY (inc_dec_n);
|
||||
DEF_SVE_TYPES_ARRAY (reinterpret);
|
||||
DEF_SVE_TYPES_ARRAY (while);
|
||||
|
||||
+static const group_suffix_index groups_none[] = {
|
||||
+ GROUP_none, NUM_GROUP_SUFFIXES
|
||||
+};
|
||||
+
|
||||
/* Used by functions that have no governing predicate. */
|
||||
static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
|
||||
|
||||
@@ -524,8 +535,8 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
|
||||
/* A list of all SVE ACLE functions. */
|
||||
static CONSTEXPR const function_group_info function_groups[] = {
|
||||
#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
|
||||
- { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
|
||||
- REQUIRED_EXTENSIONS },
|
||||
+ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_none, \
|
||||
+ preds_##PREDS, REQUIRED_EXTENSIONS },
|
||||
#include "aarch64-sve-builtins.def"
|
||||
};
|
||||
|
||||
@@ -788,6 +799,7 @@ function_instance::hash () const
|
||||
h.add_int (mode_suffix_id);
|
||||
h.add_int (type_suffix_ids[0]);
|
||||
h.add_int (type_suffix_ids[1]);
|
||||
+ h.add_int (group_suffix_id);
|
||||
h.add_int (pred);
|
||||
return h.end ();
|
||||
}
|
||||
@@ -957,6 +969,8 @@ function_builder::get_name (const function_instance &instance,
|
||||
for (unsigned int i = 0; i < 2; ++i)
|
||||
if (!overloaded_p || instance.shape->explicit_type_suffix_p (i))
|
||||
append_name (instance.type_suffix (i).string);
|
||||
+ if (!overloaded_p || instance.shape->explicit_group_suffix_p ())
|
||||
+ append_name (instance.group_suffix ().string);
|
||||
append_name (pred_suffixes[instance.pred]);
|
||||
return finish_name ();
|
||||
}
|
||||
@@ -1113,19 +1127,26 @@ void
|
||||
function_builder::add_overloaded_functions (const function_group_info &group,
|
||||
mode_suffix_index mode)
|
||||
{
|
||||
- unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
|
||||
- unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
|
||||
- for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
|
||||
+ bool explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
|
||||
+ bool explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
|
||||
+ bool explicit_group = (*group.shape)->explicit_group_suffix_p ();
|
||||
+ auto add_function = [&](const type_suffix_pair &types,
|
||||
+ group_suffix_index group_suffix_id,
|
||||
+ unsigned int pi)
|
||||
+ {
|
||||
+ function_instance instance (group.base_name, *group.base,
|
||||
+ *group.shape, mode, types,
|
||||
+ group_suffix_id, group.preds[pi]);
|
||||
+ add_overloaded_function (instance, group.required_extensions);
|
||||
+ };
|
||||
+
|
||||
+ auto add_group_suffix = [&](group_suffix_index group_suffix_id,
|
||||
+ unsigned int pi)
|
||||
{
|
||||
if (!explicit_type0 && !explicit_type1)
|
||||
- {
|
||||
- /* Deal with the common case in which there is one overloaded
|
||||
- function for all type combinations. */
|
||||
- function_instance instance (group.base_name, *group.base,
|
||||
- *group.shape, mode, types_none[0],
|
||||
- group.preds[pi]);
|
||||
- add_overloaded_function (instance, group.required_extensions);
|
||||
- }
|
||||
+ /* Deal with the common case in which there is one overloaded
|
||||
+ function for all type combinations. */
|
||||
+ add_function (types_none[0], group_suffix_id, pi);
|
||||
else
|
||||
for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
|
||||
++ti)
|
||||
@@ -1136,12 +1157,16 @@ function_builder::add_overloaded_functions (const function_group_info &group,
|
||||
explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
|
||||
explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
|
||||
};
|
||||
- function_instance instance (group.base_name, *group.base,
|
||||
- *group.shape, mode, types,
|
||||
- group.preds[pi]);
|
||||
- add_overloaded_function (instance, group.required_extensions);
|
||||
+ add_function (types, group_suffix_id, pi);
|
||||
}
|
||||
- }
|
||||
+ };
|
||||
+
|
||||
+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
|
||||
+ if (explicit_group)
|
||||
+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
|
||||
+ add_group_suffix (group.groups[gi], pi);
|
||||
+ else
|
||||
+ add_group_suffix (GROUP_none, pi);
|
||||
}
|
||||
|
||||
/* Register all the functions in GROUP. */
|
||||
@@ -1213,29 +1238,34 @@ function_resolver::report_no_such_form (type_suffix_index type)
|
||||
}
|
||||
|
||||
/* Silently check whether there is an instance of the function with the
|
||||
- mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1.
|
||||
- Return its function decl if so, otherwise return null. */
|
||||
+ mode suffix given by MODE, the type suffixes given by TYPE0 and TYPE1,
|
||||
+ and the group suffix given by GROUP. Return its function decl if so,
|
||||
+ otherwise return null. */
|
||||
tree
|
||||
function_resolver::lookup_form (mode_suffix_index mode,
|
||||
type_suffix_index type0,
|
||||
- type_suffix_index type1)
|
||||
+ type_suffix_index type1,
|
||||
+ group_suffix_index group)
|
||||
{
|
||||
type_suffix_pair types = { type0, type1 };
|
||||
- function_instance instance (base_name, base, shape, mode, types, pred);
|
||||
+ function_instance instance (base_name, base, shape, mode, types,
|
||||
+ group, pred);
|
||||
registered_function *rfn
|
||||
= function_table->find_with_hash (instance, instance.hash ());
|
||||
return rfn ? rfn->decl : NULL_TREE;
|
||||
}
|
||||
|
||||
-/* Resolve the function to one with the mode suffix given by MODE and the
|
||||
- type suffixes given by TYPE0 and TYPE1. Return its function decl on
|
||||
- success, otherwise report an error and return error_mark_node. */
|
||||
+/* Resolve the function to one with the mode suffix given by MODE, the
|
||||
+ type suffixes given by TYPE0 and TYPE1, and group suffix given by
|
||||
+ GROUP. Return its function decl on success, otherwise report an
|
||||
+ error and return error_mark_node. */
|
||||
tree
|
||||
function_resolver::resolve_to (mode_suffix_index mode,
|
||||
type_suffix_index type0,
|
||||
- type_suffix_index type1)
|
||||
+ type_suffix_index type1,
|
||||
+ group_suffix_index group)
|
||||
{
|
||||
- tree res = lookup_form (mode, type0, type1);
|
||||
+ tree res = lookup_form (mode, type0, type1, group);
|
||||
if (!res)
|
||||
{
|
||||
if (type1 == NUM_TYPE_SUFFIXES)
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def
|
||||
index 6e4dcdbc9..d9bf9c350 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.def
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.def
|
||||
@@ -29,6 +29,10 @@
|
||||
#define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E)
|
||||
#endif
|
||||
|
||||
+#ifndef DEF_SVE_GROUP_SUFFIX
|
||||
+#define DEF_SVE_GROUP_SUFFIX(A, B, C)
|
||||
+#endif
|
||||
+
|
||||
#ifndef DEF_SVE_FUNCTION
|
||||
#define DEF_SVE_FUNCTION(A, B, C, D)
|
||||
#endif
|
||||
@@ -95,10 +99,15 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode)
|
||||
DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode)
|
||||
DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode)
|
||||
|
||||
+DEF_SVE_GROUP_SUFFIX (x2, 0, 2)
|
||||
+DEF_SVE_GROUP_SUFFIX (x3, 0, 3)
|
||||
+DEF_SVE_GROUP_SUFFIX (x4, 0, 4)
|
||||
+
|
||||
#include "aarch64-sve-builtins-base.def"
|
||||
#include "aarch64-sve-builtins-sve2.def"
|
||||
|
||||
#undef DEF_SVE_FUNCTION
|
||||
+#undef DEF_SVE_GROUP_SUFFIX
|
||||
#undef DEF_SVE_TYPE_SUFFIX
|
||||
#undef DEF_SVE_TYPE
|
||||
#undef DEF_SVE_MODE
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 824c31cd7..374c57e93 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -180,6 +180,17 @@ enum type_suffix_index
|
||||
NUM_TYPE_SUFFIXES
|
||||
};
|
||||
|
||||
+/* Enumerates the possible group suffixes. Each suffix combines two
|
||||
+ optional pieces of information: the vector group size in a ZA index,
|
||||
+ and the number of vectors in the largest tuple argument. */
|
||||
+enum group_suffix_index
|
||||
+{
|
||||
+#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) GROUP_##NAME,
|
||||
+#include "aarch64-sve-builtins.def"
|
||||
+ GROUP_none,
|
||||
+ NUM_GROUP_SUFFIXES
|
||||
+};
|
||||
+
|
||||
/* Combines two type suffixes. */
|
||||
typedef enum type_suffix_index type_suffix_pair[2];
|
||||
|
||||
@@ -237,6 +248,21 @@ struct type_suffix_info
|
||||
machine_mode vector_mode : 16;
|
||||
};
|
||||
|
||||
+/* Static information about a group suffix. */
|
||||
+struct group_suffix_info
|
||||
+{
|
||||
+ /* The suffix string itself. */
|
||||
+ const char *string;
|
||||
+
|
||||
+ /* If the suffix describes a vector group in a ZA index, this is the
|
||||
+ size of that group, otherwise it is zero. */
|
||||
+ unsigned int vg;
|
||||
+
|
||||
+ /* The number of vectors in the largest (or only) tuple argument,
|
||||
+ or 1 if the suffix does not convey this information. */
|
||||
+ unsigned int vectors_per_tuple;
|
||||
+};
|
||||
+
|
||||
/* Static information about a set of functions. */
|
||||
struct function_group_info
|
||||
{
|
||||
@@ -251,14 +277,16 @@ struct function_group_info
|
||||
shapes. */
|
||||
const function_shape *const *shape;
|
||||
|
||||
- /* A list of the available type suffixes, and of the available predication
|
||||
- types. The function supports every combination of the two.
|
||||
+ /* A list of the available type suffixes, group suffixes, and predication
|
||||
+ types. The function supports every combination of the three.
|
||||
+
|
||||
+ The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES.
|
||||
+ It is lexicographically ordered based on the index value.
|
||||
|
||||
- The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES
|
||||
- while the list of predication types is terminated by NUM_PREDS.
|
||||
- The list of type suffixes is lexicographically ordered based
|
||||
- on the index value. */
|
||||
+ The list of group suffixes is terminated by NUM_GROUP_SUFFIXES
|
||||
+ and the list of predication types is terminated by NUM_PREDS. */
|
||||
const type_suffix_pair *types;
|
||||
+ const group_suffix_index *groups;
|
||||
const predication_index *preds;
|
||||
|
||||
/* The architecture extensions that the functions require, as a set of
|
||||
@@ -273,7 +301,8 @@ class GTY((user)) function_instance
|
||||
public:
|
||||
function_instance (const char *, const function_base *,
|
||||
const function_shape *, mode_suffix_index,
|
||||
- const type_suffix_pair &, predication_index);
|
||||
+ const type_suffix_pair &, group_suffix_index,
|
||||
+ predication_index);
|
||||
|
||||
bool operator== (const function_instance &) const;
|
||||
bool operator!= (const function_instance &) const;
|
||||
@@ -294,6 +323,8 @@ public:
|
||||
units_index displacement_units () const;
|
||||
|
||||
const type_suffix_info &type_suffix (unsigned int) const;
|
||||
+ const group_suffix_info &group_suffix () const;
|
||||
+
|
||||
tree scalar_type (unsigned int) const;
|
||||
tree vector_type (unsigned int) const;
|
||||
tree tuple_type (unsigned int) const;
|
||||
@@ -301,14 +332,14 @@ public:
|
||||
machine_mode vector_mode (unsigned int) const;
|
||||
machine_mode gp_mode (unsigned int) const;
|
||||
|
||||
- /* The properties of the function. (The explicit "enum"s are required
|
||||
- for gengtype.) */
|
||||
+ /* The properties of the function. */
|
||||
const char *base_name;
|
||||
const function_base *base;
|
||||
const function_shape *shape;
|
||||
- enum mode_suffix_index mode_suffix_id;
|
||||
+ mode_suffix_index mode_suffix_id;
|
||||
type_suffix_pair type_suffix_ids;
|
||||
- enum predication_index pred;
|
||||
+ group_suffix_index group_suffix_id;
|
||||
+ predication_index pred;
|
||||
};
|
||||
|
||||
class registered_function;
|
||||
@@ -390,10 +421,12 @@ public:
|
||||
tree report_no_such_form (type_suffix_index);
|
||||
tree lookup_form (mode_suffix_index,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
- type_suffix_index = NUM_TYPE_SUFFIXES);
|
||||
+ type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
+ group_suffix_index = GROUP_none);
|
||||
tree resolve_to (mode_suffix_index,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
- type_suffix_index = NUM_TYPE_SUFFIXES);
|
||||
+ type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
+ group_suffix_index = GROUP_none);
|
||||
|
||||
type_suffix_index infer_integer_scalar_type (unsigned int);
|
||||
type_suffix_index infer_pointer_type (unsigned int, bool = false);
|
||||
@@ -641,6 +674,11 @@ class function_shape
|
||||
public:
|
||||
virtual bool explicit_type_suffix_p (unsigned int) const = 0;
|
||||
|
||||
+ /* True if the group suffix is present in overloaded names.
|
||||
+ This isn't meaningful for pre-SME intrinsics, and true is
|
||||
+ more common than false, so provide a default definition. */
|
||||
+ virtual bool explicit_group_suffix_p () const { return true; }
|
||||
+
|
||||
/* Define all functions associated with the given group. */
|
||||
virtual void build (function_builder &,
|
||||
const function_group_info &) const = 0;
|
||||
@@ -669,6 +707,7 @@ private:
|
||||
|
||||
extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];
|
||||
extern const mode_suffix_info mode_suffixes[MODE_none + 1];
|
||||
+extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES];
|
||||
|
||||
extern tree scalar_types[NUM_VECTOR_TYPES];
|
||||
extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
|
||||
@@ -728,9 +767,11 @@ function_instance (const char *base_name_in,
|
||||
const function_shape *shape_in,
|
||||
mode_suffix_index mode_suffix_id_in,
|
||||
const type_suffix_pair &type_suffix_ids_in,
|
||||
+ group_suffix_index group_suffix_id_in,
|
||||
predication_index pred_in)
|
||||
: base_name (base_name_in), base (base_in), shape (shape_in),
|
||||
- mode_suffix_id (mode_suffix_id_in), pred (pred_in)
|
||||
+ mode_suffix_id (mode_suffix_id_in), group_suffix_id (group_suffix_id_in),
|
||||
+ pred (pred_in)
|
||||
{
|
||||
memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids));
|
||||
}
|
||||
@@ -741,9 +782,10 @@ function_instance::operator== (const function_instance &other) const
|
||||
return (base == other.base
|
||||
&& shape == other.shape
|
||||
&& mode_suffix_id == other.mode_suffix_id
|
||||
- && pred == other.pred
|
||||
&& type_suffix_ids[0] == other.type_suffix_ids[0]
|
||||
- && type_suffix_ids[1] == other.type_suffix_ids[1]);
|
||||
+ && type_suffix_ids[1] == other.type_suffix_ids[1]
|
||||
+ && group_suffix_id == other.group_suffix_id
|
||||
+ && pred == other.pred);
|
||||
}
|
||||
|
||||
inline bool
|
||||
@@ -815,6 +857,13 @@ function_instance::type_suffix (unsigned int i) const
|
||||
return type_suffixes[type_suffix_ids[i]];
|
||||
}
|
||||
|
||||
+/* Return information about the function's group suffix. */
|
||||
+inline const group_suffix_info &
|
||||
+function_instance::group_suffix () const
|
||||
+{
|
||||
+ return group_suffixes[group_suffix_id];
|
||||
+}
|
||||
+
|
||||
/* Return the scalar type associated with type suffix I. */
|
||||
inline tree
|
||||
function_instance::scalar_type (unsigned int i) const
|
||||
--
|
||||
2.33.0
|
||||
|
||||
230
0158-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
Normal file
230
0158-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch
Normal file
@ -0,0 +1,230 @@
|
||||
From a32a9321b3336907fe2d17148cb9e4652642a3e6 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:20 +0000
|
||||
Subject: [PATCH 066/157] [Backport][SME] aarch64: Add sve_type to SVE builtins
|
||||
code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7f6de9861e5d7745a0af5174582519a39d545a92
|
||||
|
||||
Until now, the SVE ACLE code had mostly been able to represent
|
||||
individual SVE arguments with just an element type suffix (s32, u32,
|
||||
etc.). However, the SME2 ACLE provides many overloaded intrinsics
|
||||
that operate on tuples rather than single vectors. This patch
|
||||
therefore adds a new type (sve_type) that combines an element
|
||||
type suffix with a vector count. This is enough to uniquely
|
||||
represent all SVE ACLE types.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.h (sve_type): New struct.
|
||||
(sve_type::operator==): New function.
|
||||
(function_resolver::get_vector_type): Delete.
|
||||
(function_resolver::report_no_such_form): Take an sve_type rather
|
||||
than a type_suffix_index.
|
||||
* config/aarch64/aarch64-sve-builtins.cc (get_vector_type): New
|
||||
function.
|
||||
(function_resolver::get_vector_type): Delete.
|
||||
(function_resolver::report_no_such_form): Take an sve_type rather
|
||||
than a type_suffix_index.
|
||||
(find_sve_type): New function, split out from...
|
||||
(function_resolver::infer_vector_or_tuple_type): ...here.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 93 ++++++++++++----------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 37 ++++++++-
|
||||
2 files changed, 88 insertions(+), 42 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index dc3fd80da..cc676bfe1 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -659,6 +659,14 @@ find_type_suffix_for_scalar_type (const_tree type)
|
||||
return NUM_TYPE_SUFFIXES;
|
||||
}
|
||||
|
||||
+/* Return the vector type associated with TYPE. */
|
||||
+static tree
|
||||
+get_vector_type (sve_type type)
|
||||
+{
|
||||
+ auto vector_type = type_suffixes[type.type].vector_type;
|
||||
+ return acle_vector_types[type.num_vectors - 1][vector_type];
|
||||
+}
|
||||
+
|
||||
/* Report an error against LOCATION that the user has tried to use
|
||||
function FNDECL when extension EXTENSION is disabled. */
|
||||
static void
|
||||
@@ -1190,13 +1198,6 @@ function_resolver::function_resolver (location_t location,
|
||||
{
|
||||
}
|
||||
|
||||
-/* Return the vector type associated with type suffix TYPE. */
|
||||
-tree
|
||||
-function_resolver::get_vector_type (type_suffix_index type)
|
||||
-{
|
||||
- return acle_vector_types[0][type_suffixes[type].vector_type];
|
||||
-}
|
||||
-
|
||||
/* Return the <stdint.h> name associated with TYPE. Using the <stdint.h>
|
||||
name should be more user-friendly than the underlying canonical type,
|
||||
since it makes the signedness and bitwidth explicit. */
|
||||
@@ -1227,10 +1228,10 @@ function_resolver::scalar_argument_p (unsigned int i)
|
||||
|| SCALAR_FLOAT_TYPE_P (type));
|
||||
}
|
||||
|
||||
-/* Report that the function has no form that takes type suffix TYPE.
|
||||
+/* Report that the function has no form that takes type TYPE.
|
||||
Return error_mark_node. */
|
||||
tree
|
||||
-function_resolver::report_no_such_form (type_suffix_index type)
|
||||
+function_resolver::report_no_such_form (sve_type type)
|
||||
{
|
||||
error_at (location, "%qE has no form that takes %qT arguments",
|
||||
fndecl, get_vector_type (type));
|
||||
@@ -1352,6 +1353,25 @@ function_resolver::infer_pointer_type (unsigned int argno,
|
||||
return type;
|
||||
}
|
||||
|
||||
+/* If TYPE is an SVE predicate or vector type, or a tuple of such a type,
|
||||
+ return the associated sve_type, otherwise return an invalid sve_type. */
|
||||
+static sve_type
|
||||
+find_sve_type (const_tree type)
|
||||
+{
|
||||
+ /* A linear search should be OK here, since the code isn't hot and
|
||||
+ the number of types is only small. */
|
||||
+ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i)
|
||||
+ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
|
||||
+ {
|
||||
+ vector_type_index type_i = type_suffixes[suffix_i].vector_type;
|
||||
+ tree this_type = acle_vector_types[size_i][type_i];
|
||||
+ if (this_type && matches_type_p (this_type, type))
|
||||
+ return { type_suffix_index (suffix_i), size_i + 1 };
|
||||
+ }
|
||||
+
|
||||
+ return {};
|
||||
+}
|
||||
+
|
||||
/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
|
||||
vectors; NUM_VECTORS is 1 for the former. Return the associated type
|
||||
suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
|
||||
@@ -1364,37 +1384,30 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno,
|
||||
if (actual == error_mark_node)
|
||||
return NUM_TYPE_SUFFIXES;
|
||||
|
||||
- /* A linear search should be OK here, since the code isn't hot and
|
||||
- the number of types is only small. */
|
||||
- for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i)
|
||||
- for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
|
||||
- {
|
||||
- vector_type_index type_i = type_suffixes[suffix_i].vector_type;
|
||||
- tree type = acle_vector_types[size_i][type_i];
|
||||
- if (type && matches_type_p (type, actual))
|
||||
- {
|
||||
- if (size_i + 1 == num_vectors)
|
||||
- return type_suffix_index (suffix_i);
|
||||
-
|
||||
- if (num_vectors == 1)
|
||||
- error_at (location, "passing %qT to argument %d of %qE, which"
|
||||
- " expects a single SVE vector rather than a tuple",
|
||||
- actual, argno + 1, fndecl);
|
||||
- else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t)
|
||||
- /* num_vectors is always != 1, so the singular isn't needed. */
|
||||
- error_n (location, num_vectors, "%qT%d%qE%d",
|
||||
- "passing single vector %qT to argument %d"
|
||||
- " of %qE, which expects a tuple of %d vectors",
|
||||
- actual, argno + 1, fndecl, num_vectors);
|
||||
- else
|
||||
- /* num_vectors is always != 1, so the singular isn't needed. */
|
||||
- error_n (location, num_vectors, "%qT%d%qE%d",
|
||||
- "passing %qT to argument %d of %qE, which"
|
||||
- " expects a tuple of %d vectors", actual, argno + 1,
|
||||
- fndecl, num_vectors);
|
||||
- return NUM_TYPE_SUFFIXES;
|
||||
- }
|
||||
- }
|
||||
+ if (auto sve_type = find_sve_type (actual))
|
||||
+ {
|
||||
+ if (sve_type.num_vectors == num_vectors)
|
||||
+ return sve_type.type;
|
||||
+
|
||||
+ if (num_vectors == 1)
|
||||
+ error_at (location, "passing %qT to argument %d of %qE, which"
|
||||
+ " expects a single SVE vector rather than a tuple",
|
||||
+ actual, argno + 1, fndecl);
|
||||
+ else if (sve_type.num_vectors == 1
|
||||
+ && sve_type.type != TYPE_SUFFIX_b)
|
||||
+ /* num_vectors is always != 1, so the singular isn't needed. */
|
||||
+ error_n (location, num_vectors, "%qT%d%qE%d",
|
||||
+ "passing single vector %qT to argument %d"
|
||||
+ " of %qE, which expects a tuple of %d vectors",
|
||||
+ actual, argno + 1, fndecl, num_vectors);
|
||||
+ else
|
||||
+ /* num_vectors is always != 1, so the singular isn't needed. */
|
||||
+ error_n (location, num_vectors, "%qT%d%qE%d",
|
||||
+ "passing %qT to argument %d of %qE, which"
|
||||
+ " expects a tuple of %d vectors", actual, argno + 1,
|
||||
+ fndecl, num_vectors);
|
||||
+ return NUM_TYPE_SUFFIXES;
|
||||
+ }
|
||||
|
||||
if (num_vectors == 1)
|
||||
error_at (location, "passing %qT to argument %d of %qE, which"
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 374c57e93..f4f2c415f 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -263,6 +263,40 @@ struct group_suffix_info
|
||||
unsigned int vectors_per_tuple;
|
||||
};
|
||||
|
||||
+/* Represents an SVE vector, predicate, tuple of vectors, or tuple of
|
||||
+ predicates. There is also a representation of "no type"/"invalid type". */
|
||||
+struct sve_type
|
||||
+{
|
||||
+ sve_type () = default;
|
||||
+ sve_type (type_suffix_index type) : type (type), num_vectors (1) {}
|
||||
+ sve_type (type_suffix_index type, unsigned int num_vectors)
|
||||
+ : type (type), num_vectors (num_vectors) {}
|
||||
+
|
||||
+ /* Return true if the type is valid. */
|
||||
+ explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; }
|
||||
+
|
||||
+ bool operator== (const sve_type &) const;
|
||||
+ bool operator!= (const sve_type &x) const { return !operator== (x); }
|
||||
+
|
||||
+ /* This is one of:
|
||||
+
|
||||
+ - TYPE_SUFFIX_b for svbool_t-based types
|
||||
+ - TYPE_SUFFIX_c for svcount_t-based types
|
||||
+ - the type suffix of a data element for SVE data vectors and tuples
|
||||
+ - NUM_TYPE_SUFFIXES for invalid types. */
|
||||
+ type_suffix_index type = NUM_TYPE_SUFFIXES;
|
||||
+
|
||||
+ /* If the type is a tuple, this is the number of vectors in the tuple,
|
||||
+ otherwise it is 1. */
|
||||
+ unsigned int num_vectors = 1;
|
||||
+};
|
||||
+
|
||||
+inline bool
|
||||
+sve_type::operator== (const sve_type &other) const
|
||||
+{
|
||||
+ return type == other.type && num_vectors == other.num_vectors;
|
||||
+}
|
||||
+
|
||||
/* Static information about a set of functions. */
|
||||
struct function_group_info
|
||||
{
|
||||
@@ -413,12 +447,11 @@ public:
|
||||
function_resolver (location_t, const function_instance &, tree,
|
||||
vec<tree, va_gc> &);
|
||||
|
||||
- tree get_vector_type (type_suffix_index);
|
||||
const char *get_scalar_type_name (type_suffix_index);
|
||||
tree get_argument_type (unsigned int);
|
||||
bool scalar_argument_p (unsigned int);
|
||||
|
||||
- tree report_no_such_form (type_suffix_index);
|
||||
+ tree report_no_such_form (sve_type);
|
||||
tree lookup_form (mode_suffix_index,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1474
0159-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
Normal file
1474
0159-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch
Normal file
File diff suppressed because it is too large
Load Diff
698
0160-Backport-SME-aarch64-Replace-vague-previous-argument.patch
Normal file
698
0160-Backport-SME-aarch64-Replace-vague-previous-argument.patch
Normal file
@ -0,0 +1,698 @@
|
||||
From 6a7cb5074824416ae562de0589550a930e9dbcaf Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:21 +0000
|
||||
Subject: [PATCH 068/157] [Backport][SME] aarch64: Replace vague "previous
|
||||
arguments" message
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b52d4b66e8b91ec1e3de9c0b79aaf258824b875
|
||||
|
||||
If an SVE ACLE intrinsic requires two arguments to have the
|
||||
same type, the C resolver would report mismatches as "argument N
|
||||
has type T2, but previous arguments had type T1". This patch makes
|
||||
the message say which argument had type T1.
|
||||
|
||||
This is needed to give decent error messages for some SME cases.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.h
|
||||
(function_resolver::require_matching_vector_type): Add a parameter
|
||||
that specifies the number of the earlier argument that is being
|
||||
matched against.
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(function_resolver::require_matching_vector_type): Likewise.
|
||||
(require_derived_vector_type): Update calls accordingly.
|
||||
(function_resolver::resolve_unary): Likewise.
|
||||
(function_resolver::resolve_uniform): Likewise.
|
||||
(function_resolver::resolve_uniform_opt_n): Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
(binary_long_lane_def::resolve): Likewise.
|
||||
(clast_def::resolve, ternary_uint_def::resolve): Likewise.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/sve/acle/general-c/*: Replace "but previous
|
||||
arguments had" with "but argument N had".
|
||||
---
|
||||
.../aarch64/aarch64-sve-builtins-shapes.cc | 6 ++--
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 17 +++++------
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 3 +-
|
||||
.../aarch64/sve/acle/general-c/binary_1.c | 6 ++--
|
||||
.../sve/acle/general-c/binary_lane_1.c | 2 +-
|
||||
.../sve/acle/general-c/binary_long_lane_1.c | 2 +-
|
||||
.../sve/acle/general-c/binary_long_opt_n_1.c | 8 +++---
|
||||
.../acle/general-c/binary_narrowb_opt_n_1.c | 8 +++---
|
||||
.../acle/general-c/binary_narrowt_opt_n_1.c | 8 +++---
|
||||
.../sve/acle/general-c/binary_opt_n_2.c | 14 +++++-----
|
||||
.../sve/acle/general-c/binary_opt_n_3.c | 16 +++++------
|
||||
.../sve/acle/general-c/binary_rotate_1.c | 2 +-
|
||||
.../sve/acle/general-c/binary_to_uint_1.c | 4 +--
|
||||
.../aarch64/sve/acle/general-c/clast_1.c | 2 +-
|
||||
.../aarch64/sve/acle/general-c/compare_1.c | 14 +++++-----
|
||||
.../sve/acle/general-c/compare_opt_n_1.c | 14 +++++-----
|
||||
.../aarch64/sve/acle/general-c/create_1.c | 6 ++--
|
||||
.../aarch64/sve/acle/general-c/create_3.c | 6 ++--
|
||||
.../aarch64/sve/acle/general-c/create_5.c | 6 ++--
|
||||
.../aarch64/sve/acle/general-c/mmla_1.c | 14 +++++-----
|
||||
.../sve/acle/general-c/ternary_lane_1.c | 4 +--
|
||||
.../acle/general-c/ternary_lane_rotate_1.c | 4 +--
|
||||
.../sve/acle/general-c/ternary_opt_n_1.c | 28 +++++++++----------
|
||||
.../sve/acle/general-c/ternary_rotate_1.c | 4 +--
|
||||
.../general-c/ternary_shift_right_imm_1.c | 6 ++--
|
||||
.../sve/acle/general-c/ternary_uint_1.c | 6 ++--
|
||||
.../aarch64/sve/acle/general-c/tmad_1.c | 2 +-
|
||||
.../aarch64/sve/acle/general-c/unary_1.c | 8 +++---
|
||||
.../aarch64/sve/acle/general-c/undeclared_2.c | 2 +-
|
||||
29 files changed, 112 insertions(+), 110 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
index 3ecef026c..40aa418e0 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
@@ -1153,7 +1153,7 @@ struct binary_long_lane_def : public overloaded_base<0>
|
||||
type_suffix_index type, result_type;
|
||||
if (!r.check_gp_argument (3, i, nargs)
|
||||
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
|
||||
- || !r.require_matching_vector_type (i + 1, type)
|
||||
+ || !r.require_matching_vector_type (i + 1, i, type)
|
||||
|| !r.require_integer_immediate (i + 2)
|
||||
|| (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES)
|
||||
return error_mark_node;
|
||||
@@ -1608,7 +1608,7 @@ struct clast_def : public overloaded_base<0>
|
||||
{
|
||||
type_suffix_index type;
|
||||
if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
|
||||
- || !r.require_matching_vector_type (i + 1, type))
|
||||
+ || !r.require_matching_vector_type (i + 1, i, type))
|
||||
return error_mark_node;
|
||||
return r.resolve_to (MODE_none, type);
|
||||
}
|
||||
@@ -3108,7 +3108,7 @@ struct ternary_uint_def : public overloaded_base<0>
|
||||
type_suffix_index type;
|
||||
if (!r.check_gp_argument (3, i, nargs)
|
||||
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
|
||||
- || !r.require_matching_vector_type (i + 1, type)
|
||||
+ || !r.require_matching_vector_type (i + 1, i, type)
|
||||
|| !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned))
|
||||
return error_mark_node;
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index 4e94e3633..1545fd78d 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -1561,11 +1561,12 @@ function_resolver::require_vector_type (unsigned int argno,
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* Like require_vector_type, but TYPE is inferred from previous arguments
|
||||
+/* Like require_vector_type, but TYPE is inferred from argument FIRST_ARGNO
|
||||
rather than being a fixed part of the function signature. This changes
|
||||
the nature of the error messages. */
|
||||
bool
|
||||
function_resolver::require_matching_vector_type (unsigned int argno,
|
||||
+ unsigned int first_argno,
|
||||
type_suffix_index type)
|
||||
{
|
||||
type_suffix_index new_type = infer_vector_type (argno);
|
||||
@@ -1575,9 +1576,9 @@ function_resolver::require_matching_vector_type (unsigned int argno,
|
||||
if (type != new_type)
|
||||
{
|
||||
error_at (location, "passing %qT to argument %d of %qE, but"
|
||||
- " previous arguments had type %qT",
|
||||
+ " argument %d had type %qT",
|
||||
get_vector_type (new_type), argno + 1, fndecl,
|
||||
- get_vector_type (type));
|
||||
+ first_argno + 1, get_vector_type (type));
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
@@ -1626,7 +1627,7 @@ require_derived_vector_type (unsigned int argno,
|
||||
{
|
||||
/* There's no need to resolve this case out of order. */
|
||||
gcc_assert (argno > first_argno);
|
||||
- return require_matching_vector_type (argno, first_type);
|
||||
+ return require_matching_vector_type (argno, first_argno, first_type);
|
||||
}
|
||||
|
||||
/* Use FIRST_TYPE to get the expected type class and element size. */
|
||||
@@ -2314,7 +2315,7 @@ function_resolver::resolve_unary (type_class_index merge_tclass,
|
||||
so we can use normal left-to-right resolution. */
|
||||
if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES
|
||||
|| !require_vector_type (1, VECTOR_TYPE_svbool_t)
|
||||
- || !require_matching_vector_type (2, type))
|
||||
+ || !require_matching_vector_type (2, 0, type))
|
||||
return error_mark_node;
|
||||
}
|
||||
else
|
||||
@@ -2359,9 +2360,9 @@ function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm)
|
||||
|| (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
|
||||
return error_mark_node;
|
||||
|
||||
- i += 1;
|
||||
+ unsigned int first_arg = i++;
|
||||
for (; i < nargs - nimm; ++i)
|
||||
- if (!require_matching_vector_type (i, type))
|
||||
+ if (!require_matching_vector_type (i, first_arg, type))
|
||||
return error_mark_node;
|
||||
|
||||
for (; i < nargs; ++i)
|
||||
@@ -2390,7 +2391,7 @@ function_resolver::resolve_uniform_opt_n (unsigned int nops)
|
||||
|
||||
unsigned int first_arg = i++;
|
||||
for (; i < nargs - 1; ++i)
|
||||
- if (!require_matching_vector_type (i, type))
|
||||
+ if (!require_matching_vector_type (i, first_arg, type))
|
||||
return error_mark_node;
|
||||
|
||||
return finish_opt_n_resolution (i, first_arg, type);
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index 5a4f35123..f7d6cc084 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -476,7 +476,8 @@ public:
|
||||
bool require_vector_or_scalar_type (unsigned int);
|
||||
|
||||
bool require_vector_type (unsigned int, vector_type_index);
|
||||
- bool require_matching_vector_type (unsigned int, type_suffix_index);
|
||||
+ bool require_matching_vector_type (unsigned int, unsigned int,
|
||||
+ type_suffix_index);
|
||||
bool require_derived_vector_type (unsigned int, unsigned int,
|
||||
type_suffix_index,
|
||||
type_class_index = SAME_TYPE_CLASS,
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c
|
||||
index 4343146de..2e919d287 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c
|
||||
@@ -7,8 +7,8 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16)
|
||||
{
|
||||
svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */
|
||||
svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */
|
||||
- svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */
|
||||
- svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */
|
||||
- svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but argument 1 had type 'svbool_t'} } */
|
||||
+ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */
|
||||
+ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */
|
||||
svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
|
||||
index 10b6b7e81..81533b25d 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
|
||||
@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
|
||||
svmul_lane (s32, s32, 0); /* { dg-error {ACLE function 'svmul_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */
|
||||
svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE type rather than a scalar} } */
|
||||
svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE type rather than a scalar} } */
|
||||
- svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */
|
||||
+ svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but argument 1 had type 'svfloat32_t'} } */
|
||||
svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */
|
||||
svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c
|
||||
index 805863f76..25b620877 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c
|
||||
@@ -21,7 +21,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
|
||||
svmullb_lane (f64, f64, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat64_t' arguments} } */
|
||||
svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */
|
||||
svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */
|
||||
- svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but previous arguments had type 'svuint32_t'} } */
|
||||
+ svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but argument 1 had type 'svuint32_t'} } */
|
||||
svmullb_lane (u32, u32, s32); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */
|
||||
svmullb_lane (u32, u32, i); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c
|
||||
index ee704eeae..1f513dde9 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c
|
||||
@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svaddlb (s64, s64); /* { dg-error {'svaddlb' has no form that takes 'svint64_t' arguments} } */
|
||||
svaddlb (f16, f16); /* { dg-error {'svaddlb' has no form that takes 'svfloat16_t' arguments} } */
|
||||
svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE type rather than a scalar} } */
|
||||
- svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */
|
||||
- svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */
|
||||
- svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */
|
||||
- svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint16_t'} } */
|
||||
+ svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */
|
||||
+ svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */
|
||||
+ svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */
|
||||
+ svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint16_t'} } */
|
||||
svaddlb (u8, 0);
|
||||
svaddlb (u16, 0);
|
||||
svaddlb (u32, 0);
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c
|
||||
index 8ca549ba9..4a29b5c43 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c
|
||||
@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svaddhnb (s64, s64);
|
||||
svaddhnb (f32, f32); /* { dg-error {'svaddhnb' has no form that takes 'svfloat32_t' arguments} } */
|
||||
svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE type rather than a scalar} } */
|
||||
- svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
|
||||
- svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
|
||||
- svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
|
||||
- svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
|
||||
+ svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
|
||||
+ svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
|
||||
+ svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
|
||||
+ svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
|
||||
svaddhnb (u8, 0); /* { dg-error {'svaddhnb' has no form that takes 'svuint8_t' arguments} } */
|
||||
svaddhnb (u16, 0);
|
||||
svaddhnb (u32, 0);
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c
|
||||
index 2b537965b..4a442616e 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c
|
||||
@@ -28,10 +28,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svaddhnt (f16, f32, f32); /* { dg-error {'svaddhnt' has no form that takes 'svfloat32_t' arguments} } */
|
||||
svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE type rather than a scalar} } */
|
||||
svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE type rather than a scalar} } */
|
||||
- svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
|
||||
- svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
|
||||
- svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
|
||||
- svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
|
||||
+ svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
|
||||
+ svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
|
||||
+ svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
|
||||
+ svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
|
||||
svaddhnt (u8, u8, 0); /* { dg-error {'svaddhnt' has no form that takes 'svuint8_t' arguments} } */
|
||||
svaddhnt (u16, u16, 0); /* { dg-error {passing 'svuint16_t' instead of the expected 'svuint8_t' to argument 1 of 'svaddhnt', after passing 'svuint16_t' to argument 2} } */
|
||||
svaddhnt (s8, u16, 0); /* { dg-error {arguments 1 and 2 of 'svaddhnt' must have the same signedness, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c
|
||||
index a151f90d1..40447cf83 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c
|
||||
@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */
|
||||
svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */
|
||||
svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE type rather than a scalar} } */
|
||||
- svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
|
||||
svadd_x (pg, u8, u8);
|
||||
- svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
|
||||
svadd_x (pg, u8, 0);
|
||||
|
||||
- svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */
|
||||
+ svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */
|
||||
svadd_x (pg, f16, f16);
|
||||
svadd_x (pg, f16, 1);
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c
|
||||
index 70ec9c585..94e20bc91 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c
|
||||
@@ -11,19 +11,19 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */
|
||||
svand_z (pg, pg, pg);
|
||||
svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE type rather than a scalar} } */
|
||||
- svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
|
||||
svand_z (pg, u8, u8);
|
||||
- svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
|
||||
- svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
|
||||
- svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
|
||||
- svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
|
||||
svand_z (pg, u8, 0);
|
||||
|
||||
- svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svbool_t'} } */
|
||||
+ svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svbool_t'} } */
|
||||
svand_z (pg, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svand_z', but its 'svbool_t' form does not accept scalars} } */
|
||||
|
||||
- svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */
|
||||
+ svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */
|
||||
svand_z (pg, f16, f16); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */
|
||||
svand_z (pg, f16, 1); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c
|
||||
index 7669e4a02..8939ce258 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c
|
||||
@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
|
||||
svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */
|
||||
svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE type rather than a scalar} } */
|
||||
svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE type rather than a scalar} } */
|
||||
- svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */
|
||||
+ svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but argument 2 had type 'svfloat32_t'} } */
|
||||
svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */
|
||||
svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */
|
||||
svcadd_x (pg, f32, f32, -90); /* { dg-error {passing -90 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c
|
||||
index 154662487..2c3fe5df1 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c
|
||||
@@ -12,8 +12,8 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32)
|
||||
svhistcnt_z (0, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */
|
||||
svhistcnt_z (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */
|
||||
svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */
|
||||
- svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svbool_t'} } */
|
||||
- svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svint32_t'} } */
|
||||
+ svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svbool_t'} } */
|
||||
+ svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svint32_t'} } */
|
||||
svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */
|
||||
svhistcnt_z (pg, pg, pg); /* { dg-error {'svhistcnt_z' has no form that takes 'svbool_t' arguments} } */
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c
|
||||
index ba1b2520f..47ce47328 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c
|
||||
@@ -10,6 +10,6 @@ test (svbool_t pg, svint32_t s32, svint64_t s64, int i)
|
||||
svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */
|
||||
svclasta (pg, i, s32);
|
||||
svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */
|
||||
- svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */
|
||||
+ svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but argument 2 had type 'svint32_t'} } */
|
||||
svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c
|
||||
index 5474124cc..0dd0ad910 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c
|
||||
@@ -13,15 +13,15 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svmatch (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmatch', which expects 'svbool_t'} } */
|
||||
svmatch (pg, pg, pg); /* { dg-error {'svmatch' has no form that takes 'svbool_t' arguments} } */
|
||||
svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE type rather than a scalar} } */
|
||||
- svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
|
||||
svmatch (pg, u8, u8);
|
||||
- svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
|
||||
svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE type rather than a scalar} } */
|
||||
|
||||
- svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */
|
||||
+ svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */
|
||||
svmatch (pg, f16, f16); /* { dg-error {'svmatch' has no form that takes 'svfloat16_t' arguments} } */
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c
|
||||
index 6faa73972..cfa50d387 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c
|
||||
@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */
|
||||
svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */
|
||||
svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE type rather than a scalar} } */
|
||||
- svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
|
||||
svcmpeq (pg, u8, u8);
|
||||
- svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
|
||||
- svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
|
||||
- svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
|
||||
- svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
|
||||
svcmpeq (pg, u8, 0);
|
||||
|
||||
- svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */
|
||||
+ svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */
|
||||
svcmpeq (pg, f16, f16);
|
||||
svcmpeq (pg, f16, 1);
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
|
||||
index 83e4a5600..7a617aa15 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
|
||||
@@ -10,11 +10,11 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64,
|
||||
*ptr = svcreate2 (u8); /* { dg-error {too few arguments to function 'svcreate2'} } */
|
||||
*ptr = svcreate2 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2'} } */
|
||||
*ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */
|
||||
- *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */
|
||||
- *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */
|
||||
+ *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */
|
||||
+ *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */
|
||||
*ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */
|
||||
*ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */
|
||||
- *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */
|
||||
+ *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */
|
||||
*ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */
|
||||
*ptr = svcreate2 (u8, u8);
|
||||
*ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c
|
||||
index e3302f7e7..40f3a1fed 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c
|
||||
@@ -11,11 +11,11 @@ f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64,
|
||||
*ptr = svcreate3 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3'} } */
|
||||
*ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3'} } */
|
||||
*ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */
|
||||
- *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */
|
||||
- *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */
|
||||
+ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */
|
||||
+ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */
|
||||
*ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE type rather than a scalar} } */
|
||||
*ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE type rather than a scalar} } */
|
||||
- *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */
|
||||
+ *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but argument 1 had type 'svbool_t'} } */
|
||||
*ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */
|
||||
*ptr = svcreate3 (f16, f16, f16);
|
||||
*ptr = svcreate3 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
|
||||
index c850c94f0..bf3dd5d75 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
|
||||
@@ -12,11 +12,11 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64,
|
||||
*ptr = svcreate4 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */
|
||||
*ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4'} } */
|
||||
*ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */
|
||||
- *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */
|
||||
- *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */
|
||||
+ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but argument 1 had type 'svint32_t'} } */
|
||||
+ *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but argument 1 had type 'svint32_t'} } */
|
||||
*ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */
|
||||
*ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */
|
||||
- *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */
|
||||
+ *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but argument 1 had type 'svbool_t'} } */
|
||||
*ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */
|
||||
*ptr = svcreate4 (s32, s32, s32, s32);
|
||||
*ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
|
||||
index 7fc7bb67b..ca2ab8a6f 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
|
||||
@@ -44,13 +44,13 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32,
|
||||
svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */
|
||||
|
||||
svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */
|
||||
- svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
|
||||
- svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
|
||||
- svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
|
||||
+ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
|
||||
+ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
|
||||
+ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
|
||||
+ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */
|
||||
+ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
|
||||
+ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */
|
||||
+ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */
|
||||
|
||||
svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */
|
||||
svmmla (f32, f32, f32);
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
|
||||
index 520c11f79..0a67f82bf 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
|
||||
@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
|
||||
svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
|
||||
svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
|
||||
svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
|
||||
- svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */
|
||||
+ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */
|
||||
+ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */
|
||||
svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */
|
||||
svmla_lane (f32, f32, f32, i); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
|
||||
index 3163d130c..60c9c466e 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
|
||||
@@ -14,8 +14,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
|
||||
svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
|
||||
svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
|
||||
svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
|
||||
- svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */
|
||||
+ svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */
|
||||
+ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */
|
||||
svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */
|
||||
svcmla_lane (f32, f32, f32, i, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
|
||||
index ac789c2be..6ca223475 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
|
||||
@@ -11,24 +11,24 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
|
||||
svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */
|
||||
svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */
|
||||
svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE type rather than a scalar} } */
|
||||
- svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
svmla_x (pg, u8, u8, u8);
|
||||
- svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE type rather than a scalar} } */
|
||||
- svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
- svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
+ svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
|
||||
svmla_x (pg, u8, u8, 0);
|
||||
|
||||
- svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
|
||||
- svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
|
||||
+ svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
|
||||
+ svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
|
||||
svmla_x (pg, f16, f16, f16);
|
||||
svmla_x (pg, f16, f16, 1);
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
|
||||
index bb6740289..68b2cfc1d 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
|
||||
@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
|
||||
svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
|
||||
svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
|
||||
svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
|
||||
- svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */
|
||||
- svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */
|
||||
+ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */
|
||||
+ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */
|
||||
svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */
|
||||
svcmla_x (pg, f32, f32, f32, i); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */
|
||||
svcmla_x (pg, f32, f32, f32, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
|
||||
index cfe601631..134cf98fd 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
|
||||
@@ -11,10 +11,10 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16,
|
||||
{
|
||||
const int one = 1;
|
||||
pg = svsra (pg, pg, 1); /* { dg-error {'svsra' has no form that takes 'svbool_t' arguments} } */
|
||||
- pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but previous arguments had type 'svbool_t'} } */
|
||||
+ pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but argument 1 had type 'svbool_t'} } */
|
||||
s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE type rather than a scalar} } */
|
||||
- s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */
|
||||
- s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */
|
||||
+ s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */
|
||||
+ s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */
|
||||
s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE type rather than a scalar} } */
|
||||
s8 = svsra (s8, s8, x); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */
|
||||
s8 = svsra (s8, s8, one); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
|
||||
index 5fb497701..a639562b1 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
|
||||
@@ -15,14 +15,14 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16,
|
||||
|
||||
svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */
|
||||
svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE type rather than a scalar} } */
|
||||
- svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svuint8_t'} } */
|
||||
+ svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svuint8_t'} } */
|
||||
svtbx (u8, u8, u8);
|
||||
svtbx (u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
svtbx (u8, u8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */
|
||||
svtbx (u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
svtbx (u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
|
||||
- svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svint8_t'} } */
|
||||
+ svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svint8_t'} } */
|
||||
svtbx (s8, s8, u8);
|
||||
svtbx (s8, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
svtbx (s8, s8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */
|
||||
@@ -36,7 +36,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16,
|
||||
svtbx (u16, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
svtbx (u16, u16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
|
||||
- svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but previous arguments had type 'svint16_t'} } */
|
||||
+ svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but argument 1 had type 'svint16_t'} } */
|
||||
svtbx (s16, s16, u8); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint8_t' respectively} } */
|
||||
svtbx (s16, s16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
|
||||
svtbx (s16, s16, u16);
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
|
||||
index c2eda93e3..992b50199 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
|
||||
@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
|
||||
svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */
|
||||
svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE type rather than a scalar} } */
|
||||
svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE type rather than a scalar} } */
|
||||
- svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */
|
||||
+ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but argument 1 had type 'svfloat32_t'} } */
|
||||
svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
|
||||
svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
|
||||
svtmad (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
|
||||
index 8c865a0e6..9c9c383dd 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
|
||||
@@ -13,9 +13,9 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32)
|
||||
svabs_m (s32, pg, s32);
|
||||
svabs_m (u32, pg, u32); /* { dg-error {'svabs_m' has no form that takes 'svuint32_t' arguments} } */
|
||||
svabs_m (f32, pg, f32);
|
||||
- svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
|
||||
- svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
|
||||
- svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
|
||||
- svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svbool_t'} } */
|
||||
+ svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
|
||||
+ svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
|
||||
+ svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
|
||||
+ svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svbool_t'} } */
|
||||
svabs_m (pg, pg, pg); /* { dg-error {'svabs_m' has no form that takes 'svbool_t' arguments} } */
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
|
||||
index 7e869bda8..6ffd3d9e8 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
|
||||
@@ -9,7 +9,7 @@ f (svint8_t s8, svuint16_t u16, svfloat32_t f32,
|
||||
u16 = svneg_x (pg, u16); /* { dg-error {'svneg_x' has no form that takes 'svuint16_t' arguments} } */
|
||||
f32 = svclz_x (pg, f32); /* { dg-error {'svclz_x' has no form that takes 'svfloat32_t' arguments} } */
|
||||
s16x2 = svcreate2 (s8); /* { dg-error {too few arguments to function 'svcreate2'} } */
|
||||
- u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but previous arguments had type 'svuint16_t'} } */
|
||||
+ u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but argument 1 had type 'svuint16_t'} } */
|
||||
f64x4 = svcreate4 (f32, f32, f32, f32, f32); /* { dg-error {too many arguments to function 'svcreate4'} } */
|
||||
pg = svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
368
0161-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
Normal file
368
0161-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch
Normal file
@ -0,0 +1,368 @@
|
||||
From 05dee9ad331c27345b014fe9aec0067a6f3b07d9 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:21 +0000
|
||||
Subject: [PATCH 069/157] [Backport][SME] aarch64: Make more use of sve_type in
|
||||
ACLE code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1f7f076ad6293cad19d35efdf726eb48cf78e3dd
|
||||
|
||||
This patch makes some functions operate on sve_type, rather than just
|
||||
on type suffixes. It also allows an overload to be resolved based on
|
||||
a mode and sve_type. In this case the sve_type is used to derive the
|
||||
group size as well as a type suffix.
|
||||
|
||||
This is needed for the SME2 intrinsics and the new tuple forms of
|
||||
svreinterpret. No functional change intended on its own.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.h
|
||||
(function_resolver::lookup_form): Add an overload that takes
|
||||
an sve_type rather than type and group suffixes.
|
||||
(function_resolver::resolve_to): Likewise.
|
||||
(function_resolver::infer_vector_or_tuple_type): Return an sve_type.
|
||||
(function_resolver::infer_tuple_type): Likewise.
|
||||
(function_resolver::require_matching_vector_type): Take an sve_type
|
||||
rather than a type_suffix_index.
|
||||
(function_resolver::require_derived_vector_type): Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins.cc (num_vectors_to_group):
|
||||
New function.
|
||||
(function_resolver::lookup_form): Add an overload that takes
|
||||
an sve_type rather than type and group suffixes.
|
||||
(function_resolver::resolve_to): Likewise.
|
||||
(function_resolver::infer_vector_or_tuple_type): Return an sve_type.
|
||||
(function_resolver::infer_tuple_type): Likewise.
|
||||
(function_resolver::infer_vector_type): Update accordingly.
|
||||
(function_resolver::require_matching_vector_type): Take an sve_type
|
||||
rather than a type_suffix_index.
|
||||
(function_resolver::require_derived_vector_type): Likewise.
|
||||
* config/aarch64/aarch64-sve-builtins-shapes.cc (get_def::resolve)
|
||||
(set_def::resolve, store_def::resolve, tbl_tuple_def::resolve): Update
|
||||
calls accordingly.
|
||||
---
|
||||
.../aarch64/aarch64-sve-builtins-shapes.cc | 16 +--
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 111 +++++++++++++-----
|
||||
gcc/config/aarch64/aarch64-sve-builtins.h | 12 +-
|
||||
3 files changed, 95 insertions(+), 44 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
index 40aa418e0..f187b4cb2 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
|
||||
@@ -1904,9 +1904,9 @@ struct get_def : public overloaded_base<0>
|
||||
resolve (function_resolver &r) const OVERRIDE
|
||||
{
|
||||
unsigned int i, nargs;
|
||||
- type_suffix_index type;
|
||||
+ sve_type type;
|
||||
if (!r.check_gp_argument (2, i, nargs)
|
||||
- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
|
||||
+ || !(type = r.infer_tuple_type (i))
|
||||
|| !r.require_integer_immediate (i + 1))
|
||||
return error_mark_node;
|
||||
|
||||
@@ -2417,9 +2417,9 @@ struct set_def : public overloaded_base<0>
|
||||
resolve (function_resolver &r) const OVERRIDE
|
||||
{
|
||||
unsigned int i, nargs;
|
||||
- type_suffix_index type;
|
||||
+ sve_type type;
|
||||
if (!r.check_gp_argument (3, i, nargs)
|
||||
- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
|
||||
+ || !(type = r.infer_tuple_type (i))
|
||||
|| !r.require_integer_immediate (i + 1)
|
||||
|| !r.require_derived_vector_type (i + 2, i, type))
|
||||
return error_mark_node;
|
||||
@@ -2592,11 +2592,11 @@ struct store_def : public overloaded_base<0>
|
||||
gcc_assert (r.mode_suffix_id == MODE_none || vnum_p);
|
||||
|
||||
unsigned int i, nargs;
|
||||
- type_suffix_index type;
|
||||
+ sve_type type;
|
||||
if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs)
|
||||
|| !r.require_pointer_type (i)
|
||||
|| (vnum_p && !r.require_scalar_type (i + 1, "int64_t"))
|
||||
- || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES))
|
||||
+ || !(type = r.infer_tuple_type (nargs - 1)))
|
||||
return error_mark_node;
|
||||
|
||||
return r.resolve_to (r.mode_suffix_id, type);
|
||||
@@ -2713,9 +2713,9 @@ struct tbl_tuple_def : public overloaded_base<0>
|
||||
resolve (function_resolver &r) const OVERRIDE
|
||||
{
|
||||
unsigned int i, nargs;
|
||||
- type_suffix_index type;
|
||||
+ sve_type type;
|
||||
if (!r.check_gp_argument (2, i, nargs)
|
||||
- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
|
||||
+ || !(type = r.infer_tuple_type (i))
|
||||
|| !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned))
|
||||
return error_mark_node;
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index 1545fd78d..e98274f8a 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -659,6 +659,21 @@ find_type_suffix_for_scalar_type (const_tree type)
|
||||
return NUM_TYPE_SUFFIXES;
|
||||
}
|
||||
|
||||
+/* Return the implicit group suffix for intrinsics that operate on NVECTORS
|
||||
+ vectors. */
|
||||
+static group_suffix_index
|
||||
+num_vectors_to_group (unsigned int nvectors)
|
||||
+{
|
||||
+ switch (nvectors)
|
||||
+ {
|
||||
+ case 1: return GROUP_none;
|
||||
+ case 2: return GROUP_x2;
|
||||
+ case 3: return GROUP_x3;
|
||||
+ case 4: return GROUP_x4;
|
||||
+ }
|
||||
+ gcc_unreachable ();
|
||||
+}
|
||||
+
|
||||
/* Return the vector type associated with TYPE. */
|
||||
static tree
|
||||
get_vector_type (sve_type type)
|
||||
@@ -1282,6 +1297,27 @@ function_resolver::lookup_form (mode_suffix_index mode,
|
||||
return rfn ? rfn->decl : NULL_TREE;
|
||||
}
|
||||
|
||||
+/* Silently check whether there is an instance of the function that has the
|
||||
+ mode suffix given by MODE and the type and group suffixes implied by TYPE.
|
||||
+ If the overloaded function has an explicit first type suffix (like
|
||||
+ conversions do), TYPE describes the implicit second type suffix.
|
||||
+ Otherwise, TYPE describes the only type suffix.
|
||||
+
|
||||
+ Return the decl of the function if it exists, otherwise return null. */
|
||||
+tree
|
||||
+function_resolver::lookup_form (mode_suffix_index mode, sve_type type)
|
||||
+{
|
||||
+ type_suffix_index type0 = type_suffix_ids[0];
|
||||
+ type_suffix_index type1 = type_suffix_ids[1];
|
||||
+ (type0 == NUM_TYPE_SUFFIXES ? type0 : type1) = type.type;
|
||||
+
|
||||
+ group_suffix_index group = group_suffix_id;
|
||||
+ if (group == GROUP_none && type.num_vectors != vectors_per_tuple ())
|
||||
+ group = num_vectors_to_group (type.num_vectors);
|
||||
+
|
||||
+ return lookup_form (mode, type0, type1, group);
|
||||
+}
|
||||
+
|
||||
/* Resolve the function to one with the mode suffix given by MODE, the
|
||||
type suffixes given by TYPE0 and TYPE1, and group suffix given by
|
||||
GROUP. Return its function decl on success, otherwise report an
|
||||
@@ -1305,6 +1341,19 @@ function_resolver::resolve_to (mode_suffix_index mode,
|
||||
return res;
|
||||
}
|
||||
|
||||
+/* Resolve the function to one that has the suffixes associated with MODE
|
||||
+ and TYPE; see lookup_form for how TYPE is interpreted. Return the
|
||||
+ function decl on success, otherwise report an error and return
|
||||
+ error_mark_node. */
|
||||
+tree
|
||||
+function_resolver::resolve_to (mode_suffix_index mode, sve_type type)
|
||||
+{
|
||||
+ if (tree res = lookup_form (mode, type))
|
||||
+ return res;
|
||||
+
|
||||
+ return report_no_such_form (type);
|
||||
+}
|
||||
+
|
||||
/* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
|
||||
Return the associated type suffix on success, otherwise report an
|
||||
error and return NUM_TYPE_SUFFIXES. */
|
||||
@@ -1424,21 +1473,20 @@ function_resolver::infer_sve_type (unsigned int argno)
|
||||
|
||||
/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
|
||||
vectors; NUM_VECTORS is 1 for the former. Return the associated type
|
||||
- suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
|
||||
- and return NUM_TYPE_SUFFIXES on failure. */
|
||||
-type_suffix_index
|
||||
+ on success. Report an error on failure. */
|
||||
+sve_type
|
||||
function_resolver::infer_vector_or_tuple_type (unsigned int argno,
|
||||
unsigned int num_vectors)
|
||||
{
|
||||
auto type = infer_sve_type (argno);
|
||||
if (!type)
|
||||
- return NUM_TYPE_SUFFIXES;
|
||||
+ return type;
|
||||
|
||||
if (type.num_vectors == num_vectors)
|
||||
- return type.type;
|
||||
+ return type;
|
||||
|
||||
report_incorrect_num_vectors (argno, type, num_vectors);
|
||||
- return NUM_TYPE_SUFFIXES;
|
||||
+ return {};
|
||||
}
|
||||
|
||||
/* Require argument ARGNO to have some form of vector type. Return the
|
||||
@@ -1447,7 +1495,9 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno,
|
||||
type_suffix_index
|
||||
function_resolver::infer_vector_type (unsigned int argno)
|
||||
{
|
||||
- return infer_vector_or_tuple_type (argno, 1);
|
||||
+ if (auto type = infer_vector_or_tuple_type (argno, 1))
|
||||
+ return type.type;
|
||||
+ return NUM_TYPE_SUFFIXES;
|
||||
}
|
||||
|
||||
/* Like infer_vector_type, but also require the type to be integral. */
|
||||
@@ -1512,10 +1562,9 @@ function_resolver::infer_sd_vector_type (unsigned int argno)
|
||||
|
||||
/* If the function operates on tuples of vectors, require argument ARGNO to be
|
||||
a tuple with the appropriate number of vectors, otherwise require it to be
|
||||
- a single vector. Return the associated type suffix on success, using
|
||||
- TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES
|
||||
+ a single vector. Return the associated type on success. Report an error
|
||||
on failure. */
|
||||
-type_suffix_index
|
||||
+sve_type
|
||||
function_resolver::infer_tuple_type (unsigned int argno)
|
||||
{
|
||||
return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
|
||||
@@ -1567,10 +1616,10 @@ function_resolver::require_vector_type (unsigned int argno,
|
||||
bool
|
||||
function_resolver::require_matching_vector_type (unsigned int argno,
|
||||
unsigned int first_argno,
|
||||
- type_suffix_index type)
|
||||
+ sve_type type)
|
||||
{
|
||||
- type_suffix_index new_type = infer_vector_type (argno);
|
||||
- if (new_type == NUM_TYPE_SUFFIXES)
|
||||
+ sve_type new_type = infer_sve_type (argno);
|
||||
+ if (!new_type)
|
||||
return false;
|
||||
|
||||
if (type != new_type)
|
||||
@@ -1613,15 +1662,13 @@ function_resolver::require_matching_vector_type (unsigned int argno,
|
||||
bool function_resolver::
|
||||
require_derived_vector_type (unsigned int argno,
|
||||
unsigned int first_argno,
|
||||
- type_suffix_index first_type,
|
||||
+ sve_type first_type,
|
||||
type_class_index expected_tclass,
|
||||
unsigned int expected_bits)
|
||||
{
|
||||
/* If the type needs to match FIRST_ARGNO exactly, use the preferred
|
||||
- error message for that case. The VECTOR_TYPE_P test excludes tuple
|
||||
- types, which we handle below instead. */
|
||||
- bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno));
|
||||
- if (both_vectors_p
|
||||
+ error message for that case. */
|
||||
+ if (first_type.num_vectors == 1
|
||||
&& expected_tclass == SAME_TYPE_CLASS
|
||||
&& expected_bits == SAME_SIZE)
|
||||
{
|
||||
@@ -1631,17 +1678,18 @@ require_derived_vector_type (unsigned int argno,
|
||||
}
|
||||
|
||||
/* Use FIRST_TYPE to get the expected type class and element size. */
|
||||
+ auto &first_type_suffix = type_suffixes[first_type.type];
|
||||
type_class_index orig_expected_tclass = expected_tclass;
|
||||
if (expected_tclass == NUM_TYPE_CLASSES)
|
||||
- expected_tclass = type_suffixes[first_type].tclass;
|
||||
+ expected_tclass = first_type_suffix.tclass;
|
||||
|
||||
unsigned int orig_expected_bits = expected_bits;
|
||||
if (expected_bits == SAME_SIZE)
|
||||
- expected_bits = type_suffixes[first_type].element_bits;
|
||||
+ expected_bits = first_type_suffix.element_bits;
|
||||
else if (expected_bits == HALF_SIZE)
|
||||
- expected_bits = type_suffixes[first_type].element_bits / 2;
|
||||
+ expected_bits = first_type_suffix.element_bits / 2;
|
||||
else if (expected_bits == QUARTER_SIZE)
|
||||
- expected_bits = type_suffixes[first_type].element_bits / 4;
|
||||
+ expected_bits = first_type_suffix.element_bits / 4;
|
||||
|
||||
/* If the expected type doesn't depend on FIRST_TYPE at all,
|
||||
just check for the fixed choice of vector type. */
|
||||
@@ -1655,13 +1703,14 @@ require_derived_vector_type (unsigned int argno,
|
||||
|
||||
/* Require the argument to be some form of SVE vector type,
|
||||
without being specific about the type of vector we want. */
|
||||
- type_suffix_index actual_type = infer_vector_type (argno);
|
||||
- if (actual_type == NUM_TYPE_SUFFIXES)
|
||||
+ sve_type actual_type = infer_vector_type (argno);
|
||||
+ if (!actual_type)
|
||||
return false;
|
||||
|
||||
/* Exit now if we got the right type. */
|
||||
- bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass);
|
||||
- bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits);
|
||||
+ auto &actual_type_suffix = type_suffixes[actual_type.type];
|
||||
+ bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass);
|
||||
+ bool size_ok_p = (actual_type_suffix.element_bits == expected_bits);
|
||||
if (tclass_ok_p && size_ok_p)
|
||||
return true;
|
||||
|
||||
@@ -1701,7 +1750,9 @@ require_derived_vector_type (unsigned int argno,
|
||||
|
||||
/* If the arguments have consistent type classes, but a link between
|
||||
the sizes has been broken, try to describe the error in those terms. */
|
||||
- if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE)
|
||||
+ if (first_type.num_vectors == 1
|
||||
+ && tclass_ok_p
|
||||
+ && orig_expected_bits == SAME_SIZE)
|
||||
{
|
||||
if (argno < first_argno)
|
||||
{
|
||||
@@ -1718,11 +1769,11 @@ require_derived_vector_type (unsigned int argno,
|
||||
|
||||
/* Likewise in reverse: look for cases in which the sizes are consistent
|
||||
but a link between the type classes has been broken. */
|
||||
- if (both_vectors_p
|
||||
+ if (first_type.num_vectors == 1
|
||||
&& size_ok_p
|
||||
&& orig_expected_tclass == SAME_TYPE_CLASS
|
||||
- && type_suffixes[first_type].integer_p
|
||||
- && type_suffixes[actual_type].integer_p)
|
||||
+ && first_type_suffix.integer_p
|
||||
+ && actual_type_suffix.integer_p)
|
||||
{
|
||||
if (argno < first_argno)
|
||||
{
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
index f7d6cc084..a7cfff7c1 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
|
||||
@@ -458,28 +458,28 @@ public:
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
group_suffix_index = GROUP_none);
|
||||
+ tree lookup_form (mode_suffix_index, sve_type);
|
||||
tree resolve_to (mode_suffix_index,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
type_suffix_index = NUM_TYPE_SUFFIXES,
|
||||
group_suffix_index = GROUP_none);
|
||||
+ tree resolve_to (mode_suffix_index, sve_type);
|
||||
|
||||
type_suffix_index infer_integer_scalar_type (unsigned int);
|
||||
type_suffix_index infer_pointer_type (unsigned int, bool = false);
|
||||
sve_type infer_sve_type (unsigned int);
|
||||
- type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
|
||||
+ sve_type infer_vector_or_tuple_type (unsigned int, unsigned int);
|
||||
type_suffix_index infer_vector_type (unsigned int);
|
||||
type_suffix_index infer_integer_vector_type (unsigned int);
|
||||
type_suffix_index infer_unsigned_vector_type (unsigned int);
|
||||
type_suffix_index infer_sd_vector_type (unsigned int);
|
||||
- type_suffix_index infer_tuple_type (unsigned int);
|
||||
+ sve_type infer_tuple_type (unsigned int);
|
||||
|
||||
bool require_vector_or_scalar_type (unsigned int);
|
||||
|
||||
bool require_vector_type (unsigned int, vector_type_index);
|
||||
- bool require_matching_vector_type (unsigned int, unsigned int,
|
||||
- type_suffix_index);
|
||||
- bool require_derived_vector_type (unsigned int, unsigned int,
|
||||
- type_suffix_index,
|
||||
+ bool require_matching_vector_type (unsigned int, unsigned int, sve_type);
|
||||
+ bool require_derived_vector_type (unsigned int, unsigned int, sve_type,
|
||||
type_class_index = SAME_TYPE_CLASS,
|
||||
unsigned int = SAME_SIZE);
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
106
0162-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
Normal file
106
0162-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch
Normal file
@ -0,0 +1,106 @@
|
||||
From 1abb02c636eef4f9a5f55f243bc0c4d38ee1f849 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:22 +0000
|
||||
Subject: [PATCH 070/157] [Backport][SME] aarch64: Tweak error message for
|
||||
(tuple,vector) pairs
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce2e22b7e02c7fbd1ab8145b632559b67ae9958
|
||||
|
||||
SME2 adds more intrinsics that take a tuple of vectors followed
|
||||
by a single vector, with the two arguments expected to have the
|
||||
same element type. Unlike with the existing svset* intrinsics,
|
||||
the size of the tuple is not fixed by the overloaded function name.
|
||||
|
||||
This patch adds an error message that (hopefully) copes better
|
||||
with that combination.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(function_resolver::require_derived_vector_type): Add a specific
|
||||
error message for the case in which the caller wants a single
|
||||
	vector whose element type matches a previous tuple argument.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/sve/acle/general-c/set_1.c: Tweak expected
|
||||
error message.
|
||||
* gcc.target/aarch64/sve/acle/general-c/set_3.c: Likewise.
|
||||
* gcc.target/aarch64/sve/acle/general-c/set_5.c: Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-sve-builtins.cc | 13 +++++++++++++
|
||||
.../gcc.target/aarch64/sve/acle/general-c/set_1.c | 4 ++--
|
||||
.../gcc.target/aarch64/sve/acle/general-c/set_3.c | 4 ++--
|
||||
.../gcc.target/aarch64/sve/acle/general-c/set_5.c | 4 ++--
|
||||
4 files changed, 19 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
index e98274f8a..9224916a7 100644
|
||||
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
|
||||
@@ -1707,6 +1707,19 @@ require_derived_vector_type (unsigned int argno,
|
||||
if (!actual_type)
|
||||
return false;
|
||||
|
||||
+ if (orig_expected_tclass == SAME_TYPE_CLASS
|
||||
+ && orig_expected_bits == SAME_SIZE)
|
||||
+ {
|
||||
+ if (actual_type.type == first_type.type)
|
||||
+ return true;
|
||||
+
|
||||
+ error_at (location, "passing %qT to argument %d of %qE, but"
|
||||
+ " argument %d was a tuple of %qT",
|
||||
+ get_vector_type (actual_type), argno + 1, fndecl,
|
||||
+ first_argno + 1, get_vector_type (first_type.type));
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
/* Exit now if we got the right type. */
|
||||
auto &actual_type_suffix = type_suffixes[actual_type.type];
|
||||
bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass);
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
|
||||
index f07c76102..f2a6da536 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
|
||||
@@ -16,8 +16,8 @@ f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x)
|
||||
u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */
|
||||
u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */
|
||||
u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */
|
||||
- u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */
|
||||
- u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */
|
||||
+ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */
|
||||
+ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */
|
||||
u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */
|
||||
u8x2 = svset2 (u8x2, 0, u8);
|
||||
f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
|
||||
index 543a1bea8..92b955f83 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
|
||||
@@ -17,8 +17,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4,
|
||||
f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */
|
||||
f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */
|
||||
f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */
|
||||
- f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */
|
||||
- f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */
|
||||
+ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */
|
||||
+ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */
|
||||
f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */
|
||||
f16x3 = svset3 (f16x3, 0, f16);
|
||||
f64 = svset3 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c
|
||||
index be911a731..f0696fb07 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c
|
||||
@@ -16,8 +16,8 @@ f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x)
|
||||
s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */
|
||||
s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */
|
||||
s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */
|
||||
- s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */
|
||||
- s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */
|
||||
+ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */
|
||||
+ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */
|
||||
s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */
|
||||
s32x4 = svset4 (s32x4, 0, s32);
|
||||
f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1236
0163-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
Normal file
1236
0163-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,90 @@
|
||||
From 11f813112629dbad432134f7b4c7c9a93551eb3c Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Mon, 27 Nov 2023 13:38:16 +0000
|
||||
Subject: [PATCH 072/157] [Backport][SME] attribs: Use existing traits for
|
||||
excl_hash_traits
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b33cf3a3a2025a4856f90fea8bd04884c2f6b31
|
||||
|
||||
excl_hash_traits can be defined more simply by reusing existing traits.
|
||||
|
||||
gcc/
|
||||
* attribs.cc (excl_hash_traits): Delete.
|
||||
(test_attribute_exclusions): Use pair_hash and nofree_string_hash
|
||||
instead.
|
||||
---
|
||||
gcc/attribs.cc | 45 +++------------------------------------------
|
||||
1 file changed, 3 insertions(+), 42 deletions(-)
|
||||
|
||||
diff --git a/gcc/attribs.cc b/gcc/attribs.cc
|
||||
index b219f8780..16d05b1da 100644
|
||||
--- a/gcc/attribs.cc
|
||||
+++ b/gcc/attribs.cc
|
||||
@@ -2555,47 +2555,6 @@ namespace selftest
|
||||
|
||||
typedef std::pair<const char *, const char *> excl_pair;
|
||||
|
||||
-struct excl_hash_traits: typed_noop_remove<excl_pair>
|
||||
-{
|
||||
- typedef excl_pair value_type;
|
||||
- typedef value_type compare_type;
|
||||
-
|
||||
- static hashval_t hash (const value_type &x)
|
||||
- {
|
||||
- hashval_t h1 = htab_hash_string (x.first);
|
||||
- hashval_t h2 = htab_hash_string (x.second);
|
||||
- return h1 ^ h2;
|
||||
- }
|
||||
-
|
||||
- static bool equal (const value_type &x, const value_type &y)
|
||||
- {
|
||||
- return !strcmp (x.first, y.first) && !strcmp (x.second, y.second);
|
||||
- }
|
||||
-
|
||||
- static void mark_deleted (value_type &x)
|
||||
- {
|
||||
- x = value_type (NULL, NULL);
|
||||
- }
|
||||
-
|
||||
- static const bool empty_zero_p = false;
|
||||
-
|
||||
- static void mark_empty (value_type &x)
|
||||
- {
|
||||
- x = value_type ("", "");
|
||||
- }
|
||||
-
|
||||
- static bool is_deleted (const value_type &x)
|
||||
- {
|
||||
- return !x.first && !x.second;
|
||||
- }
|
||||
-
|
||||
- static bool is_empty (const value_type &x)
|
||||
- {
|
||||
- return !*x.first && !*x.second;
|
||||
- }
|
||||
-};
|
||||
-
|
||||
-
|
||||
/* Self-test to verify that each attribute exclusion is symmetric,
|
||||
meaning that if attribute A is encoded as incompatible with
|
||||
attribute B then the opposite relationship is also encoded.
|
||||
@@ -2605,13 +2564,15 @@ struct excl_hash_traits: typed_noop_remove<excl_pair>
|
||||
static void
|
||||
test_attribute_exclusions ()
|
||||
{
|
||||
+ using excl_hash_traits = pair_hash<nofree_string_hash, nofree_string_hash>;
|
||||
+
|
||||
/* Iterate over the array of attribute tables first (with TI0 as
|
||||
the index) and over the array of attribute_spec in each table
|
||||
(with SI0 as the index). */
|
||||
const size_t ntables = ARRAY_SIZE (attribute_tables);
|
||||
|
||||
/* Set of pairs of mutually exclusive attributes. */
|
||||
- typedef hash_set<excl_pair, false, excl_hash_traits> exclusion_set;
|
||||
+ typedef hash_set<excl_hash_traits> exclusion_set;
|
||||
exclusion_set excl_set;
|
||||
|
||||
for (size_t ti0 = 0; ti0 != ntables; ++ti0)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2369
0165-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
Normal file
2369
0165-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch
Normal file
File diff suppressed because it is too large
Load Diff
64
0166-Backport-SME-aarch64-Fix-plugin-header-install.patch
Normal file
64
0166-Backport-SME-aarch64-Fix-plugin-header-install.patch
Normal file
@ -0,0 +1,64 @@
|
||||
From b1025ef48bff0622e54822dc0974f38748e9109f Mon Sep 17 00:00:00 2001
|
||||
From: Jakub Jelinek <jakub@redhat.com>
|
||||
Date: Thu, 22 Dec 2022 11:15:47 +0100
|
||||
Subject: [PATCH 074/157] [Backport][SME] aarch64: Fix plugin header install
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b30e9bc211fede06cf85b54e466012540bef14d
|
||||
|
||||
The r13-2943-g11a113d501ff64 made aarch64.h include
|
||||
aarch64-option-extensions.def, but that file isn't installed
|
||||
for building plugins.
|
||||
|
||||
On Wed, Dec 21, 2022 at 09:56:33AM +0000, Richard Sandiford wrote:
|
||||
> Should this (and aarch64-fusion-pairs.def and aarch64-tuning-flags.def)
|
||||
> be in TM_H instead? The first two OPTIONS_H_EXTRA entries seem to be
|
||||
> for aarch64-opt.h (included via aarch64.opt).
|
||||
>
|
||||
> I guess TM_H should also have aarch64-arches.def, since it's included
|
||||
> for aarch64_feature.
|
||||
|
||||
gcc/Makefile.in has
|
||||
TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H)
|
||||
and
|
||||
OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA)
|
||||
which means that adding something into TM_H when it is already in
|
||||
OPTIONS_H_EXTRA is unnecessary.
|
||||
It is true that aarch64-fusion-pairs.def (included by aarch64-protos.h)
|
||||
and aarch64-tuning-flags.def (ditto) and aarch64-option-extensions.def
|
||||
(included by aarch64.h) aren't needed for options.h, so I think the
|
||||
right patch would be following.
|
||||
|
||||
2022-12-22 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/aarch64/t-aarch64 (TM_H): Don't add aarch64-cores.def,
|
||||
add aarch64-fusion-pairs.def, aarch64-tuning-flags.def and
|
||||
aarch64-option-extensions.def.
|
||||
(OPTIONS_H_EXTRA): Don't add aarch64-fusion-pairs.def nor
|
||||
aarch64-tuning-flags.def.
|
||||
---
|
||||
gcc/config/aarch64/t-aarch64 | 8 ++++----
|
||||
1 file changed, 4 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
|
||||
index ba74abc0a..6a21a248f 100644
|
||||
--- a/gcc/config/aarch64/t-aarch64
|
||||
+++ b/gcc/config/aarch64/t-aarch64
|
||||
@@ -18,11 +18,11 @@
|
||||
# along with GCC; see the file COPYING3. If not see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
-TM_H += $(srcdir)/config/aarch64/aarch64-cores.def
|
||||
+TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \
|
||||
+ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \
|
||||
+ $(srcdir)/config/aarch64/aarch64-option-extensions.def
|
||||
OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \
|
||||
- $(srcdir)/config/aarch64/aarch64-arches.def \
|
||||
- $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \
|
||||
- $(srcdir)/config/aarch64/aarch64-tuning-flags.def
|
||||
+ $(srcdir)/config/aarch64/aarch64-arches.def
|
||||
|
||||
$(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true
|
||||
s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1178
0167-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
Normal file
1178
0167-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch
Normal file
File diff suppressed because it is too large
Load Diff
330
0168-Backport-SME-aarch64-Add-sme.patch
Normal file
330
0168-Backport-SME-aarch64-Add-sme.patch
Normal file
@ -0,0 +1,330 @@
|
||||
From c097d9ffc7dd8f90f78eb3b994f3691f4c8f812d Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 5 Dec 2023 10:11:23 +0000
|
||||
Subject: [PATCH 076/157] [Backport][SME] aarch64: Add +sme
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7e04bd1fadf3410c3d24b56f650a52ff53d01a3c
|
||||
|
||||
This patch adds the +sme ISA feature and requires it to be present
|
||||
when compiling arm_streaming code. (arm_streaming_compatible code
|
||||
does not necessarily assume the presence of SME. It just has to
|
||||
work when SME is present and streaming mode is enabled.)
|
||||
|
||||
gcc/
|
||||
* doc/invoke.texi: Document SME.
|
||||
* doc/sourcebuild.texi: Document aarch64_sme.
|
||||
* config/aarch64/aarch64-option-extensions.def (sme): Define.
|
||||
* config/aarch64/aarch64.h (AARCH64_ISA_SME): New macro.
|
||||
(TARGET_SME): Likewise.
|
||||
* config/aarch64/aarch64.cc (aarch64_override_options_internal):
|
||||
Ensure that SME is present when compiling streaming code.
|
||||
|
||||
gcc/testsuite/
|
||||
* lib/target-supports.exp (check_effective_target_aarch64_sme): New
|
||||
target test.
|
||||
* gcc.target/aarch64/sme/aarch64-sme.exp: Force SME to be enabled
|
||||
if it isn't by default.
|
||||
* g++.target/aarch64/sme/aarch64-sme.exp: Likewise.
|
||||
* gcc.target/aarch64/sme/streaming_mode_3.c: New test.
|
||||
---
|
||||
.../aarch64/aarch64-option-extensions.def | 2 +
|
||||
gcc/config/aarch64/aarch64.cc | 33 ++++++++++
|
||||
gcc/config/aarch64/aarch64.h | 5 ++
|
||||
gcc/doc/invoke.texi | 2 +
|
||||
gcc/doc/sourcebuild.texi | 2 +
|
||||
.../g++.target/aarch64/sme/aarch64-sme.exp | 10 ++-
|
||||
.../gcc.target/aarch64/sme/aarch64-sme.exp | 10 ++-
|
||||
.../gcc.target/aarch64/sme/streaming_mode_3.c | 63 +++++++++++++++++++
|
||||
.../gcc.target/aarch64/sme/streaming_mode_4.c | 22 +++++++
|
||||
gcc/testsuite/lib/target-supports.exp | 12 ++++
|
||||
10 files changed, 157 insertions(+), 4 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
index bdf4baf30..faee64a79 100644
|
||||
--- a/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
|
||||
@@ -149,4 +149,6 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "")
|
||||
|
||||
AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
|
||||
|
||||
+AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme")
|
||||
+
|
||||
#undef AARCH64_OPT_EXTENSION
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 904166b21..8f8395201 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -11648,6 +11648,23 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Implement TARGET_START_CALL_ARGS. */
|
||||
+
|
||||
+static void
|
||||
+aarch64_start_call_args (cumulative_args_t ca_v)
|
||||
+{
|
||||
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
|
||||
+
|
||||
+ if (!TARGET_SME && (ca->isa_mode & AARCH64_FL_SM_ON))
|
||||
+ {
|
||||
+ error ("calling a streaming function requires the ISA extension %qs",
|
||||
+ "sme");
|
||||
+ inform (input_location, "you can enable %qs using the command-line"
|
||||
+ " option %<-march%>, or by using the %<target%>"
|
||||
+ " attribute or pragma", "sme");
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* This function is used by the call expanders of the machine description.
|
||||
RESULT is the register in which the result is returned. It's NULL for
|
||||
"call" and "sibcall".
|
||||
@@ -18194,6 +18211,19 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||||
&& !fixed_regs[R18_REGNUM])
|
||||
error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
|
||||
|
||||
+ if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
|
||||
+ && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME))
|
||||
+ {
|
||||
+ error ("streaming functions require the ISA extension %qs", "sme");
|
||||
+ inform (input_location, "you can enable %qs using the command-line"
|
||||
+ " option %<-march%>, or by using the %<target%>"
|
||||
+ " attribute or pragma", "sme");
|
||||
+ opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
|
||||
+ auto new_flags = (opts->x_aarch64_asm_isa_flags
|
||||
+ | feature_deps::SME ().enable);
|
||||
+ aarch64_set_asm_isa_flags (opts, new_flags);
|
||||
+ }
|
||||
+
|
||||
initialize_aarch64_code_model (opts);
|
||||
initialize_aarch64_tls_size (opts);
|
||||
|
||||
@@ -28159,6 +28189,9 @@ aarch64_get_v16qi_mode ()
|
||||
#undef TARGET_FUNCTION_VALUE_REGNO_P
|
||||
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
|
||||
|
||||
+#undef TARGET_START_CALL_ARGS
|
||||
+#define TARGET_START_CALL_ARGS aarch64_start_call_args
|
||||
+
|
||||
#undef TARGET_GIMPLE_FOLD_BUILTIN
|
||||
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 84215c8c3..dd2de4e88 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -214,6 +214,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
||||
#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
|
||||
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
|
||||
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
|
||||
+#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME)
|
||||
#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A)
|
||||
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
|
||||
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
|
||||
@@ -292,6 +293,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
|
||||
/* SVE2 SM4 instructions, enabled through +sve2-sm4. */
|
||||
#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4)
|
||||
|
||||
+/* SME instructions, enabled through +sme. Note that this does not
|
||||
+ imply anything about the state of PSTATE.SM. */
|
||||
+#define TARGET_SME (AARCH64_ISA_SME)
|
||||
+
|
||||
/* ARMv8.3-A features. */
|
||||
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
|
||||
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 53709b246..2420b05d9 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -19478,6 +19478,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy},
|
||||
Enable the Flag Manipulation instructions Extension.
|
||||
@item pauth
|
||||
Enable the Pointer Authentication Extension.
|
||||
+@item sme
|
||||
+Enable the Scalable Matrix Extension.
|
||||
|
||||
@end table
|
||||
|
||||
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
|
||||
index 454fae11a..80936a0eb 100644
|
||||
--- a/gcc/doc/sourcebuild.texi
|
||||
+++ b/gcc/doc/sourcebuild.texi
|
||||
@@ -2277,6 +2277,8 @@ AArch64 target which generates instruction sequences for big endian.
|
||||
@item aarch64_small_fpic
|
||||
Binutils installed on test system supports relocation types required by -fpic
|
||||
for AArch64 small memory model.
|
||||
+@item aarch64_sme
|
||||
+AArch64 target that generates instructions for SME.
|
||||
@item aarch64_sve_hw
|
||||
AArch64 target that is able to generate and execute SVE code (regardless of
|
||||
whether it does so by default).
|
||||
diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
|
||||
index 72fcd0bd9..1c3e69cde 100644
|
||||
--- a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
|
||||
+++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
|
||||
@@ -30,10 +30,16 @@ load_lib g++-dg.exp
|
||||
# Initialize `dg'.
|
||||
dg-init
|
||||
|
||||
-aarch64-with-arch-dg-options "" {
|
||||
+if { [check_effective_target_aarch64_sme] } {
|
||||
+ set sme_flags ""
|
||||
+} else {
|
||||
+ set sme_flags "-march=armv9-a+sme"
|
||||
+}
|
||||
+
|
||||
+aarch64-with-arch-dg-options $sme_flags {
|
||||
# Main loop.
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
|
||||
- "" ""
|
||||
+ "" $sme_flags
|
||||
}
|
||||
|
||||
# All done.
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
|
||||
index c990e5924..011310e80 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
|
||||
@@ -30,10 +30,16 @@ load_lib gcc-dg.exp
|
||||
# Initialize `dg'.
|
||||
dg-init
|
||||
|
||||
-aarch64-with-arch-dg-options "" {
|
||||
+if { [check_effective_target_aarch64_sme] } {
|
||||
+ set sme_flags ""
|
||||
+} else {
|
||||
+ set sme_flags "-march=armv9-a+sme"
|
||||
+}
|
||||
+
|
||||
+aarch64-with-arch-dg-options $sme_flags {
|
||||
# Main loop.
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
|
||||
- "" ""
|
||||
+ "" $sme_flags
|
||||
}
|
||||
|
||||
# All done.
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
|
||||
new file mode 100644
|
||||
index 000000000..45ec92321
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
|
||||
@@ -0,0 +1,63 @@
|
||||
+// { dg-options "" }
|
||||
+
|
||||
+#pragma GCC target "+nosme"
|
||||
+
|
||||
+void sc_a () [[arm::streaming_compatible]] {}
|
||||
+void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+void ns_a () {}
|
||||
+
|
||||
+void sc_b () [[arm::streaming_compatible]] {}
|
||||
+void ns_b () {}
|
||||
+void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+
|
||||
+void sc_c () [[arm::streaming_compatible]] {}
|
||||
+void sc_d () [[arm::streaming_compatible]] {}
|
||||
+
|
||||
+void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+
|
||||
+void ns_c () {}
|
||||
+void ns_d () {}
|
||||
+
|
||||
+void sc_e () [[arm::streaming_compatible]];
|
||||
+void s_e () [[arm::streaming]];
|
||||
+void ns_e ();
|
||||
+
|
||||
+#pragma GCC target "+sme"
|
||||
+
|
||||
+void sc_f () [[arm::streaming_compatible]] {}
|
||||
+void s_f () [[arm::streaming]] {}
|
||||
+void ns_f () {}
|
||||
+
|
||||
+void sc_g () [[arm::streaming_compatible]] {}
|
||||
+void ns_g () {}
|
||||
+void s_g () [[arm::streaming]] {}
|
||||
+
|
||||
+void sc_h () [[arm::streaming_compatible]] {}
|
||||
+void sc_i () [[arm::streaming_compatible]] {}
|
||||
+
|
||||
+void s_h () [[arm::streaming]] {}
|
||||
+void s_i () [[arm::streaming]] {}
|
||||
+
|
||||
+void ns_h () {}
|
||||
+void ns_i () {}
|
||||
+
|
||||
+void sc_j () [[arm::streaming_compatible]];
|
||||
+void s_j () [[arm::streaming]];
|
||||
+void ns_j ();
|
||||
+
|
||||
+#pragma GCC target "+sme"
|
||||
+
|
||||
+void sc_k () [[arm::streaming_compatible]] {}
|
||||
+
|
||||
+#pragma GCC target "+nosme"
|
||||
+#pragma GCC target "+sme"
|
||||
+
|
||||
+void s_k () [[arm::streaming]] {}
|
||||
+
|
||||
+#pragma GCC target "+nosme"
|
||||
+#pragma GCC target "+sme"
|
||||
+
|
||||
+void ns_k () {}
|
||||
+
|
||||
+#pragma GCC target "+nosme"
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
|
||||
new file mode 100644
|
||||
index 000000000..50e92f2e1
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
|
||||
@@ -0,0 +1,22 @@
|
||||
+// { dg-options "-mgeneral-regs-only" }
|
||||
+
|
||||
+void sc_a () [[arm::streaming_compatible]] {}
|
||||
+void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+void ns_a () {}
|
||||
+
|
||||
+void sc_b () [[arm::streaming_compatible]] {}
|
||||
+void ns_b () {}
|
||||
+void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+
|
||||
+void sc_c () [[arm::streaming_compatible]] {}
|
||||
+void sc_d () [[arm::streaming_compatible]] {}
|
||||
+
|
||||
+void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
|
||||
+
|
||||
+void ns_c () {}
|
||||
+void ns_d () {}
|
||||
+
|
||||
+void sc_e () [[arm::streaming_compatible]];
|
||||
+void s_e () [[arm::streaming]];
|
||||
+void ns_e ();
|
||||
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||
index bd89d4f52..e2a9ef5fa 100644
|
||||
--- a/gcc/testsuite/lib/target-supports.exp
|
||||
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||
@@ -3887,6 +3887,18 @@ proc aarch64_sve_bits { } {
|
||||
}]
|
||||
}
|
||||
|
||||
+# Return 1 if this is an AArch64 target that generates instructions for SME.
|
||||
+proc check_effective_target_aarch64_sme { } {
|
||||
+ if { ![istarget aarch64*-*-*] } {
|
||||
+ return 0
|
||||
+ }
|
||||
+ return [check_no_compiler_messages aarch64_sme assembly {
|
||||
+ #if !defined (__ARM_FEATURE_SME)
|
||||
+ #error FOO
|
||||
+ #endif
|
||||
+ }]
|
||||
+}
|
||||
+
|
||||
# Return 1 if this is a compiler supporting ARC atomic operations
|
||||
proc check_effective_target_arc_atomic { } {
|
||||
return [check_no_compiler_messages arc_atomic assembly {
|
||||
--
|
||||
2.33.0
|
||||
|
||||
168
0169-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
Normal file
168
0169-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch
Normal file
@ -0,0 +1,168 @@
|
||||
From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001
|
||||
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
|
||||
Date: Thu, 1 Jun 2023 09:37:06 +0100
|
||||
Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r
|
||||
alternatives to 64-bit vector move patterns
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
|
||||
|
||||
We can use the X registers to load and store 64-bit vector modes, we just need to add the alternatives
|
||||
to the mov patterns. This straightforward patch does that and for the pair variants too.
|
||||
For the testcase in the code we now generate the optimal assembly without any superfluous
|
||||
GP<->SIMD moves.
|
||||
|
||||
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
|
||||
Add =r,m and =m,r alternatives.
|
||||
(load_pair<DREG:mode><DREG2:mode>): Likewise.
|
||||
(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/aarch64/xreg-vec-modes_1.c: New test.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++--------
|
||||
.../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++
|
||||
2 files changed, 65 insertions(+), 17 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index 2d688edf5..b5c52ba16 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -116,26 +116,28 @@
|
||||
|
||||
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
|
||||
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
|
||||
- "=w, m, m, w, ?r, ?w, ?r, w, w")
|
||||
+ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
|
||||
(match_operand:VDMOV 1 "general_operand"
|
||||
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
|
||||
+ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
|
||||
"TARGET_FLOAT
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
||||
"@
|
||||
ldr\t%d0, %1
|
||||
+ ldr\t%x0, %1
|
||||
str\txzr, %0
|
||||
str\t%d1, %0
|
||||
+ str\t%x1, %0
|
||||
* return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
|
||||
* return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
|
||||
fmov\t%d0, %1
|
||||
mov\t%0, %1
|
||||
* return aarch64_output_simd_mov_immediate (operands[1], 64);
|
||||
fmov\t%d0, xzr"
|
||||
- [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
|
||||
- neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
||||
+ [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
|
||||
+ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
||||
mov_reg, neon_move<q>, f_mcr")
|
||||
- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
|
||||
+ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
|
||||
@@ -177,31 +179,35 @@
|
||||
)
|
||||
|
||||
(define_insn "load_pair<DREG:mode><DREG2:mode>"
|
||||
- [(set (match_operand:DREG 0 "register_operand" "=w")
|
||||
- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
|
||||
- (set (match_operand:DREG2 2 "register_operand" "=w")
|
||||
- (match_operand:DREG2 3 "memory_operand" "m"))]
|
||||
+ [(set (match_operand:DREG 0 "register_operand" "=w,r")
|
||||
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
|
||||
+ (set (match_operand:DREG2 2 "register_operand" "=w,r")
|
||||
+ (match_operand:DREG2 3 "memory_operand" "m,m"))]
|
||||
"TARGET_FLOAT
|
||||
&& rtx_equal_p (XEXP (operands[3], 0),
|
||||
plus_constant (Pmode,
|
||||
XEXP (operands[1], 0),
|
||||
GET_MODE_SIZE (<DREG:MODE>mode)))"
|
||||
- "ldp\\t%d0, %d2, %z1"
|
||||
- [(set_attr "type" "neon_ldp")]
|
||||
+ "@
|
||||
+ ldp\t%d0, %d2, %z1
|
||||
+ ldp\t%x0, %x2, %z1"
|
||||
+ [(set_attr "type" "neon_ldp,load_16")]
|
||||
)
|
||||
|
||||
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
|
||||
- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
|
||||
- (match_operand:DREG 1 "register_operand" "w"))
|
||||
- (set (match_operand:DREG2 2 "memory_operand" "=m")
|
||||
- (match_operand:DREG2 3 "register_operand" "w"))]
|
||||
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
|
||||
+ (match_operand:DREG 1 "register_operand" "w,r"))
|
||||
+ (set (match_operand:DREG2 2 "memory_operand" "=m,m")
|
||||
+ (match_operand:DREG2 3 "register_operand" "w,r"))]
|
||||
"TARGET_FLOAT
|
||||
&& rtx_equal_p (XEXP (operands[2], 0),
|
||||
plus_constant (Pmode,
|
||||
XEXP (operands[0], 0),
|
||||
GET_MODE_SIZE (<DREG:MODE>mode)))"
|
||||
- "stp\\t%d1, %d3, %z0"
|
||||
- [(set_attr "type" "neon_stp")]
|
||||
+ "@
|
||||
+ stp\t%d1, %d3, %z0
|
||||
+ stp\t%x1, %x3, %z0"
|
||||
+ [(set_attr "type" "neon_stp,store_16")]
|
||||
)
|
||||
|
||||
(define_insn "load_pair<VQ:mode><VQ2:mode>"
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
|
||||
new file mode 100644
|
||||
index 000000000..fc4dcb1ad
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
|
||||
@@ -0,0 +1,42 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" } } */
|
||||
+
|
||||
+typedef unsigned int v2si __attribute__((vector_size (8)));
|
||||
+
|
||||
+#define force_gp(V1) asm volatile ("" \
|
||||
+ : "=r"(V1) \
|
||||
+ : "r"(V1) \
|
||||
+ : /* No clobbers */);
|
||||
+
|
||||
+/*
|
||||
+** foo:
|
||||
+** ldr (x[0-9]+), \[x1\]
|
||||
+** str \1, \[x0\]
|
||||
+** ret
|
||||
+*/
|
||||
+
|
||||
+void
|
||||
+foo (v2si *a, v2si *b)
|
||||
+{
|
||||
+ v2si tmp = *b;
|
||||
+ force_gp (tmp);
|
||||
+ *a = tmp;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** foo2:
|
||||
+** ldp (x[0-9]+), (x[0-9]+), \[x0\]
|
||||
+** stp \1, \2, \[x1\]
|
||||
+** ret
|
||||
+*/
|
||||
+void
|
||||
+foo2 (v2si *a, v2si *b)
|
||||
+{
|
||||
+ v2si t1 = *a;
|
||||
+ v2si t2 = a[1];
|
||||
+ force_gp (t1);
|
||||
+ force_gp (t2);
|
||||
+ *b = t1;
|
||||
+ b[1] = t2;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
167
0170-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
Normal file
167
0170-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch
Normal file
@ -0,0 +1,167 @@
|
||||
From 7d40978965ff893871a79f5f624f54ae02a34a8b Mon Sep 17 00:00:00 2001
|
||||
From: Tamar Christina <tamar.christina@arm.com>
|
||||
Date: Wed, 18 Oct 2023 09:34:01 +0100
|
||||
Subject: [PATCH 078/157] [Backport][SME] AArch64: Rewrite simd move immediate
|
||||
patterns to new syntax
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=04227acbe9e6c60d1e314a6b4f2d949c07f30baa
|
||||
|
||||
This rewrites the simd MOV patterns to use the new compact syntax.
|
||||
No change in semantics is expected. This will be needed in follow on patches.
|
||||
|
||||
This also merges the splits into the define_insn which will also be needed soon.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
PR tree-optimization/109154
|
||||
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
|
||||
Rewrite to new syntax.
|
||||
(*aarch64_simd_mov<VQMOV:mode>): Rewrite to new syntax and merge in
|
||||
splits.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 116 ++++++++++++-----------------
|
||||
1 file changed, 47 insertions(+), 69 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index b5c52ba16..1f4b30642 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -115,54 +115,59 @@
|
||||
)
|
||||
|
||||
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
|
||||
- [(set (match_operand:VDMOV 0 "nonimmediate_operand"
|
||||
- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
|
||||
- (match_operand:VDMOV 1 "general_operand"
|
||||
- "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
|
||||
+ [(set (match_operand:VDMOV 0 "nonimmediate_operand")
|
||||
+ (match_operand:VDMOV 1 "general_operand"))]
|
||||
"TARGET_FLOAT
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
||||
- "@
|
||||
- ldr\t%d0, %1
|
||||
- ldr\t%x0, %1
|
||||
- str\txzr, %0
|
||||
- str\t%d1, %0
|
||||
- str\t%x1, %0
|
||||
- * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
|
||||
- * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
|
||||
- fmov\t%d0, %1
|
||||
- mov\t%0, %1
|
||||
- * return aarch64_output_simd_mov_immediate (operands[1], 64);
|
||||
- fmov\t%d0, xzr"
|
||||
- [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
|
||||
- store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
|
||||
- mov_reg, neon_move<q>, f_mcr")
|
||||
- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
|
||||
-)
|
||||
-
|
||||
-(define_insn "*aarch64_simd_mov<VQMOV:mode>"
|
||||
- [(set (match_operand:VQMOV 0 "nonimmediate_operand"
|
||||
- "=w, Umn, m, w, ?r, ?w, ?r, w, w")
|
||||
- (match_operand:VQMOV 1 "general_operand"
|
||||
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
|
||||
+ {@ [cons: =0, 1; attrs: type, arch]
|
||||
+ [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
|
||||
+ [r , m ; load_8 , * ] ldr\t%x0, %1
|
||||
+ [m , Dz; store_8 , * ] str\txzr, %0
|
||||
+ [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
|
||||
+ [m , r ; store_8 , * ] str\t%x1, %0
|
||||
+ [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
|
||||
+ [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
|
||||
+ [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
|
||||
+ [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
|
||||
+ [?w, r ; f_mcr , * ] fmov\t%d0, %1
|
||||
+ [?r, r ; mov_reg , * ] mov\t%0, %1
|
||||
+ [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
|
||||
+ [w , Dz; f_mcr , * ] fmov\t%d0, xzr
|
||||
+ }
|
||||
+)
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
|
||||
+ [(set (match_operand:VQMOV 0 "nonimmediate_operand")
|
||||
+ (match_operand:VQMOV 1 "general_operand"))]
|
||||
"TARGET_FLOAT
|
||||
&& (register_operand (operands[0], <MODE>mode)
|
||||
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
|
||||
- "@
|
||||
- ldr\t%q0, %1
|
||||
- stp\txzr, xzr, %0
|
||||
- str\t%q1, %0
|
||||
- mov\t%0.<Vbtype>, %1.<Vbtype>
|
||||
- #
|
||||
- #
|
||||
- #
|
||||
- * return aarch64_output_simd_mov_immediate (operands[1], 128);
|
||||
- fmov\t%d0, xzr"
|
||||
- [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
|
||||
- neon_logic<q>, multiple, multiple,\
|
||||
- multiple, neon_move<q>, fmov")
|
||||
- (set_attr "length" "4,4,4,4,8,8,8,4,4")
|
||||
- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
|
||||
+ {@ [cons: =0, 1; attrs: type, arch, length]
|
||||
+ [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
|
||||
+ [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
|
||||
+ [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
|
||||
+ [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
|
||||
+ [?r , w ; multiple , * , 8] #
|
||||
+ [?w , r ; multiple , * , 8] #
|
||||
+ [?r , r ; multiple , * , 8] #
|
||||
+ [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
|
||||
+ [w , Dz; fmov , * , 4] fmov\t%d0, xzr
|
||||
+ }
|
||||
+ "&& reload_completed
|
||||
+ && (REG_P (operands[0])
|
||||
+ && REG_P (operands[1])
|
||||
+ && !(FP_REGNUM_P (REGNO (operands[0]))
|
||||
+ && FP_REGNUM_P (REGNO (operands[1]))))"
|
||||
+ [(const_int 0)]
|
||||
+ {
|
||||
+ if (GP_REGNUM_P (REGNO (operands[0]))
|
||||
+ && GP_REGNUM_P (REGNO (operands[1])))
|
||||
+ aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
|
||||
+ else
|
||||
+ aarch64_split_simd_move (operands[0], operands[1]);
|
||||
+ DONE;
|
||||
+ }
|
||||
)
|
||||
|
||||
;; When storing lane zero we can use the normal STR and its more permissive
|
||||
@@ -238,33 +243,6 @@
|
||||
[(set_attr "type" "neon_stp_q")]
|
||||
)
|
||||
|
||||
-
|
||||
-(define_split
|
||||
- [(set (match_operand:VQMOV 0 "register_operand" "")
|
||||
- (match_operand:VQMOV 1 "register_operand" ""))]
|
||||
- "TARGET_FLOAT
|
||||
- && reload_completed
|
||||
- && GP_REGNUM_P (REGNO (operands[0]))
|
||||
- && GP_REGNUM_P (REGNO (operands[1]))"
|
||||
- [(const_int 0)]
|
||||
-{
|
||||
- aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
|
||||
- DONE;
|
||||
-})
|
||||
-
|
||||
-(define_split
|
||||
- [(set (match_operand:VQMOV 0 "register_operand" "")
|
||||
- (match_operand:VQMOV 1 "register_operand" ""))]
|
||||
- "TARGET_FLOAT
|
||||
- && reload_completed
|
||||
- && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|
||||
- || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
|
||||
- [(const_int 0)]
|
||||
-{
|
||||
- aarch64_split_simd_move (operands[0], operands[1]);
|
||||
- DONE;
|
||||
-})
|
||||
-
|
||||
(define_expand "@aarch64_split_simd_mov<mode>"
|
||||
[(set (match_operand:VQMOV 0)
|
||||
(match_operand:VQMOV 1))]
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
From 883af5a13e648e74cb8d8722be6d4980e8bc8f48 Mon Sep 17 00:00:00 2001
|
||||
From: Tamar Christina <tamar.christina@arm.com>
|
||||
Date: Tue, 20 Jun 2023 08:54:42 +0100
|
||||
Subject: [PATCH 079/157] [Backport][SME] AArch64: remove test comment from
|
||||
*mov<mode>_aarch64
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=36de416df8b3f109353e309011061fa66e872e3a
|
||||
|
||||
I accidentally left a test comment in the final version of the patch.
|
||||
This removes the comment.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/aarch64/aarch64.md (*mov<mode>_aarch64): Drop test comment.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.md | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 29a665e45..1ec23fae8 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -1213,7 +1213,7 @@
|
||||
[m, r Z ; store_4 , * ] str<size>\\t%w1, %0
|
||||
[m, w ; store_4 , * ] str\t%<size>1, %0
|
||||
[r, w ; neon_to_gp<q> , simd ] umov\t%w0, %1.<v>[0]
|
||||
- [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1 /*foo */
|
||||
+ [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1
|
||||
[w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1
|
||||
[w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
|
||||
[w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0]
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1552
0172-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
Normal file
1552
0172-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch
Normal file
File diff suppressed because it is too large
Load Diff
4506
0173-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
Normal file
4506
0173-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch
Normal file
File diff suppressed because it is too large
Load Diff
250
0174-Backport-SME-AArch64-Support-new-tbranch-optab.patch
Normal file
250
0174-Backport-SME-AArch64-Support-new-tbranch-optab.patch
Normal file
@ -0,0 +1,250 @@
|
||||
From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001
|
||||
From: Tamar Christina <tamar.christina@arm.com>
|
||||
Date: Mon, 12 Dec 2022 15:18:56 +0000
|
||||
Subject: [PATCH 082/157] [Backport][SME] AArch64: Support new tbranch optab.
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11
|
||||
|
||||
This implements the new tbranch optab for AArch64.
|
||||
|
||||
We cannot emit one big RTL for the final instruction immediately.
|
||||
The reason that all comparisons in the AArch64 backend expand to separate CC
|
||||
compares, and separate testing of the operands is for ifcvt.
|
||||
|
||||
The separate CC compare is needed so ifcvt can produce csel, cset etc from the
|
||||
compares. Unlike, say, combine, ifcvt cannot do recog on a parallel with a
|
||||
clobber. Should we emit the instruction directly then ifcvt will not be able
|
||||
to say, make a csel, because we have no patterns which handle zero_extract and
|
||||
compare (unlike combine, ifcvt cannot transform the extract into an AND).
|
||||
|
||||
While you could provide various patterns for this (and I did try) you end up
|
||||
with broken patterns because you can't add the clobber to the CC register. If
|
||||
you do, ifcvt recog fails.
|
||||
|
||||
i.e.
|
||||
|
||||
int
|
||||
f1 (int x)
|
||||
{
|
||||
if (x & 1)
|
||||
return 1;
|
||||
return x;
|
||||
}
|
||||
|
||||
We lose csel here.
|
||||
|
||||
Secondly the reason the compare with an explicit CC mode is needed is so that
|
||||
ifcvt can transform the operation into a version that doesn't require the flags
|
||||
to be set. But it only does so if it knows the explicit usage of the CC reg.
|
||||
|
||||
For instance
|
||||
|
||||
int
|
||||
foo (int a, int b)
|
||||
{
|
||||
return ((a & (1 << 25)) ? 5 : 4);
|
||||
}
|
||||
|
||||
Doesn't require a comparison, the optimal form is:
|
||||
|
||||
foo(int, int):
|
||||
ubfx x0, x0, 25, 1
|
||||
add w0, w0, 4
|
||||
ret
|
||||
|
||||
and no compare is actually needed. If you represent the instruction using an
|
||||
ANDS instead of a zero_extract then you get close, but you end up with an ands
|
||||
followed by an add, which is a slower operation.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
|
||||
(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
|
||||
(tbranch_<code><mode>3): New.
|
||||
* config/aarch64/iterators.md (ZEROM, zerom): New.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/aarch64/tbz_1.c: New test.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.md | 33 ++++++--
|
||||
gcc/config/aarch64/iterators.md | 2 +
|
||||
gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++
|
||||
3 files changed, 122 insertions(+), 8 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 079c8a3f9..2becc888e 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -953,12 +953,29 @@
|
||||
(const_int 1)))]
|
||||
)
|
||||
|
||||
-(define_insn "*tb<optab><mode>1"
|
||||
+(define_expand "tbranch_<code><mode>3"
|
||||
[(set (pc) (if_then_else
|
||||
- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
|
||||
- (const_int 1)
|
||||
- (match_operand 1
|
||||
- "aarch64_simd_shift_imm_<mode>" "n"))
|
||||
+ (EQL (match_operand:ALLI 0 "register_operand")
|
||||
+ (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
|
||||
+ (label_ref (match_operand 2 ""))
|
||||
+ (pc)))]
|
||||
+ ""
|
||||
+{
|
||||
+ rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
|
||||
+ rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
|
||||
+ rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
|
||||
+ emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
|
||||
+ operands[1] = const0_rtx;
|
||||
+ operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
|
||||
+ operands[1]);
|
||||
+})
|
||||
+
|
||||
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
|
||||
+ [(set (pc) (if_then_else
|
||||
+ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
|
||||
+ (const_int 1)
|
||||
+ (match_operand 1
|
||||
+ "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
|
||||
(const_int 0))
|
||||
(label_ref (match_operand 2 "" ""))
|
||||
(pc)))
|
||||
@@ -969,15 +986,15 @@
|
||||
{
|
||||
if (get_attr_far_branch (insn) == 1)
|
||||
return aarch64_gen_far_branch (operands, 2, "Ltb",
|
||||
- "<inv_tb>\\t%<w>0, %1, ");
|
||||
+ "<inv_tb>\\t%<ALLI:w>0, %1, ");
|
||||
else
|
||||
{
|
||||
operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
|
||||
- return "tst\t%<w>0, %1\;<bcond>\t%l2";
|
||||
+ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
|
||||
}
|
||||
}
|
||||
else
|
||||
- return "<tbz>\t%<w>0, %1, %l2";
|
||||
+ return "<tbz>\t%<ALLI:w>0, %1, %l2";
|
||||
}
|
||||
[(set_attr "type" "branch")
|
||||
(set (attr "length")
|
||||
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
||||
index 226dea48a..b616f5c9a 100644
|
||||
--- a/gcc/config/aarch64/iterators.md
|
||||
+++ b/gcc/config/aarch64/iterators.md
|
||||
@@ -1104,6 +1104,8 @@
|
||||
|
||||
;; Give the number of bits in the mode
|
||||
(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
|
||||
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
|
||||
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
|
||||
|
||||
;; Give the ordinal of the MSB in the mode
|
||||
(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
|
||||
new file mode 100644
|
||||
index 000000000..39deb58e2
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
|
||||
@@ -0,0 +1,95 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
|
||||
+
|
||||
+#include <stdbool.h>
|
||||
+
|
||||
+void h(void);
|
||||
+
|
||||
+/*
|
||||
+** g1:
|
||||
+** tbnz w[0-9]+, #?0, .L([0-9]+)
|
||||
+** ret
|
||||
+** ...
|
||||
+*/
|
||||
+void g1(bool x)
|
||||
+{
|
||||
+ if (__builtin_expect (x, 0))
|
||||
+ h ();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** g2:
|
||||
+** tbz w[0-9]+, #?0, .L([0-9]+)
|
||||
+** b h
|
||||
+** ...
|
||||
+*/
|
||||
+void g2(bool x)
|
||||
+{
|
||||
+ if (__builtin_expect (x, 1))
|
||||
+ h ();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** g3_ge:
|
||||
+** tbnz w[0-9]+, #?31, .L[0-9]+
|
||||
+** b h
|
||||
+** ...
|
||||
+*/
|
||||
+void g3_ge(int x)
|
||||
+{
|
||||
+ if (__builtin_expect (x >= 0, 1))
|
||||
+ h ();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** g3_gt:
|
||||
+** cmp w[0-9]+, 0
|
||||
+** ble .L[0-9]+
|
||||
+** b h
|
||||
+** ...
|
||||
+*/
|
||||
+void g3_gt(int x)
|
||||
+{
|
||||
+ if (__builtin_expect (x > 0, 1))
|
||||
+ h ();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** g3_lt:
|
||||
+** tbz w[0-9]+, #?31, .L[0-9]+
|
||||
+** b h
|
||||
+** ...
|
||||
+*/
|
||||
+void g3_lt(int x)
|
||||
+{
|
||||
+ if (__builtin_expect (x < 0, 1))
|
||||
+ h ();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** g3_le:
|
||||
+** cmp w[0-9]+, 0
|
||||
+** bgt .L[0-9]+
|
||||
+** b h
|
||||
+** ...
|
||||
+*/
|
||||
+void g3_le(int x)
|
||||
+{
|
||||
+ if (__builtin_expect (x <= 0, 1))
|
||||
+ h ();
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** g5:
|
||||
+** mov w[0-9]+, 65279
|
||||
+** tst w[0-9]+, w[0-9]+
|
||||
+** beq .L[0-9]+
|
||||
+** b h
|
||||
+** ...
|
||||
+*/
|
||||
+void g5(int x)
|
||||
+{
|
||||
+ if (__builtin_expect (x & 0xfeff, 1))
|
||||
+ h ();
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
381
0175-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
Normal file
381
0175-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch
Normal file
@ -0,0 +1,381 @@
|
||||
From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:04 +0100
|
||||
Subject: [PATCH 083/157] [Backport][SME] aarch64: Use local frame vars in
|
||||
shrink-wrapping code
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd
|
||||
|
||||
aarch64_layout_frame uses a shorthand for referring to
|
||||
cfun->machine->frame:
|
||||
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
|
||||
This patch does the same for some other heavy users of the structure.
|
||||
No functional change intended.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_save_callee_saves): Use
|
||||
a local shorthand for cfun->machine->frame.
|
||||
(aarch64_restore_callee_saves, aarch64_get_separate_components):
|
||||
(aarch64_process_components): Likewise.
|
||||
(aarch64_allocate_and_probe_stack_space): Likewise.
|
||||
(aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
|
||||
(aarch64_layout_frame): Use existing shorthand for one more case.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 123 ++++++++++++++++++----------------
|
||||
1 file changed, 64 insertions(+), 59 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 08a98f8ba..b7da1d0be 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void)
|
||||
frame.is_scs_enabled
|
||||
= (!crtl->calls_eh_return
|
||||
&& sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
|
||||
- && known_ge (cfun->machine->frame.reg_offset[LR_REGNUM], 0));
|
||||
+ && known_ge (frame.reg_offset[LR_REGNUM], 0));
|
||||
|
||||
/* When shadow call stack is enabled, the scs_pop in the epilogue will
|
||||
restore x30, and we don't need to pop x30 again in the traditional
|
||||
@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
unsigned start, unsigned limit, bool skip_wb,
|
||||
bool hard_fp_valid_p)
|
||||
{
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
rtx_insn *insn;
|
||||
unsigned regno;
|
||||
unsigned regno2;
|
||||
@@ -9377,8 +9378,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
||||
|
||||
if (skip_wb
|
||||
- && (regno == cfun->machine->frame.wb_push_candidate1
|
||||
- || regno == cfun->machine->frame.wb_push_candidate2))
|
||||
+ && (regno == frame.wb_push_candidate1
|
||||
+ || regno == frame.wb_push_candidate2))
|
||||
continue;
|
||||
|
||||
if (cfun->machine->reg_is_wrapped_separately[regno])
|
||||
@@ -9386,7 +9387,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
reg = gen_rtx_REG (mode, regno);
|
||||
- offset = start_offset + cfun->machine->frame.reg_offset[regno];
|
||||
+ offset = start_offset + frame.reg_offset[regno];
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
poly_int64 sp_offset = offset;
|
||||
|
||||
@@ -9399,7 +9400,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
{
|
||||
gcc_assert (known_eq (start_offset, 0));
|
||||
poly_int64 fp_offset
|
||||
- = cfun->machine->frame.below_hard_fp_saved_regs_size;
|
||||
+ = frame.below_hard_fp_saved_regs_size;
|
||||
if (hard_fp_valid_p)
|
||||
base_rtx = hard_frame_pointer_rtx;
|
||||
else
|
||||
@@ -9421,8 +9422,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
&& (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
|
||||
&& !cfun->machine->reg_is_wrapped_separately[regno2]
|
||||
&& known_eq (GET_MODE_SIZE (mode),
|
||||
- cfun->machine->frame.reg_offset[regno2]
|
||||
- - cfun->machine->frame.reg_offset[regno]))
|
||||
+ frame.reg_offset[regno2] - frame.reg_offset[regno]))
|
||||
{
|
||||
rtx reg2 = gen_rtx_REG (mode, regno2);
|
||||
rtx mem2;
|
||||
@@ -9472,6 +9472,7 @@ static void
|
||||
aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
|
||||
unsigned limit, bool skip_wb, rtx *cfi_ops)
|
||||
{
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
unsigned regno;
|
||||
unsigned regno2;
|
||||
poly_int64 offset;
|
||||
@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
|
||||
rtx reg, mem;
|
||||
|
||||
if (skip_wb
|
||||
- && (regno == cfun->machine->frame.wb_pop_candidate1
|
||||
- || regno == cfun->machine->frame.wb_pop_candidate2))
|
||||
+ && (regno == frame.wb_pop_candidate1
|
||||
+ || regno == frame.wb_pop_candidate2))
|
||||
continue;
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
reg = gen_rtx_REG (mode, regno);
|
||||
- offset = start_offset + cfun->machine->frame.reg_offset[regno];
|
||||
+ offset = start_offset + frame.reg_offset[regno];
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
|
||||
@@ -9505,8 +9506,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
|
||||
&& (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
|
||||
&& !cfun->machine->reg_is_wrapped_separately[regno2]
|
||||
&& known_eq (GET_MODE_SIZE (mode),
|
||||
- cfun->machine->frame.reg_offset[regno2]
|
||||
- - cfun->machine->frame.reg_offset[regno]))
|
||||
+ frame.reg_offset[regno2] - frame.reg_offset[regno]))
|
||||
{
|
||||
rtx reg2 = gen_rtx_REG (mode, regno2);
|
||||
rtx mem2;
|
||||
@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
|
||||
static sbitmap
|
||||
aarch64_get_separate_components (void)
|
||||
{
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
|
||||
bitmap_clear (components);
|
||||
|
||||
@@ -9627,18 +9628,18 @@ aarch64_get_separate_components (void)
|
||||
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
continue;
|
||||
|
||||
- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
|
||||
+ poly_int64 offset = frame.reg_offset[regno];
|
||||
|
||||
/* If the register is saved in the first SVE save slot, we use
|
||||
it as a stack probe for -fstack-clash-protection. */
|
||||
if (flag_stack_clash_protection
|
||||
- && maybe_ne (cfun->machine->frame.below_hard_fp_saved_regs_size, 0)
|
||||
+ && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
|
||||
&& known_eq (offset, 0))
|
||||
continue;
|
||||
|
||||
/* Get the offset relative to the register we'll use. */
|
||||
if (frame_pointer_needed)
|
||||
- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
|
||||
+ offset -= frame.below_hard_fp_saved_regs_size;
|
||||
else
|
||||
offset += crtl->outgoing_args_size;
|
||||
|
||||
@@ -9657,11 +9658,11 @@ aarch64_get_separate_components (void)
|
||||
/* If the spare predicate register used by big-endian SVE code
|
||||
is call-preserved, it must be saved in the main prologue
|
||||
before any saves that use it. */
|
||||
- if (cfun->machine->frame.spare_pred_reg != INVALID_REGNUM)
|
||||
- bitmap_clear_bit (components, cfun->machine->frame.spare_pred_reg);
|
||||
+ if (frame.spare_pred_reg != INVALID_REGNUM)
|
||||
+ bitmap_clear_bit (components, frame.spare_pred_reg);
|
||||
|
||||
- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
|
||||
- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
|
||||
+ unsigned reg1 = frame.wb_push_candidate1;
|
||||
+ unsigned reg2 = frame.wb_push_candidate2;
|
||||
/* If registers have been chosen to be stored/restored with
|
||||
writeback don't interfere with them to avoid having to output explicit
|
||||
stack adjustment instructions. */
|
||||
@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
|
||||
static void
|
||||
aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
{
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
|
||||
? HARD_FRAME_POINTER_REGNUM
|
||||
: STACK_POINTER_REGNUM);
|
||||
@@ -9784,9 +9786,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
|
||||
rtx reg = gen_rtx_REG (mode, regno);
|
||||
- poly_int64 offset = cfun->machine->frame.reg_offset[regno];
|
||||
+ poly_int64 offset = frame.reg_offset[regno];
|
||||
if (frame_pointer_needed)
|
||||
- offset -= cfun->machine->frame.below_hard_fp_saved_regs_size;
|
||||
+ offset -= frame.below_hard_fp_saved_regs_size;
|
||||
else
|
||||
offset += crtl->outgoing_args_size;
|
||||
|
||||
@@ -9811,14 +9813,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
break;
|
||||
}
|
||||
|
||||
- poly_int64 offset2 = cfun->machine->frame.reg_offset[regno2];
|
||||
+ poly_int64 offset2 = frame.reg_offset[regno2];
|
||||
/* The next register is not of the same class or its offset is not
|
||||
mergeable with the current one into a pair. */
|
||||
if (aarch64_sve_mode_p (mode)
|
||||
|| !satisfies_constraint_Ump (mem)
|
||||
|| GP_REGNUM_P (regno) != GP_REGNUM_P (regno2)
|
||||
|| (crtl->abi->id () == ARM_PCS_SIMD && FP_REGNUM_P (regno))
|
||||
- || maybe_ne ((offset2 - cfun->machine->frame.reg_offset[regno]),
|
||||
+ || maybe_ne ((offset2 - frame.reg_offset[regno]),
|
||||
GET_MODE_SIZE (mode)))
|
||||
{
|
||||
insn = emit_insn (set);
|
||||
@@ -9840,7 +9842,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
/* REGNO2 can be saved/restored in a pair with REGNO. */
|
||||
rtx reg2 = gen_rtx_REG (mode, regno2);
|
||||
if (frame_pointer_needed)
|
||||
- offset2 -= cfun->machine->frame.below_hard_fp_saved_regs_size;
|
||||
+ offset2 -= frame.below_hard_fp_saved_regs_size;
|
||||
else
|
||||
offset2 += crtl->outgoing_args_size;
|
||||
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
|
||||
@@ -9935,6 +9937,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
bool frame_related_p,
|
||||
bool final_adjustment_p)
|
||||
{
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
HOST_WIDE_INT guard_size
|
||||
= 1 << param_stack_clash_protection_guard_size;
|
||||
HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
|
||||
@@ -9955,25 +9958,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
register as a probe. We can't assume that LR was saved at position 0
|
||||
though, so treat any space below it as unprobed. */
|
||||
if (final_adjustment_p
|
||||
- && known_eq (cfun->machine->frame.below_hard_fp_saved_regs_size, 0))
|
||||
+ && known_eq (frame.below_hard_fp_saved_regs_size, 0))
|
||||
{
|
||||
- poly_int64 lr_offset = cfun->machine->frame.reg_offset[LR_REGNUM];
|
||||
+ poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
|
||||
if (known_ge (lr_offset, 0))
|
||||
min_probe_threshold -= lr_offset.to_constant ();
|
||||
else
|
||||
gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
|
||||
}
|
||||
|
||||
- poly_int64 frame_size = cfun->machine->frame.frame_size;
|
||||
+ poly_int64 frame_size = frame.frame_size;
|
||||
|
||||
/* We should always have a positive probe threshold. */
|
||||
gcc_assert (min_probe_threshold > 0);
|
||||
|
||||
if (flag_stack_clash_protection && !final_adjustment_p)
|
||||
{
|
||||
- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
|
||||
- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
|
||||
- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
|
||||
+ poly_int64 initial_adjust = frame.initial_adjust;
|
||||
+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
+ poly_int64 final_adjust = frame.final_adjust;
|
||||
|
||||
if (known_eq (frame_size, 0))
|
||||
{
|
||||
@@ -10262,17 +10265,18 @@ aarch64_epilogue_uses (int regno)
|
||||
void
|
||||
aarch64_expand_prologue (void)
|
||||
{
|
||||
- poly_int64 frame_size = cfun->machine->frame.frame_size;
|
||||
- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
|
||||
- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
|
||||
- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
|
||||
- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
|
||||
- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
+ poly_int64 frame_size = frame.frame_size;
|
||||
+ poly_int64 initial_adjust = frame.initial_adjust;
|
||||
+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
|
||||
+ poly_int64 final_adjust = frame.final_adjust;
|
||||
+ poly_int64 callee_offset = frame.callee_offset;
|
||||
+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
poly_int64 below_hard_fp_saved_regs_size
|
||||
- = cfun->machine->frame.below_hard_fp_saved_regs_size;
|
||||
- unsigned reg1 = cfun->machine->frame.wb_push_candidate1;
|
||||
- unsigned reg2 = cfun->machine->frame.wb_push_candidate2;
|
||||
- bool emit_frame_chain = cfun->machine->frame.emit_frame_chain;
|
||||
+ = frame.below_hard_fp_saved_regs_size;
|
||||
+ unsigned reg1 = frame.wb_push_candidate1;
|
||||
+ unsigned reg2 = frame.wb_push_candidate2;
|
||||
+ bool emit_frame_chain = frame.emit_frame_chain;
|
||||
rtx_insn *insn;
|
||||
|
||||
if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
|
||||
@@ -10303,7 +10307,7 @@ aarch64_expand_prologue (void)
|
||||
}
|
||||
|
||||
/* Push return address to shadow call stack. */
|
||||
- if (cfun->machine->frame.is_scs_enabled)
|
||||
+ if (frame.is_scs_enabled)
|
||||
emit_insn (gen_scs_push ());
|
||||
|
||||
if (flag_stack_usage_info)
|
||||
@@ -10342,7 +10346,7 @@ aarch64_expand_prologue (void)
|
||||
|
||||
/* The offset of the frame chain record (if any) from the current SP. */
|
||||
poly_int64 chain_offset = (initial_adjust + callee_adjust
|
||||
- - cfun->machine->frame.hard_fp_offset);
|
||||
+ - frame.hard_fp_offset);
|
||||
gcc_assert (known_ge (chain_offset, 0));
|
||||
|
||||
/* The offset of the bottom of the save area from the current SP. */
|
||||
@@ -10445,16 +10449,17 @@ aarch64_use_return_insn_p (void)
|
||||
void
|
||||
aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
{
|
||||
- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
|
||||
- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
|
||||
- poly_int64 final_adjust = cfun->machine->frame.final_adjust;
|
||||
- poly_int64 callee_offset = cfun->machine->frame.callee_offset;
|
||||
- poly_int64 sve_callee_adjust = cfun->machine->frame.sve_callee_adjust;
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
+ poly_int64 initial_adjust = frame.initial_adjust;
|
||||
+ HOST_WIDE_INT callee_adjust = frame.callee_adjust;
|
||||
+ poly_int64 final_adjust = frame.final_adjust;
|
||||
+ poly_int64 callee_offset = frame.callee_offset;
|
||||
+ poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
poly_int64 below_hard_fp_saved_regs_size
|
||||
- = cfun->machine->frame.below_hard_fp_saved_regs_size;
|
||||
- unsigned reg1 = cfun->machine->frame.wb_pop_candidate1;
|
||||
- unsigned reg2 = cfun->machine->frame.wb_pop_candidate2;
|
||||
- unsigned int last_gpr = (cfun->machine->frame.is_scs_enabled
|
||||
+ = frame.below_hard_fp_saved_regs_size;
|
||||
+ unsigned reg1 = frame.wb_pop_candidate1;
|
||||
+ unsigned reg2 = frame.wb_pop_candidate2;
|
||||
+ unsigned int last_gpr = (frame.is_scs_enabled
|
||||
? R29_REGNUM : R30_REGNUM);
|
||||
rtx cfi_ops = NULL;
|
||||
rtx_insn *insn;
|
||||
@@ -10488,7 +10493,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
/* We need to add memory barrier to prevent read from deallocated stack. */
|
||||
bool need_barrier_p
|
||||
= maybe_ne (get_frame_size ()
|
||||
- + cfun->machine->frame.saved_varargs_size, 0);
|
||||
+ + frame.saved_varargs_size, 0);
|
||||
|
||||
/* Emit a barrier to prevent loads from a deallocated stack. */
|
||||
if (maybe_gt (final_adjust, crtl->outgoing_args_size)
|
||||
@@ -10569,7 +10574,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
}
|
||||
|
||||
/* Pop return address from shadow call stack. */
|
||||
- if (cfun->machine->frame.is_scs_enabled)
|
||||
+ if (frame.is_scs_enabled)
|
||||
{
|
||||
machine_mode mode = aarch64_reg_save_mode (R30_REGNUM);
|
||||
rtx reg = gen_rtx_REG (mode, R30_REGNUM);
|
||||
@@ -13023,24 +13028,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
|
||||
poly_int64
|
||||
aarch64_initial_elimination_offset (unsigned from, unsigned to)
|
||||
{
|
||||
+ aarch64_frame &frame = cfun->machine->frame;
|
||||
+
|
||||
if (to == HARD_FRAME_POINTER_REGNUM)
|
||||
{
|
||||
if (from == ARG_POINTER_REGNUM)
|
||||
- return cfun->machine->frame.hard_fp_offset;
|
||||
+ return frame.hard_fp_offset;
|
||||
|
||||
if (from == FRAME_POINTER_REGNUM)
|
||||
- return cfun->machine->frame.hard_fp_offset
|
||||
- - cfun->machine->frame.locals_offset;
|
||||
+ return frame.hard_fp_offset - frame.locals_offset;
|
||||
}
|
||||
|
||||
if (to == STACK_POINTER_REGNUM)
|
||||
{
|
||||
if (from == FRAME_POINTER_REGNUM)
|
||||
- return cfun->machine->frame.frame_size
|
||||
- - cfun->machine->frame.locals_offset;
|
||||
+ return frame.frame_size - frame.locals_offset;
|
||||
}
|
||||
|
||||
- return cfun->machine->frame.frame_size;
|
||||
+ return frame.frame_size;
|
||||
}
|
||||
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
103
0176-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
Normal file
103
0176-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch
Normal file
@ -0,0 +1,103 @@
|
||||
From 54a6e52207703a8643fc406175377105f887ebef Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:04 +0100
|
||||
Subject: [PATCH] [Backport][SME] aarch64: Avoid a use of callee_offset
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9ab771fa8cd747f34786c6f33deea32c2eb828b
|
||||
|
||||
When we emit the frame chain, i.e. when we reach Here in this statement
|
||||
of aarch64_expand_prologue:
|
||||
|
||||
if (emit_frame_chain)
|
||||
{
|
||||
// Here
|
||||
...
|
||||
}
|
||||
|
||||
the stack is in one of two states:
|
||||
|
||||
- We've allocated up to the frame chain, but no more.
|
||||
|
||||
- We've allocated the whole frame, and the frame chain is within easy
|
||||
reach of the new SP.
|
||||
|
||||
The offset of the frame chain from the current SP is available
|
||||
in aarch64_frame as callee_offset. It is also available as the
|
||||
chain_offset local variable, where the latter is calculated from other
|
||||
data. (However, chain_offset is not always equal to callee_offset when
|
||||
!emit_frame_chain, so chain_offset isn't redundant.)
|
||||
|
||||
In c600df9a4060da3c6121ff4d0b93f179eafd69d1 I switched to using
|
||||
chain_offset for the initialisation of the hard frame pointer:
|
||||
|
||||
aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
|
||||
- stack_pointer_rtx, callee_offset,
|
||||
+ stack_pointer_rtx, chain_offset,
|
||||
tmp1_rtx, tmp0_rtx, frame_pointer_needed);
|
||||
|
||||
But the later REG_CFA_ADJUST_CFA handling still used callee_offset.
|
||||
|
||||
I think the difference is harmless, but it's more logical for the
|
||||
CFA note to be in sync, and it's more convenient for later patches
|
||||
if it uses chain_offset.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_expand_prologue): Use
|
||||
chain_offset rather than callee_offset.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 4 +---
|
||||
1 file changed, 1 insertion(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index b7da1d0be..fbd7a079a 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -10263,21 +10263,20 @@ aarch64_epilogue_uses (int regno)
|
||||
current FP is also set up if it is in use. */
|
||||
|
||||
void
|
||||
aarch64_expand_prologue (void)
|
||||
{
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
poly_int64 frame_size = frame.frame_size;
|
||||
poly_int64 initial_adjust = frame.initial_adjust;
|
||||
HOST_WIDE_INT callee_adjust = frame.callee_adjust;
|
||||
poly_int64 final_adjust = frame.final_adjust;
|
||||
- poly_int64 callee_offset = frame.callee_offset;
|
||||
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
poly_int64 below_hard_fp_saved_regs_size
|
||||
= frame.below_hard_fp_saved_regs_size;
|
||||
unsigned reg1 = frame.wb_push_candidate1;
|
||||
unsigned reg2 = frame.wb_push_candidate2;
|
||||
bool emit_frame_chain = frame.emit_frame_chain;
|
||||
rtx_insn *insn;
|
||||
|
||||
if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
|
||||
{
|
||||
@@ -10376,22 +10375,21 @@ aarch64_expand_prologue (void)
|
||||
the CFA based on the frame pointer. We therefore need new
|
||||
DW_CFA_expressions to re-express the save slots with addresses
|
||||
based on the frame pointer. */
|
||||
rtx_insn *insn = get_last_insn ();
|
||||
gcc_assert (RTX_FRAME_RELATED_P (insn));
|
||||
|
||||
/* Add an explicit CFA definition if this was previously
|
||||
implicit. */
|
||||
if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
|
||||
{
|
||||
- rtx src = plus_constant (Pmode, stack_pointer_rtx,
|
||||
- callee_offset);
|
||||
+ rtx src = plus_constant (Pmode, stack_pointer_rtx, chain_offset);
|
||||
add_reg_note (insn, REG_CFA_ADJUST_CFA,
|
||||
gen_rtx_SET (hard_frame_pointer_rtx, src));
|
||||
}
|
||||
|
||||
/* Change the save slot expressions for the registers that
|
||||
we've already saved. */
|
||||
aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2],
|
||||
hard_frame_pointer_rtx, UNITS_PER_WORD);
|
||||
aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
|
||||
hard_frame_pointer_rtx, 0);
|
||||
--
|
||||
2.38.1.windows.1
|
||||
|
||||
@ -0,0 +1,51 @@
|
||||
From 82bbe6513987a7656150110164e25f44fe410796 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:05 +0100
|
||||
Subject: [PATCH 085/157] [Backport][SME] aarch64: Explicitly handle frames
|
||||
with no saved registers
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c601c918c9ac01ef8315774a642ff924f77c85e5
|
||||
|
||||
If a frame has no saved registers, it can be allocated in one go.
|
||||
There is no need to treat the areas below and above the saved
|
||||
registers as separate.
|
||||
|
||||
And if we allocate the frame in one go, it should be allocated
|
||||
as the initial_adjust rather than the final_adjust. This allows the
|
||||
frame size to grow to guard_size - guard_used_by_caller before a stack
|
||||
probe is needed. (A frame with no register saves is necessarily a
|
||||
leaf frame.)
|
||||
|
||||
This is a no-op as things stand, since a leaf function will have
|
||||
no outgoing arguments, and so all the frame will be above where
|
||||
the saved registers normally go.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Explicitly
|
||||
allocate the frame in one go if there are no saved registers.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index fbd7a079a..c59af6b1c 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8978,9 +8978,11 @@ aarch64_layout_frame (void)
|
||||
|
||||
HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
|
||||
HOST_WIDE_INT const_saved_regs_size;
|
||||
- if (frame.frame_size.is_constant (&const_size)
|
||||
- && const_size < max_push_offset
|
||||
- && known_eq (frame.hard_fp_offset, const_size))
|
||||
+ if (known_eq (frame.saved_regs_size, 0))
|
||||
+ frame.initial_adjust = frame.frame_size;
|
||||
+ else if (frame.frame_size.is_constant (&const_size)
|
||||
+ && const_size < max_push_offset
|
||||
+ && known_eq (frame.hard_fp_offset, const_size))
|
||||
{
|
||||
/* Simple, small frame with no outgoing arguments:
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
236
0178-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
Normal file
236
0178-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch
Normal file
@ -0,0 +1,236 @@
|
||||
From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:05 +0100
|
||||
Subject: [PATCH 086/157] [Backport][SME] aarch64: Add bytes_below_saved_regs
|
||||
to frame info
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976
|
||||
|
||||
The frame layout code currently hard-codes the assumption that
|
||||
the number of bytes below the saved registers is equal to the
|
||||
size of the outgoing arguments. This patch abstracts that
|
||||
value into a new field of aarch64_frame.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::bytes_below_saved_regs): New
|
||||
field.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it,
|
||||
and use it instead of crtl->outgoing_args_size.
|
||||
(aarch64_get_separate_components): Use bytes_below_saved_regs instead
|
||||
of outgoing_args_size.
|
||||
(aarch64_process_components): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 71 ++++++++++++++++++-----------------
|
||||
gcc/config/aarch64/aarch64.h | 5 +++
|
||||
2 files changed, 41 insertions(+), 35 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index c59af6b1c..5533dd85b 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void)
|
||||
gcc_assert (crtl->is_leaf
|
||||
|| maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
|
||||
|
||||
+ frame.bytes_below_saved_regs = crtl->outgoing_args_size;
|
||||
+
|
||||
/* Now assign stack slots for the registers. Start with the predicate
|
||||
registers, since predicate LDR and STR have a relatively small
|
||||
offset range. These saves happen below the hard frame pointer. */
|
||||
@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void)
|
||||
|
||||
poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
|
||||
|
||||
- poly_int64 above_outgoing_args
|
||||
+ poly_int64 saved_regs_and_above
|
||||
= aligned_upper_bound (varargs_and_saved_regs_size
|
||||
+ get_frame_size (),
|
||||
STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
|
||||
frame.hard_fp_offset
|
||||
- = above_outgoing_args - frame.below_hard_fp_saved_regs_size;
|
||||
+ = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
|
||||
|
||||
/* Both these values are already aligned. */
|
||||
- gcc_assert (multiple_p (crtl->outgoing_args_size,
|
||||
+ gcc_assert (multiple_p (frame.bytes_below_saved_regs,
|
||||
STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
- frame.frame_size = above_outgoing_args + crtl->outgoing_args_size;
|
||||
+ frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
|
||||
|
||||
frame.locals_offset = frame.saved_varargs_size;
|
||||
|
||||
@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void)
|
||||
else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
|
||||
max_push_offset = 256;
|
||||
|
||||
- HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
|
||||
+ HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
|
||||
HOST_WIDE_INT const_saved_regs_size;
|
||||
if (known_eq (frame.saved_regs_size, 0))
|
||||
frame.initial_adjust = frame.frame_size;
|
||||
@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void)
|
||||
&& const_size < max_push_offset
|
||||
&& known_eq (frame.hard_fp_offset, const_size))
|
||||
{
|
||||
- /* Simple, small frame with no outgoing arguments:
|
||||
+ /* Simple, small frame with no data below the saved registers.
|
||||
|
||||
stp reg1, reg2, [sp, -frame_size]!
|
||||
stp reg3, reg4, [sp, 16] */
|
||||
frame.callee_adjust = const_size;
|
||||
}
|
||||
- else if (crtl->outgoing_args_size.is_constant (&const_outgoing_args_size)
|
||||
+ else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
|
||||
&& frame.saved_regs_size.is_constant (&const_saved_regs_size)
|
||||
- && const_outgoing_args_size + const_saved_regs_size < 512
|
||||
- /* We could handle this case even with outgoing args, provided
|
||||
- that the number of args left us with valid offsets for all
|
||||
- predicate and vector save slots. It's such a rare case that
|
||||
- it hardly seems worth the effort though. */
|
||||
- && (!saves_below_hard_fp_p || const_outgoing_args_size == 0)
|
||||
+ && const_below_saved_regs + const_saved_regs_size < 512
|
||||
+ /* We could handle this case even with data below the saved
|
||||
+ registers, provided that that data left us with valid offsets
|
||||
+ for all predicate and vector save slots. It's such a rare
|
||||
+ case that it hardly seems worth the effort though. */
|
||||
+ && (!saves_below_hard_fp_p || const_below_saved_regs == 0)
|
||||
&& !(cfun->calls_alloca
|
||||
&& frame.hard_fp_offset.is_constant (&const_fp_offset)
|
||||
&& const_fp_offset < max_push_offset))
|
||||
{
|
||||
- /* Frame with small outgoing arguments:
|
||||
+ /* Frame with small area below the saved registers:
|
||||
|
||||
sub sp, sp, frame_size
|
||||
- stp reg1, reg2, [sp, outgoing_args_size]
|
||||
- stp reg3, reg4, [sp, outgoing_args_size + 16] */
|
||||
+ stp reg1, reg2, [sp, bytes_below_saved_regs]
|
||||
+ stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
|
||||
frame.initial_adjust = frame.frame_size;
|
||||
- frame.callee_offset = const_outgoing_args_size;
|
||||
+ frame.callee_offset = const_below_saved_regs;
|
||||
}
|
||||
else if (saves_below_hard_fp_p
|
||||
&& known_eq (frame.saved_regs_size,
|
||||
@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void)
|
||||
|
||||
sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
|
||||
save SVE registers relative to SP
|
||||
- sub sp, sp, outgoing_args_size */
|
||||
+ sub sp, sp, bytes_below_saved_regs */
|
||||
frame.initial_adjust = (frame.hard_fp_offset
|
||||
+ frame.below_hard_fp_saved_regs_size);
|
||||
- frame.final_adjust = crtl->outgoing_args_size;
|
||||
+ frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
|
||||
&& const_fp_offset < max_push_offset)
|
||||
{
|
||||
- /* Frame with large outgoing arguments or SVE saves, but with
|
||||
- a small local area:
|
||||
+ /* Frame with large area below the saved registers, or with SVE saves,
|
||||
+ but with a small area above:
|
||||
|
||||
stp reg1, reg2, [sp, -hard_fp_offset]!
|
||||
stp reg3, reg4, [sp, 16]
|
||||
[sub sp, sp, below_hard_fp_saved_regs_size]
|
||||
[save SVE registers relative to SP]
|
||||
- sub sp, sp, outgoing_args_size */
|
||||
+ sub sp, sp, bytes_below_saved_regs */
|
||||
frame.callee_adjust = const_fp_offset;
|
||||
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
|
||||
- frame.final_adjust = crtl->outgoing_args_size;
|
||||
+ frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
else
|
||||
{
|
||||
- /* Frame with large local area and outgoing arguments or SVE saves,
|
||||
- using frame pointer:
|
||||
+ /* General case:
|
||||
|
||||
sub sp, sp, hard_fp_offset
|
||||
stp x29, x30, [sp, 0]
|
||||
@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void)
|
||||
stp reg3, reg4, [sp, 16]
|
||||
[sub sp, sp, below_hard_fp_saved_regs_size]
|
||||
[save SVE registers relative to SP]
|
||||
- sub sp, sp, outgoing_args_size */
|
||||
+ sub sp, sp, bytes_below_saved_regs */
|
||||
frame.initial_adjust = frame.hard_fp_offset;
|
||||
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
|
||||
- frame.final_adjust = crtl->outgoing_args_size;
|
||||
+ frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
|
||||
/* Make sure the individual adjustments add up to the full frame size. */
|
||||
@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void)
|
||||
if (frame_pointer_needed)
|
||||
offset -= frame.below_hard_fp_saved_regs_size;
|
||||
else
|
||||
- offset += crtl->outgoing_args_size;
|
||||
+ offset += frame.bytes_below_saved_regs;
|
||||
|
||||
/* Check that we can access the stack slot of the register with one
|
||||
direct load with no adjustments needed. */
|
||||
@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
if (frame_pointer_needed)
|
||||
offset -= frame.below_hard_fp_saved_regs_size;
|
||||
else
|
||||
- offset += crtl->outgoing_args_size;
|
||||
+ offset += frame.bytes_below_saved_regs;
|
||||
|
||||
rtx addr = plus_constant (Pmode, ptr_reg, offset);
|
||||
rtx mem = gen_frame_mem (mode, addr);
|
||||
@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
if (frame_pointer_needed)
|
||||
offset2 -= frame.below_hard_fp_saved_regs_size;
|
||||
else
|
||||
- offset2 += crtl->outgoing_args_size;
|
||||
+ offset2 += frame.bytes_below_saved_regs;
|
||||
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
|
||||
rtx mem2 = gen_frame_mem (mode, addr2);
|
||||
rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
|
||||
@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
|
||||
registers. If POLY_SIZE is not large enough to require a probe this function
|
||||
will only adjust the stack. When allocating the stack space
|
||||
FRAME_RELATED_P is then used to indicate if the allocation is frame related.
|
||||
- FINAL_ADJUSTMENT_P indicates whether we are allocating the outgoing
|
||||
- arguments. If we are then we ensure that any allocation larger than the ABI
|
||||
- defined buffer needs a probe so that the invariant of having a 1KB buffer is
|
||||
- maintained.
|
||||
+ FINAL_ADJUSTMENT_P indicates whether we are allocating the area below
|
||||
+ the saved registers. If we are then we ensure that any allocation
|
||||
+ larger than the ABI defined buffer needs a probe so that the
|
||||
+ invariant of having a 1KB buffer is maintained.
|
||||
|
||||
We emit barriers after each stack adjustment to prevent optimizations from
|
||||
breaking the invariant that we never drop the stack more than a page. This
|
||||
@@ -10132,7 +10133,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
/* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
|
||||
be probed. This maintains the requirement that each page is probed at
|
||||
least once. For initial probing we probe only if the allocation is
|
||||
- more than GUARD_SIZE - buffer, and for the outgoing arguments we probe
|
||||
+ more than GUARD_SIZE - buffer, and below the saved registers we probe
|
||||
if the amount is larger than buffer. GUARD_SIZE - buffer + buffer ==
|
||||
GUARD_SIZE. This works that for any allocation that is large enough to
|
||||
trigger a probe here, we'll have at least one, and if they're not large
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 8f0ac2cde..9e0ca380e 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -801,6 +801,11 @@ struct GTY (()) aarch64_frame
|
||||
/* The size of the callee-save registers with a slot in REG_OFFSET. */
|
||||
poly_int64 saved_regs_size;
|
||||
|
||||
+ /* The number of bytes between the bottom of the static frame (the bottom
|
||||
+ of the outgoing arguments) and the bottom of the register save area.
|
||||
+ This value is always a multiple of STACK_BOUNDARY. */
|
||||
+ poly_int64 bytes_below_saved_regs;
|
||||
+
|
||||
/* The size of the callee-save registers with a slot in REG_OFFSET that
|
||||
are saved below the hard frame pointer. */
|
||||
poly_int64 below_hard_fp_saved_regs_size;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,87 @@
|
||||
From bd5299017c233bcdf0fcc3dd7217eec1641411fe Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:06 +0100
|
||||
Subject: [PATCH 087/157] [Backport][SME] aarch64: Add bytes_below_hard_fp to
|
||||
frame info
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=99305f306246079cc57d30dae7c32107f02ff3e8
|
||||
|
||||
Following on from the previous bytes_below_saved_regs patch, this one
|
||||
records the number of bytes that are below the hard frame pointer.
|
||||
This eventually replaces below_hard_fp_saved_regs_size.
|
||||
|
||||
If a frame pointer is not needed, the epilogue adds final_adjust
|
||||
to the stack pointer before restoring registers:
|
||||
|
||||
aarch64_add_sp (tmp1_rtx, tmp0_rtx, final_adjust, true);
|
||||
|
||||
Therefore, if the epilogue needs to restore the stack pointer from
|
||||
the hard frame pointer, the directly corresponding offset is:
|
||||
|
||||
-bytes_below_hard_fp + final_adjust
|
||||
|
||||
i.e. go from the hard frame pointer to the bottom of the frame,
|
||||
then add the same amount as if we were using the stack pointer
|
||||
from the outset.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::bytes_below_hard_fp): New
|
||||
field.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize it.
|
||||
(aarch64_expand_epilogue): Use it instead of
|
||||
below_hard_fp_saved_regs_size.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 6 +++---
|
||||
gcc/config/aarch64/aarch64.h | 5 +++++
|
||||
2 files changed, 8 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 5533dd85b..2bb49b9b0 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8869,6 +8869,7 @@ aarch64_layout_frame (void)
|
||||
of the callee save area. */
|
||||
bool saves_below_hard_fp_p = maybe_ne (offset, 0);
|
||||
frame.below_hard_fp_saved_regs_size = offset;
|
||||
+ frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
|
||||
if (frame.emit_frame_chain)
|
||||
{
|
||||
/* FP and LR are placed in the linkage record. */
|
||||
@@ -10456,8 +10457,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
poly_int64 final_adjust = frame.final_adjust;
|
||||
poly_int64 callee_offset = frame.callee_offset;
|
||||
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
- poly_int64 below_hard_fp_saved_regs_size
|
||||
- = frame.below_hard_fp_saved_regs_size;
|
||||
+ poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
|
||||
unsigned reg1 = frame.wb_pop_candidate1;
|
||||
unsigned reg2 = frame.wb_pop_candidate2;
|
||||
unsigned int last_gpr = (frame.is_scs_enabled
|
||||
@@ -10515,7 +10515,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
is restored on the instruction doing the writeback. */
|
||||
aarch64_add_offset (Pmode, stack_pointer_rtx,
|
||||
hard_frame_pointer_rtx,
|
||||
- -callee_offset - below_hard_fp_saved_regs_size,
|
||||
+ -bytes_below_hard_fp + final_adjust,
|
||||
tmp1_rtx, tmp0_rtx, callee_adjust == 0);
|
||||
else
|
||||
/* The case where we need to re-use the register here is very rare, so
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 9e0ca380e..dedc5b32f 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -810,6 +810,11 @@ struct GTY (()) aarch64_frame
|
||||
are saved below the hard frame pointer. */
|
||||
poly_int64 below_hard_fp_saved_regs_size;
|
||||
|
||||
+ /* The number of bytes between the bottom of the static frame (the bottom
|
||||
+ of the outgoing arguments) and the hard frame pointer. This value is
|
||||
+ always a multiple of STACK_BOUNDARY. */
|
||||
+ poly_int64 bytes_below_hard_fp;
|
||||
+
|
||||
/* Offset from the base of the frame (incomming SP) to the
|
||||
top of the locals area. This value is always a multiple of
|
||||
STACK_BOUNDARY. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
126
0180-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
Normal file
126
0180-Backport-SME-aarch64-Robustify-stack-tie-handling.patch
Normal file
@ -0,0 +1,126 @@
|
||||
From 4dc3e578d958ceb73f973483f42247c3d33210dc Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 20 Jun 2023 21:48:38 +0100
|
||||
Subject: [PATCH 088/157] [Backport][SME] aarch64: Robustify stack tie handling
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=580b74a79146e51268dd11192d3870645adb0bbb
|
||||
|
||||
The SVE handling of stack clash protection copied the stack
|
||||
pointer to X11 before the probe and set up X11 as the CFA
|
||||
for unwind purposes:
|
||||
|
||||
/* This is done to provide unwinding information for the stack
|
||||
adjustments we're about to do, however to prevent the optimizers
|
||||
from removing the R11 move and leaving the CFA note (which would be
|
||||
very wrong) we tie the old and new stack pointer together.
|
||||
The tie will expand to nothing but the optimizers will not touch
|
||||
the instruction. */
|
||||
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
|
||||
emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
|
||||
emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
|
||||
|
||||
/* We want the CFA independent of the stack pointer for the
|
||||
duration of the loop. */
|
||||
add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
|
||||
RTX_FRAME_RELATED_P (insn) = 1;
|
||||
|
||||
-fcprop-registers is now smart enough to realise that X11 = SP,
|
||||
replace X11 with SP in the stack tie, and delete the instruction
|
||||
created above.
|
||||
|
||||
This patch tries to prevent that by making stack_tie fussy about
|
||||
the register numbers. It fixes failures in
|
||||
gcc.target/aarch64/sve/pcs/stack_clash*.c.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.md (stack_tie): Hard-code the first
|
||||
register operand to the stack pointer. Require the second register
|
||||
operand to have the number specified in a separate const_int operand.
|
||||
* config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function.
|
||||
(aarch64_allocate_and_probe_stack_space): Use it.
|
||||
(aarch64_expand_prologue): Likewise.
|
||||
(aarch64_expand_epilogue): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 18 ++++++++++++++----
|
||||
gcc/config/aarch64/aarch64.md | 7 ++++---
|
||||
2 files changed, 18 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 2bb49b9b0..4d505c6fc 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -9917,6 +9917,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
|
||||
return STACK_CLASH_CALLER_GUARD;
|
||||
}
|
||||
|
||||
+/* Emit a stack tie that acts as a scheduling barrier for all previous and
|
||||
+ subsequent memory accesses and that requires the stack pointer and REG
|
||||
+ to have their current values. REG can be stack_pointer_rtx if no
|
||||
+ other register's value needs to be fixed. */
|
||||
+
|
||||
+static void
|
||||
+aarch64_emit_stack_tie (rtx reg)
|
||||
+{
|
||||
+ emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode)));
|
||||
+}
|
||||
|
||||
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
|
||||
registers. If POLY_SIZE is not large enough to require a probe this function
|
||||
@@ -10030,7 +10040,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
the instruction. */
|
||||
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
|
||||
emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
|
||||
- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
|
||||
+ aarch64_emit_stack_tie (stack_ptr_copy);
|
||||
|
||||
/* We want the CFA independent of the stack pointer for the
|
||||
duration of the loop. */
|
||||
@@ -10398,7 +10408,7 @@ aarch64_expand_prologue (void)
|
||||
aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
|
||||
hard_frame_pointer_rtx, 0);
|
||||
}
|
||||
- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
|
||||
+ aarch64_emit_stack_tie (hard_frame_pointer_rtx);
|
||||
}
|
||||
|
||||
aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
|
||||
@@ -10501,7 +10511,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
|| cfun->calls_alloca
|
||||
|| crtl->calls_eh_return)
|
||||
{
|
||||
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
|
||||
+ aarch64_emit_stack_tie (stack_pointer_rtx);
|
||||
need_barrier_p = false;
|
||||
}
|
||||
|
||||
@@ -10540,7 +10550,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
callee_adjust != 0, &cfi_ops);
|
||||
|
||||
if (need_barrier_p)
|
||||
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
|
||||
+ aarch64_emit_stack_tie (stack_pointer_rtx);
|
||||
|
||||
if (callee_adjust != 0)
|
||||
aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 2becc888e..2ce123255 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -7088,10 +7088,11 @@
|
||||
|
||||
(define_insn "stack_tie"
|
||||
[(set (mem:BLK (scratch))
|
||||
- (unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
|
||||
- (match_operand:DI 1 "register_operand" "rk")]
|
||||
+ (unspec:BLK [(reg:DI SP_REGNUM)
|
||||
+ (match_operand:DI 0 "register_operand" "rk")
|
||||
+ (match_operand:DI 1 "const_int_operand")]
|
||||
UNSPEC_PRLG_STK))]
|
||||
- ""
|
||||
+ "REGNO (operands[0]) == INTVAL (operands[1])"
|
||||
""
|
||||
[(set_attr "length" "0")]
|
||||
)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
228
0181-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
Normal file
228
0181-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch
Normal file
@ -0,0 +1,228 @@
|
||||
From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:06 +0100
|
||||
Subject: [PATCH 089/157] [Backport][SME] aarch64: Tweak
|
||||
aarch64_save/restore_callee_saves
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db
|
||||
|
||||
aarch64_save_callee_saves and aarch64_restore_callee_saves took
|
||||
a parameter called start_offset that gives the offset of the
|
||||
bottom of the saved register area from the current stack pointer.
|
||||
However, it's more convenient for later patches if we use the
|
||||
bottom of the entire frame as the reference point, rather than
|
||||
the bottom of the saved registers.
|
||||
|
||||
Doing that removes the need for the callee_offset field.
|
||||
Other than that, this is not a win on its own. It only really
|
||||
makes sense in combination with the follow-on patches.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::callee_offset): Delete.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Remove
|
||||
callee_offset handling.
|
||||
(aarch64_save_callee_saves): Replace the start_offset parameter
|
||||
with a bytes_below_sp parameter.
|
||||
(aarch64_restore_callee_saves): Likewise.
|
||||
(aarch64_expand_prologue): Update accordingly.
|
||||
(aarch64_expand_epilogue): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 56 +++++++++++++++++------------------
|
||||
gcc/config/aarch64/aarch64.h | 4 ---
|
||||
2 files changed, 28 insertions(+), 32 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 4d505c6fc..a0a4c7ac3 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void)
|
||||
frame.final_adjust = 0;
|
||||
frame.callee_adjust = 0;
|
||||
frame.sve_callee_adjust = 0;
|
||||
- frame.callee_offset = 0;
|
||||
|
||||
frame.wb_pop_candidate1 = frame.wb_push_candidate1;
|
||||
frame.wb_pop_candidate2 = frame.wb_push_candidate2;
|
||||
@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void)
|
||||
stp reg1, reg2, [sp, bytes_below_saved_regs]
|
||||
stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
|
||||
frame.initial_adjust = frame.frame_size;
|
||||
- frame.callee_offset = const_below_saved_regs;
|
||||
}
|
||||
else if (saves_below_hard_fp_p
|
||||
&& known_eq (frame.saved_regs_size,
|
||||
@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
|
||||
}
|
||||
|
||||
/* Emit code to save the callee-saved registers from register number START
|
||||
- to LIMIT to the stack at the location starting at offset START_OFFSET,
|
||||
- skipping any write-back candidates if SKIP_WB is true. HARD_FP_VALID_P
|
||||
- is true if the hard frame pointer has been set up. */
|
||||
+ to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
|
||||
+ bytes above the bottom of the static frame. Skip any write-back
|
||||
+ candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
|
||||
+ frame pointer has been set up. */
|
||||
|
||||
static void
|
||||
-aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
+aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
||||
unsigned start, unsigned limit, bool skip_wb,
|
||||
bool hard_fp_valid_p)
|
||||
{
|
||||
@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
reg = gen_rtx_REG (mode, regno);
|
||||
- offset = start_offset + frame.reg_offset[regno];
|
||||
+ offset = (frame.reg_offset[regno]
|
||||
+ + frame.bytes_below_saved_regs
|
||||
+ - bytes_below_sp);
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
poly_int64 sp_offset = offset;
|
||||
|
||||
@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
else if (GP_REGNUM_P (regno)
|
||||
&& (!offset.is_constant (&const_offset) || const_offset >= 512))
|
||||
{
|
||||
- gcc_assert (known_eq (start_offset, 0));
|
||||
- poly_int64 fp_offset
|
||||
- = frame.below_hard_fp_saved_regs_size;
|
||||
+ poly_int64 fp_offset = frame.bytes_below_hard_fp - bytes_below_sp;
|
||||
if (hard_fp_valid_p)
|
||||
base_rtx = hard_frame_pointer_rtx;
|
||||
else
|
||||
@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
|
||||
}
|
||||
|
||||
/* Emit code to restore the callee registers from register number START
|
||||
- up to and including LIMIT. Restore from the stack offset START_OFFSET,
|
||||
- skipping any write-back candidates if SKIP_WB is true. Write the
|
||||
- appropriate REG_CFA_RESTORE notes into CFI_OPS. */
|
||||
+ up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
|
||||
+ bytes above the bottom of the static frame. Skip any write-back
|
||||
+ candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
|
||||
+ notes into CFI_OPS. */
|
||||
|
||||
static void
|
||||
-aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
|
||||
+aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
|
||||
unsigned limit, bool skip_wb, rtx *cfi_ops)
|
||||
{
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
reg = gen_rtx_REG (mode, regno);
|
||||
- offset = start_offset + frame.reg_offset[regno];
|
||||
+ offset = (frame.reg_offset[regno]
|
||||
+ + frame.bytes_below_saved_regs
|
||||
+ - bytes_below_sp);
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
|
||||
@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void)
|
||||
HOST_WIDE_INT callee_adjust = frame.callee_adjust;
|
||||
poly_int64 final_adjust = frame.final_adjust;
|
||||
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
- poly_int64 below_hard_fp_saved_regs_size
|
||||
- = frame.below_hard_fp_saved_regs_size;
|
||||
unsigned reg1 = frame.wb_push_candidate1;
|
||||
unsigned reg2 = frame.wb_push_candidate2;
|
||||
bool emit_frame_chain = frame.emit_frame_chain;
|
||||
@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void)
|
||||
- frame.hard_fp_offset);
|
||||
gcc_assert (known_ge (chain_offset, 0));
|
||||
|
||||
- /* The offset of the bottom of the save area from the current SP. */
|
||||
- poly_int64 saved_regs_offset = chain_offset - below_hard_fp_saved_regs_size;
|
||||
+ /* The offset of the current SP from the bottom of the static frame. */
|
||||
+ poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
|
||||
|
||||
if (emit_frame_chain)
|
||||
{
|
||||
@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void)
|
||||
{
|
||||
reg1 = R29_REGNUM;
|
||||
reg2 = R30_REGNUM;
|
||||
- aarch64_save_callee_saves (saved_regs_offset, reg1, reg2,
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
|
||||
false, false);
|
||||
}
|
||||
else
|
||||
@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void)
|
||||
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
|
||||
}
|
||||
|
||||
- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
|
||||
callee_adjust != 0 || emit_frame_chain,
|
||||
emit_frame_chain);
|
||||
if (maybe_ne (sve_callee_adjust, 0))
|
||||
@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void)
|
||||
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
|
||||
sve_callee_adjust,
|
||||
!frame_pointer_needed, false);
|
||||
- saved_regs_offset += sve_callee_adjust;
|
||||
+ bytes_below_sp -= sve_callee_adjust;
|
||||
}
|
||||
- aarch64_save_callee_saves (saved_regs_offset, P0_REGNUM, P15_REGNUM,
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
|
||||
false, emit_frame_chain);
|
||||
- aarch64_save_callee_saves (saved_regs_offset, V0_REGNUM, V31_REGNUM,
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
|
||||
callee_adjust != 0 || emit_frame_chain,
|
||||
emit_frame_chain);
|
||||
|
||||
/* We may need to probe the final adjustment if it is larger than the guard
|
||||
that is assumed by the called. */
|
||||
+ gcc_assert (known_eq (bytes_below_sp, final_adjust));
|
||||
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
|
||||
!frame_pointer_needed, true);
|
||||
}
|
||||
@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
poly_int64 initial_adjust = frame.initial_adjust;
|
||||
HOST_WIDE_INT callee_adjust = frame.callee_adjust;
|
||||
poly_int64 final_adjust = frame.final_adjust;
|
||||
- poly_int64 callee_offset = frame.callee_offset;
|
||||
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
|
||||
poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
|
||||
unsigned reg1 = frame.wb_pop_candidate1;
|
||||
@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
|
||||
/* Restore the vector registers before the predicate registers,
|
||||
so that we can use P4 as a temporary for big-endian SVE frames. */
|
||||
- aarch64_restore_callee_saves (callee_offset, V0_REGNUM, V31_REGNUM,
|
||||
+ aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
|
||||
callee_adjust != 0, &cfi_ops);
|
||||
- aarch64_restore_callee_saves (callee_offset, P0_REGNUM, P15_REGNUM,
|
||||
+ aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
|
||||
false, &cfi_ops);
|
||||
if (maybe_ne (sve_callee_adjust, 0))
|
||||
aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
|
||||
@@ -10545,7 +10545,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
/* When shadow call stack is enabled, the scs_pop in the epilogue will
|
||||
restore x30, we don't need to restore x30 again in the traditional
|
||||
way. */
|
||||
- aarch64_restore_callee_saves (callee_offset - sve_callee_adjust,
|
||||
+ aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
|
||||
R0_REGNUM, last_gpr,
|
||||
callee_adjust != 0, &cfi_ops);
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index dedc5b32f..a1db4f689 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -837,10 +837,6 @@ struct GTY (()) aarch64_frame
|
||||
It is zero when no push is used. */
|
||||
HOST_WIDE_INT callee_adjust;
|
||||
|
||||
- /* The offset from SP to the callee-save registers after initial_adjust.
|
||||
- It may be non-zero if no push is used (ie. callee_adjust == 0). */
|
||||
- poly_int64 callee_offset;
|
||||
-
|
||||
/* The size of the stack adjustment before saving or after restoring
|
||||
SVE registers. */
|
||||
poly_int64 sve_callee_adjust;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,47 @@
|
||||
From c8768dd861538817db8c1955dcce4b6d8ce17c48 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:07 +0100
|
||||
Subject: [PATCH 090/157] [Backport][SME] aarch64: Only calculate chain_offset
|
||||
if there is a chain
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aa8b57ee0206e8e5ac7078692ee67fb6ead05645
|
||||
|
||||
After previous patches, it is no longer necessary to calculate
|
||||
a chain_offset in cases where there is no chain record.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_expand_prologue): Move the
|
||||
calculation of chain_offset into the emit_frame_chain block.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 10 +++++-----
|
||||
1 file changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index a0a4c7ac3..bef6a658b 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -10357,16 +10357,16 @@ aarch64_expand_prologue (void)
|
||||
if (callee_adjust != 0)
|
||||
aarch64_push_regs (reg1, reg2, callee_adjust);
|
||||
|
||||
- /* The offset of the frame chain record (if any) from the current SP. */
|
||||
- poly_int64 chain_offset = (initial_adjust + callee_adjust
|
||||
- - frame.hard_fp_offset);
|
||||
- gcc_assert (known_ge (chain_offset, 0));
|
||||
-
|
||||
/* The offset of the current SP from the bottom of the static frame. */
|
||||
poly_int64 bytes_below_sp = frame_size - initial_adjust - callee_adjust;
|
||||
|
||||
if (emit_frame_chain)
|
||||
{
|
||||
+ /* The offset of the frame chain record (if any) from the current SP. */
|
||||
+ poly_int64 chain_offset = (initial_adjust + callee_adjust
|
||||
+ - frame.hard_fp_offset);
|
||||
+ gcc_assert (known_ge (chain_offset, 0));
|
||||
+
|
||||
if (callee_adjust == 0)
|
||||
{
|
||||
reg1 = R29_REGNUM;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,94 @@
|
||||
From 43dc03de6d608e10d83cc7994d127e3764bfbcf7 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:07 +0100
|
||||
Subject: [PATCH 091/157] [Backport][SME] aarch64: Rename locals_offset to
|
||||
bytes_above_locals
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=28034dbb5e32711d5f4d655576f2499e6f57f854
|
||||
|
||||
locals_offset was described as:
|
||||
|
||||
/* Offset from the base of the frame (incomming SP) to the
|
||||
top of the locals area. This value is always a multiple of
|
||||
STACK_BOUNDARY. */
|
||||
|
||||
This is implicitly an “upside down” view of the frame: the incoming
|
||||
SP is at offset 0, and anything N bytes below the incoming SP is at
|
||||
offset N (rather than -N).
|
||||
|
||||
However, reg_offset instead uses a “right way up” view; that is,
|
||||
it views offsets in address terms. Something above X is at a
|
||||
positive offset from X and something below X is at a negative
|
||||
offset from X.
|
||||
|
||||
Also, even on FRAME_GROWS_DOWNWARD targets like AArch64,
|
||||
target-independent code views offsets in address terms too:
|
||||
locals are allocated at negative offsets to virtual_stack_vars.
|
||||
|
||||
It seems confusing to have *_offset fields of the same structure
|
||||
using different polarities like this. This patch tries to avoid
|
||||
that by renaming locals_offset to bytes_above_locals.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::locals_offset): Rename to...
|
||||
(aarch64_frame::bytes_above_locals): ...this.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame)
|
||||
(aarch64_initial_elimination_offset): Update accordingly.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 6 +++---
|
||||
gcc/config/aarch64/aarch64.h | 6 +++---
|
||||
2 files changed, 6 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index bef6a658b..992f71bbd 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8937,7 +8937,7 @@ aarch64_layout_frame (void)
|
||||
STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
|
||||
|
||||
- frame.locals_offset = frame.saved_varargs_size;
|
||||
+ frame.bytes_above_locals = frame.saved_varargs_size;
|
||||
|
||||
frame.initial_adjust = 0;
|
||||
frame.final_adjust = 0;
|
||||
@@ -13047,13 +13047,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
|
||||
return frame.hard_fp_offset;
|
||||
|
||||
if (from == FRAME_POINTER_REGNUM)
|
||||
- return frame.hard_fp_offset - frame.locals_offset;
|
||||
+ return frame.hard_fp_offset - frame.bytes_above_locals;
|
||||
}
|
||||
|
||||
if (to == STACK_POINTER_REGNUM)
|
||||
{
|
||||
if (from == FRAME_POINTER_REGNUM)
|
||||
- return frame.frame_size - frame.locals_offset;
|
||||
+ return frame.frame_size - frame.bytes_above_locals;
|
||||
}
|
||||
|
||||
return frame.frame_size;
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index a1db4f689..2acff9a96 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -815,10 +815,10 @@ struct GTY (()) aarch64_frame
|
||||
always a multiple of STACK_BOUNDARY. */
|
||||
poly_int64 bytes_below_hard_fp;
|
||||
|
||||
- /* Offset from the base of the frame (incomming SP) to the
|
||||
- top of the locals area. This value is always a multiple of
|
||||
+ /* The number of bytes between the top of the locals area and the top
|
||||
+ of the frame (the incomming SP). This value is always a multiple of
|
||||
STACK_BOUNDARY. */
|
||||
- poly_int64 locals_offset;
|
||||
+ poly_int64 bytes_above_locals;
|
||||
|
||||
/* Offset from the base of the frame (incomming SP) to the
|
||||
hard_frame_pointer. This value is always a multiple of
|
||||
--
|
||||
2.33.0
|
||||
|
||||
151
0184-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
Normal file
151
0184-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch
Normal file
@ -0,0 +1,151 @@
|
||||
From e33aa6e25334fd94e1e4f2d8b6c8247029657a54 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:08 +0100
|
||||
Subject: [PATCH 092/157] [Backport][SME] aarch64: Rename hard_fp_offset to
|
||||
bytes_above_hard_fp
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ed61c87f044f5460109c197855b316641db3c6c6
|
||||
|
||||
Similarly to the previous locals_offset patch, hard_fp_offset
|
||||
was described as:
|
||||
|
||||
/* Offset from the base of the frame (incomming SP) to the
|
||||
hard_frame_pointer. This value is always a multiple of
|
||||
STACK_BOUNDARY. */
|
||||
poly_int64 hard_fp_offset;
|
||||
|
||||
which again took an “upside-down” view: higher offsets meant lower
|
||||
addresses. This patch renames the field to bytes_above_hard_fp instead.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::hard_fp_offset): Rename
|
||||
to...
|
||||
(aarch64_frame::bytes_above_hard_fp): ...this.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame)
|
||||
(aarch64_expand_prologue): Update accordingly.
|
||||
(aarch64_initial_elimination_offset): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 26 +++++++++++++-------------
|
||||
gcc/config/aarch64/aarch64.h | 6 +++---
|
||||
2 files changed, 16 insertions(+), 16 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 992f71bbd..67199a026 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8929,7 +8929,7 @@ aarch64_layout_frame (void)
|
||||
+ get_frame_size (),
|
||||
STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
|
||||
- frame.hard_fp_offset
|
||||
+ frame.bytes_above_hard_fp
|
||||
= saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
|
||||
|
||||
/* Both these values are already aligned. */
|
||||
@@ -8978,13 +8978,13 @@ aarch64_layout_frame (void)
|
||||
else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
|
||||
max_push_offset = 256;
|
||||
|
||||
- HOST_WIDE_INT const_size, const_below_saved_regs, const_fp_offset;
|
||||
+ HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
|
||||
HOST_WIDE_INT const_saved_regs_size;
|
||||
if (known_eq (frame.saved_regs_size, 0))
|
||||
frame.initial_adjust = frame.frame_size;
|
||||
else if (frame.frame_size.is_constant (&const_size)
|
||||
&& const_size < max_push_offset
|
||||
- && known_eq (frame.hard_fp_offset, const_size))
|
||||
+ && known_eq (frame.bytes_above_hard_fp, const_size))
|
||||
{
|
||||
/* Simple, small frame with no data below the saved registers.
|
||||
|
||||
@@ -9001,8 +9001,8 @@ aarch64_layout_frame (void)
|
||||
case that it hardly seems worth the effort though. */
|
||||
&& (!saves_below_hard_fp_p || const_below_saved_regs == 0)
|
||||
&& !(cfun->calls_alloca
|
||||
- && frame.hard_fp_offset.is_constant (&const_fp_offset)
|
||||
- && const_fp_offset < max_push_offset))
|
||||
+ && frame.bytes_above_hard_fp.is_constant (&const_above_fp)
|
||||
+ && const_above_fp < max_push_offset))
|
||||
{
|
||||
/* Frame with small area below the saved registers:
|
||||
|
||||
@@ -9020,12 +9020,12 @@ aarch64_layout_frame (void)
|
||||
sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
|
||||
save SVE registers relative to SP
|
||||
sub sp, sp, bytes_below_saved_regs */
|
||||
- frame.initial_adjust = (frame.hard_fp_offset
|
||||
+ frame.initial_adjust = (frame.bytes_above_hard_fp
|
||||
+ frame.below_hard_fp_saved_regs_size);
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
- else if (frame.hard_fp_offset.is_constant (&const_fp_offset)
|
||||
- && const_fp_offset < max_push_offset)
|
||||
+ else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
|
||||
+ && const_above_fp < max_push_offset)
|
||||
{
|
||||
/* Frame with large area below the saved registers, or with SVE saves,
|
||||
but with a small area above:
|
||||
@@ -9035,7 +9035,7 @@ aarch64_layout_frame (void)
|
||||
[sub sp, sp, below_hard_fp_saved_regs_size]
|
||||
[save SVE registers relative to SP]
|
||||
sub sp, sp, bytes_below_saved_regs */
|
||||
- frame.callee_adjust = const_fp_offset;
|
||||
+ frame.callee_adjust = const_above_fp;
|
||||
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
@@ -9050,7 +9050,7 @@ aarch64_layout_frame (void)
|
||||
[sub sp, sp, below_hard_fp_saved_regs_size]
|
||||
[save SVE registers relative to SP]
|
||||
sub sp, sp, bytes_below_saved_regs */
|
||||
- frame.initial_adjust = frame.hard_fp_offset;
|
||||
+ frame.initial_adjust = frame.bytes_above_hard_fp;
|
||||
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
@@ -10364,7 +10364,7 @@ aarch64_expand_prologue (void)
|
||||
{
|
||||
/* The offset of the frame chain record (if any) from the current SP. */
|
||||
poly_int64 chain_offset = (initial_adjust + callee_adjust
|
||||
- - frame.hard_fp_offset);
|
||||
+ - frame.bytes_above_hard_fp);
|
||||
gcc_assert (known_ge (chain_offset, 0));
|
||||
|
||||
if (callee_adjust == 0)
|
||||
@@ -13044,10 +13044,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
|
||||
if (to == HARD_FRAME_POINTER_REGNUM)
|
||||
{
|
||||
if (from == ARG_POINTER_REGNUM)
|
||||
- return frame.hard_fp_offset;
|
||||
+ return frame.bytes_above_hard_fp;
|
||||
|
||||
if (from == FRAME_POINTER_REGNUM)
|
||||
- return frame.hard_fp_offset - frame.bytes_above_locals;
|
||||
+ return frame.bytes_above_hard_fp - frame.bytes_above_locals;
|
||||
}
|
||||
|
||||
if (to == STACK_POINTER_REGNUM)
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 2acff9a96..0f7822c3d 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -820,10 +820,10 @@ struct GTY (()) aarch64_frame
|
||||
STACK_BOUNDARY. */
|
||||
poly_int64 bytes_above_locals;
|
||||
|
||||
- /* Offset from the base of the frame (incomming SP) to the
|
||||
- hard_frame_pointer. This value is always a multiple of
|
||||
+ /* The number of bytes between the hard_frame_pointer and the top of
|
||||
+ the frame (the incomming SP). This value is always a multiple of
|
||||
STACK_BOUNDARY. */
|
||||
- poly_int64 hard_fp_offset;
|
||||
+ poly_int64 bytes_above_hard_fp;
|
||||
|
||||
/* The size of the frame. This value is the offset from base of the
|
||||
frame (incomming SP) to the stack_pointer. This value is always
|
||||
--
|
||||
2.33.0
|
||||
|
||||
37
0185-Backport-SME-aarch64-Tweak-frame_size-comment.patch
Normal file
37
0185-Backport-SME-aarch64-Tweak-frame_size-comment.patch
Normal file
@ -0,0 +1,37 @@
|
||||
From 6aa0db727b6e3a7fed95b014f25f3f022d1f46e2 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:08 +0100
|
||||
Subject: [PATCH 093/157] [Backport][SME] aarch64: Tweak frame_size comment
|
||||
MIME-Version: 1.0
|
||||
Content-Type: text/plain; charset=UTF-8
|
||||
Content-Transfer-Encoding: 8bit
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=492b60670e69b0a7f11345b69a3c922c20d5d8c3
|
||||
|
||||
This patch fixes another case in which a value was described with
|
||||
an “upside-down” view.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.h | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 0f7822c3d..39abca051 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -825,8 +825,8 @@ struct GTY (()) aarch64_frame
|
||||
STACK_BOUNDARY. */
|
||||
poly_int64 bytes_above_hard_fp;
|
||||
|
||||
- /* The size of the frame. This value is the offset from base of the
|
||||
- frame (incomming SP) to the stack_pointer. This value is always
|
||||
+ /* The size of the frame, i.e. the number of bytes between the bottom
|
||||
+ of the outgoing arguments and the incoming SP. This value is always
|
||||
a multiple of STACK_BOUNDARY. */
|
||||
poly_int64 frame_size;
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
198
0186-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
Normal file
198
0186-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch
Normal file
@ -0,0 +1,198 @@
|
||||
From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:09 +0100
|
||||
Subject: [PATCH 094/157] [Backport][SME] aarch64: Measure reg_offset from the
|
||||
bottom of the frame
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54
|
||||
|
||||
reg_offset was measured from the bottom of the saved register area.
|
||||
This made perfect sense with the original layout, since the bottom
|
||||
of the saved register area was also the hard frame pointer address.
|
||||
It became slightly less obvious with SVE, since we save SVE
|
||||
registers below the hard frame pointer, but it still made sense.
|
||||
|
||||
However, if we want to allow different frame layouts, it's more
|
||||
convenient and obvious to measure reg_offset from the bottom of
|
||||
the frame. After previous patches, it's also a slight simplification
|
||||
in its own right.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame): Add comment above
|
||||
reg_offset.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Walk offsets
|
||||
from the bottom of the frame, rather than the bottom of the saved
|
||||
register area. Measure reg_offset from the bottom of the frame
|
||||
rather than the bottom of the saved register area.
|
||||
(aarch64_save_callee_saves): Update accordingly.
|
||||
(aarch64_restore_callee_saves): Likewise.
|
||||
(aarch64_get_separate_components): Likewise.
|
||||
(aarch64_process_components): Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 53 ++++++++++++++++-------------------
|
||||
gcc/config/aarch64/aarch64.h | 3 ++
|
||||
2 files changed, 27 insertions(+), 29 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 67199a026..df8a83b04 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void)
|
||||
static void
|
||||
aarch64_layout_frame (void)
|
||||
{
|
||||
- poly_int64 offset = 0;
|
||||
int regno, last_fp_reg = INVALID_REGNUM;
|
||||
machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
|
||||
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
|
||||
@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void)
|
||||
gcc_assert (crtl->is_leaf
|
||||
|| maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
|
||||
|
||||
- frame.bytes_below_saved_regs = crtl->outgoing_args_size;
|
||||
+ poly_int64 offset = crtl->outgoing_args_size;
|
||||
+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
+ frame.bytes_below_saved_regs = offset;
|
||||
|
||||
/* Now assign stack slots for the registers. Start with the predicate
|
||||
registers, since predicate LDR and STR have a relatively small
|
||||
@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void)
|
||||
offset += BYTES_PER_SVE_PRED;
|
||||
}
|
||||
|
||||
- if (maybe_ne (offset, 0))
|
||||
+ poly_int64 saved_prs_size = offset - frame.bytes_below_saved_regs;
|
||||
+ if (maybe_ne (saved_prs_size, 0))
|
||||
{
|
||||
/* If we have any vector registers to save above the predicate registers,
|
||||
the offset of the vector register save slots need to be a multiple
|
||||
@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void)
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
else
|
||||
{
|
||||
- if (known_le (offset, vector_save_size))
|
||||
- offset = vector_save_size;
|
||||
- else if (known_le (offset, vector_save_size * 2))
|
||||
- offset = vector_save_size * 2;
|
||||
+ if (known_le (saved_prs_size, vector_save_size))
|
||||
+ offset = frame.bytes_below_saved_regs + vector_save_size;
|
||||
+ else if (known_le (saved_prs_size, vector_save_size * 2))
|
||||
+ offset = frame.bytes_below_saved_regs + vector_save_size * 2;
|
||||
else
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void)
|
||||
|
||||
/* OFFSET is now the offset of the hard frame pointer from the bottom
|
||||
of the callee save area. */
|
||||
- bool saves_below_hard_fp_p = maybe_ne (offset, 0);
|
||||
- frame.below_hard_fp_saved_regs_size = offset;
|
||||
- frame.bytes_below_hard_fp = offset + frame.bytes_below_saved_regs;
|
||||
+ frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
+ bool saves_below_hard_fp_p
|
||||
+ = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
|
||||
+ frame.bytes_below_hard_fp = offset;
|
||||
if (frame.emit_frame_chain)
|
||||
{
|
||||
/* FP and LR are placed in the linkage record. */
|
||||
@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void)
|
||||
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
|
||||
- frame.saved_regs_size = offset;
|
||||
+ frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
|
||||
- poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
|
||||
+ poly_int64 varargs_and_saved_regs_size
|
||||
+ = frame.saved_regs_size + frame.saved_varargs_size;
|
||||
|
||||
poly_int64 saved_regs_and_above
|
||||
= aligned_upper_bound (varargs_and_saved_regs_size
|
||||
@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
reg = gen_rtx_REG (mode, regno);
|
||||
- offset = (frame.reg_offset[regno]
|
||||
- + frame.bytes_below_saved_regs
|
||||
- - bytes_below_sp);
|
||||
+ offset = frame.reg_offset[regno] - bytes_below_sp;
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
poly_int64 sp_offset = offset;
|
||||
|
||||
@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
reg = gen_rtx_REG (mode, regno);
|
||||
- offset = (frame.reg_offset[regno]
|
||||
- + frame.bytes_below_saved_regs
|
||||
- - bytes_below_sp);
|
||||
+ offset = frame.reg_offset[regno] - bytes_below_sp;
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
|
||||
@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void)
|
||||
it as a stack probe for -fstack-clash-protection. */
|
||||
if (flag_stack_clash_protection
|
||||
&& maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
|
||||
- && known_eq (offset, 0))
|
||||
+ && known_eq (offset, frame.bytes_below_saved_regs))
|
||||
continue;
|
||||
|
||||
/* Get the offset relative to the register we'll use. */
|
||||
if (frame_pointer_needed)
|
||||
- offset -= frame.below_hard_fp_saved_regs_size;
|
||||
- else
|
||||
- offset += frame.bytes_below_saved_regs;
|
||||
+ offset -= frame.bytes_below_hard_fp;
|
||||
|
||||
/* Check that we can access the stack slot of the register with one
|
||||
direct load with no adjustments needed. */
|
||||
@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
rtx reg = gen_rtx_REG (mode, regno);
|
||||
poly_int64 offset = frame.reg_offset[regno];
|
||||
if (frame_pointer_needed)
|
||||
- offset -= frame.below_hard_fp_saved_regs_size;
|
||||
- else
|
||||
- offset += frame.bytes_below_saved_regs;
|
||||
+ offset -= frame.bytes_below_hard_fp;
|
||||
|
||||
rtx addr = plus_constant (Pmode, ptr_reg, offset);
|
||||
rtx mem = gen_frame_mem (mode, addr);
|
||||
@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
|
||||
/* REGNO2 can be saved/restored in a pair with REGNO. */
|
||||
rtx reg2 = gen_rtx_REG (mode, regno2);
|
||||
if (frame_pointer_needed)
|
||||
- offset2 -= frame.below_hard_fp_saved_regs_size;
|
||||
- else
|
||||
- offset2 += frame.bytes_below_saved_regs;
|
||||
+ offset2 -= frame.bytes_below_hard_fp;
|
||||
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
|
||||
rtx mem2 = gen_frame_mem (mode, addr2);
|
||||
rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
|
||||
@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
if (final_adjustment_p
|
||||
&& known_eq (frame.below_hard_fp_saved_regs_size, 0))
|
||||
{
|
||||
- poly_int64 lr_offset = frame.reg_offset[LR_REGNUM];
|
||||
+ poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
|
||||
+ - frame.bytes_below_saved_regs);
|
||||
if (known_ge (lr_offset, 0))
|
||||
min_probe_threshold -= lr_offset.to_constant ();
|
||||
else
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 39abca051..f340237d0 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune;
|
||||
#ifdef HAVE_POLY_INT_H
|
||||
struct GTY (()) aarch64_frame
|
||||
{
|
||||
+ /* The offset from the bottom of the static frame (the bottom of the
|
||||
+ outgoing arguments) of each register save slot, or -2 if no save is
|
||||
+ needed. */
|
||||
poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
|
||||
|
||||
/* The number of extra stack bytes taken up by register varargs.
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,58 @@
|
||||
From 4b8f3f194e68d0d411eaa6692699d8e5e2b4217d Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:09 +0100
|
||||
Subject: [PATCH 095/157] [Backport][SME] aarch64: Simplify top of frame
|
||||
allocation
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bc9dcdde80915d7585a21daa2b69f4adf4a1e3c1
|
||||
|
||||
After previous patches, it no longer really makes sense to allocate
|
||||
the top of the frame in terms of varargs_and_saved_regs_size and
|
||||
saved_regs_and_above.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Simplify
|
||||
the allocation of the top of the frame.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 23 ++++++++---------------
|
||||
1 file changed, 8 insertions(+), 15 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index df8a83b04..3329aa364 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8925,23 +8925,16 @@ aarch64_layout_frame (void)
|
||||
|
||||
frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
|
||||
- poly_int64 varargs_and_saved_regs_size
|
||||
- = frame.saved_regs_size + frame.saved_varargs_size;
|
||||
-
|
||||
- poly_int64 saved_regs_and_above
|
||||
- = aligned_upper_bound (varargs_and_saved_regs_size
|
||||
- + get_frame_size (),
|
||||
- STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
-
|
||||
- frame.bytes_above_hard_fp
|
||||
- = saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
|
||||
+ offset += get_frame_size ();
|
||||
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
+ auto top_of_locals = offset;
|
||||
|
||||
- /* Both these values are already aligned. */
|
||||
- gcc_assert (multiple_p (frame.bytes_below_saved_regs,
|
||||
- STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
- frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
|
||||
+ offset += frame.saved_varargs_size;
|
||||
+ gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
+ frame.frame_size = offset;
|
||||
|
||||
- frame.bytes_above_locals = frame.saved_varargs_size;
|
||||
+ frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
|
||||
+ frame.bytes_above_locals = frame.frame_size - top_of_locals;
|
||||
|
||||
frame.initial_adjust = 0;
|
||||
frame.final_adjust = 0;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,41 @@
|
||||
From 0ab484f5de7d28c0a7166439d403e0983834b120 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:10 +0100
|
||||
Subject: [PATCH 096/157] [Backport][SME] aarch64: Minor initial adjustment
|
||||
tweak
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ee5466ff4faca2076cc61f1f120d0b5062c8111c
|
||||
|
||||
This patch just changes a calculation of initial_adjust
|
||||
to one that makes it slightly more obvious that the total
|
||||
adjustment is frame.frame_size.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Tweak
|
||||
calculation of initial_adjust for frames in which all saves
|
||||
are SVE saves.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 3329aa364..72604dd9d 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -9014,11 +9014,10 @@ aarch64_layout_frame (void)
|
||||
{
|
||||
/* Frame in which all saves are SVE saves:
|
||||
|
||||
- sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
|
||||
+ sub sp, sp, frame_size - bytes_below_saved_regs
|
||||
save SVE registers relative to SP
|
||||
sub sp, sp, bytes_below_saved_regs */
|
||||
- frame.initial_adjust = (frame.bytes_above_hard_fp
|
||||
- + frame.below_hard_fp_saved_regs_size);
|
||||
+ frame.initial_adjust = frame.frame_size - frame.bytes_below_saved_regs;
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
128
0189-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
Normal file
128
0189-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch
Normal file
@ -0,0 +1,128 @@
|
||||
From b4581d1e6a7b94dfbd58871dad51d3f12889081f Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:10 +0100
|
||||
Subject: [PATCH 097/157] [Backport][SME] aarch64: Tweak stack clash boundary
|
||||
condition
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1785b8077cc03214ebd1db953c870172fcf15966
|
||||
|
||||
The AArch64 ABI says that, when stack clash protection is used,
|
||||
there can be a maximum of 1KiB of unprobed space at sp on entry
|
||||
to a function. Therefore, we need to probe when allocating
|
||||
>= guard_size - 1KiB of data (>= rather than >). This is what
|
||||
GCC does.
|
||||
|
||||
If an allocation is exactly guard_size bytes, it is enough to allocate
|
||||
those bytes and probe once at offset 1024. It isn't possible to use a
|
||||
single probe at any other offset: higher would complicate later code,
|
||||
by leaving more unprobed space than usual, while lower would risk
|
||||
leaving an entire page unprobed. For simplicity, the code probes all
|
||||
allocations at offset 1024.
|
||||
|
||||
Some register saves also act as probes. If we need to allocate
|
||||
more space below the last such register save probe, we need to
|
||||
probe the allocation if it is > 1KiB. Again, this allocation is
|
||||
then sometimes (but not always) probed at offset 1024. This sort of
|
||||
allocation is currently only used for outgoing arguments, which are
|
||||
rarely this big.
|
||||
|
||||
However, the code also probed if this final outgoing-arguments
|
||||
allocation was == 1KiB, rather than just > 1KiB. This isn't
|
||||
necessary, since the register save then probes at offset 1024
|
||||
as required. Continuing to probe allocations of exactly 1KiB
|
||||
would complicate later patches.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
|
||||
Don't probe final allocations that are exactly 1KiB in size (after
|
||||
unprobed space above the final allocation has been deducted).
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/stack-check-prologue-17.c: New test.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 4 +-
|
||||
.../aarch64/stack-check-prologue-17.c | 55 +++++++++++++++++++
|
||||
2 files changed, 58 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 72604dd9d..ba92a23a7 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -9943,9 +9943,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
HOST_WIDE_INT guard_size
|
||||
= 1 << param_stack_clash_protection_guard_size;
|
||||
HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
|
||||
+ HOST_WIDE_INT byte_sp_alignment = STACK_BOUNDARY / BITS_PER_UNIT;
|
||||
+ gcc_assert (multiple_p (poly_size, byte_sp_alignment));
|
||||
HOST_WIDE_INT min_probe_threshold
|
||||
= (final_adjustment_p
|
||||
- ? guard_used_by_caller
|
||||
+ ? guard_used_by_caller + byte_sp_alignment
|
||||
: guard_size - guard_used_by_caller);
|
||||
/* When doing the final adjustment for the outgoing arguments, take into
|
||||
account any unprobed space there is above the current SP. There are
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
|
||||
new file mode 100644
|
||||
index 000000000..0d8a25d73
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
|
||||
@@ -0,0 +1,55 @@
|
||||
+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||
+
|
||||
+void f(int, ...);
|
||||
+void g();
|
||||
+
|
||||
+/*
|
||||
+** test1:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #1024
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+*/
|
||||
+int test1(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** test2:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #1040
|
||||
+** str xzr, \[sp\]
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+*/
|
||||
+int test2(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
409
0190-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
Normal file
409
0190-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch
Normal file
@ -0,0 +1,409 @@
|
||||
From ffd483dc6a2a4af495d56cf5ebdbbb3b9ca58820 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:11 +0100
|
||||
Subject: [PATCH 098/157] [Backport][SME] aarch64: Put LR save probe in first
|
||||
16 bytes
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fee0a18abfdd4874194abd149943fa7c77a29b7c
|
||||
|
||||
-fstack-clash-protection uses the save of LR as a probe for the next
|
||||
allocation. The next allocation could be:
|
||||
|
||||
* another part of the static frame, e.g. when allocating SVE save slots
|
||||
or outgoing arguments
|
||||
|
||||
* an alloca in the same function
|
||||
|
||||
* an allocation made by a callee function
|
||||
|
||||
However, when -fomit-frame-pointer is used, the LR save slot is placed
|
||||
above the other GPR save slots. It could therefore be up to 80 bytes
|
||||
above the base of the GPR save area (which is also the hard fp address).
|
||||
|
||||
aarch64_allocate_and_probe_stack_space took this into account when
|
||||
deciding how much subsequent space could be allocated without needing
|
||||
a probe. However, it interacted badly with:
|
||||
|
||||
/* If doing a small final adjustment, we always probe at offset 0.
|
||||
This is done to avoid issues when LR is not at position 0 or when
|
||||
the final adjustment is smaller than the probing offset. */
|
||||
else if (final_adjustment_p && rounded_size == 0)
|
||||
residual_probe_offset = 0;
|
||||
|
||||
which forces any allocation that is smaller than the guard page size
|
||||
to be probed at offset 0 rather than the usual offset 1024. It was
|
||||
therefore possible to construct cases in which we had:
|
||||
|
||||
* a probe using LR at SP + 80 bytes (or some other value >= 16)
|
||||
* an allocation of the guard page size - 16 bytes
|
||||
* a probe at SP + 0
|
||||
|
||||
which allocates guard page size + 64 consecutive unprobed bytes.
|
||||
|
||||
This patch requires the LR probe to be in the first 16 bytes of the
|
||||
save area when stack clash protection is active. Doing it
|
||||
unconditionally would cause code-quality regressions.
|
||||
|
||||
Putting LR before other registers prevents push/pop allocation
|
||||
when shadow call stacks are enabled, since LR is restored
|
||||
separately from the other callee-saved registers.
|
||||
|
||||
The new comment doesn't say that the probe register is required
|
||||
to be LR, since a later patch removes that restriction.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Ensure that
|
||||
the LR save slot is in the first 16 bytes of the register save area.
|
||||
Only form STP/LDP push/pop candidates if both registers are valid.
|
||||
(aarch64_allocate_and_probe_stack_space): Remove workaround for
|
||||
when LR was not in the first 16 bytes.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/stack-check-prologue-18.c: New test.
|
||||
* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
|
||||
* gcc.target/aarch64/stack-check-prologue-20.c: Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 72 ++++++-------
|
||||
.../aarch64/stack-check-prologue-18.c | 100 ++++++++++++++++++
|
||||
.../aarch64/stack-check-prologue-19.c | 100 ++++++++++++++++++
|
||||
.../aarch64/stack-check-prologue-20.c | 3 +
|
||||
4 files changed, 233 insertions(+), 42 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index ba92a23a7..1ba4c2f89 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8873,26 +8873,34 @@ aarch64_layout_frame (void)
|
||||
bool saves_below_hard_fp_p
|
||||
= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
|
||||
frame.bytes_below_hard_fp = offset;
|
||||
+
|
||||
+ auto allocate_gpr_slot = [&](unsigned int regno)
|
||||
+ {
|
||||
+ frame.reg_offset[regno] = offset;
|
||||
+ if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
||||
+ frame.wb_push_candidate1 = regno;
|
||||
+ else if (frame.wb_push_candidate2 == INVALID_REGNUM)
|
||||
+ frame.wb_push_candidate2 = regno;
|
||||
+ offset += UNITS_PER_WORD;
|
||||
+ };
|
||||
+
|
||||
if (frame.emit_frame_chain)
|
||||
{
|
||||
/* FP and LR are placed in the linkage record. */
|
||||
- frame.reg_offset[R29_REGNUM] = offset;
|
||||
- frame.wb_push_candidate1 = R29_REGNUM;
|
||||
- frame.reg_offset[R30_REGNUM] = offset + UNITS_PER_WORD;
|
||||
- frame.wb_push_candidate2 = R30_REGNUM;
|
||||
- offset += 2 * UNITS_PER_WORD;
|
||||
+ allocate_gpr_slot (R29_REGNUM);
|
||||
+ allocate_gpr_slot (R30_REGNUM);
|
||||
}
|
||||
+ else if (flag_stack_clash_protection
|
||||
+ && known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
|
||||
+ /* Put the LR save slot first, since it makes a good choice of probe
|
||||
+ for stack clash purposes. The idea is that the link register usually
|
||||
+ has to be saved before a call anyway, and so we lose little by
|
||||
+ stopping it from being individually shrink-wrapped. */
|
||||
+ allocate_gpr_slot (R30_REGNUM);
|
||||
|
||||
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
- {
|
||||
- frame.reg_offset[regno] = offset;
|
||||
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
||||
- frame.wb_push_candidate1 = regno;
|
||||
- else if (frame.wb_push_candidate2 == INVALID_REGNUM)
|
||||
- frame.wb_push_candidate2 = regno;
|
||||
- offset += UNITS_PER_WORD;
|
||||
- }
|
||||
+ allocate_gpr_slot (regno);
|
||||
|
||||
poly_int64 max_int_offset = offset;
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
@@ -8970,10 +8978,13 @@ aarch64_layout_frame (void)
|
||||
max_push_offset to 0, because no registers are popped at this time,
|
||||
so callee_adjust cannot be adjusted. */
|
||||
HOST_WIDE_INT max_push_offset = 0;
|
||||
- if (frame.wb_pop_candidate2 != INVALID_REGNUM)
|
||||
- max_push_offset = 512;
|
||||
- else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
|
||||
- max_push_offset = 256;
|
||||
+ if (frame.wb_pop_candidate1 != INVALID_REGNUM)
|
||||
+ {
|
||||
+ if (frame.wb_pop_candidate2 != INVALID_REGNUM)
|
||||
+ max_push_offset = 512;
|
||||
+ else
|
||||
+ max_push_offset = 256;
|
||||
+ }
|
||||
|
||||
HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
|
||||
HOST_WIDE_INT const_saved_regs_size;
|
||||
@@ -9949,29 +9960,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
= (final_adjustment_p
|
||||
? guard_used_by_caller + byte_sp_alignment
|
||||
: guard_size - guard_used_by_caller);
|
||||
- /* When doing the final adjustment for the outgoing arguments, take into
|
||||
- account any unprobed space there is above the current SP. There are
|
||||
- two cases:
|
||||
-
|
||||
- - When saving SVE registers below the hard frame pointer, we force
|
||||
- the lowest save to take place in the prologue before doing the final
|
||||
- adjustment (i.e. we don't allow the save to be shrink-wrapped).
|
||||
- This acts as a probe at SP, so there is no unprobed space.
|
||||
-
|
||||
- - When there are no SVE register saves, we use the store of the link
|
||||
- register as a probe. We can't assume that LR was saved at position 0
|
||||
- though, so treat any space below it as unprobed. */
|
||||
- if (final_adjustment_p
|
||||
- && known_eq (frame.below_hard_fp_saved_regs_size, 0))
|
||||
- {
|
||||
- poly_int64 lr_offset = (frame.reg_offset[LR_REGNUM]
|
||||
- - frame.bytes_below_saved_regs);
|
||||
- if (known_ge (lr_offset, 0))
|
||||
- min_probe_threshold -= lr_offset.to_constant ();
|
||||
- else
|
||||
- gcc_assert (!flag_stack_clash_protection || known_eq (poly_size, 0));
|
||||
- }
|
||||
-
|
||||
poly_int64 frame_size = frame.frame_size;
|
||||
|
||||
/* We should always have a positive probe threshold. */
|
||||
@@ -10151,8 +10139,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
if (final_adjustment_p && rounded_size != 0)
|
||||
min_probe_threshold = 0;
|
||||
/* If doing a small final adjustment, we always probe at offset 0.
|
||||
- This is done to avoid issues when LR is not at position 0 or when
|
||||
- the final adjustment is smaller than the probing offset. */
|
||||
+ This is done to avoid issues when the final adjustment is smaller
|
||||
+ than the probing offset. */
|
||||
else if (final_adjustment_p && rounded_size == 0)
|
||||
residual_probe_offset = 0;
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
|
||||
new file mode 100644
|
||||
index 000000000..82447d20f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
|
||||
@@ -0,0 +1,100 @@
|
||||
+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||
+
|
||||
+void f(int, ...);
|
||||
+void g();
|
||||
+
|
||||
+/*
|
||||
+** test1:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #4064
|
||||
+** str xzr, \[sp\]
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+** str x26, \[sp, #?4128\]
|
||||
+** ...
|
||||
+*/
|
||||
+int test1(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ asm volatile ("" :::
|
||||
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** test2:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #1040
|
||||
+** str xzr, \[sp\]
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+*/
|
||||
+int test2(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ asm volatile ("" :::
|
||||
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** test3:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #1024
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+*/
|
||||
+int test3(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ asm volatile ("" :::
|
||||
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
|
||||
new file mode 100644
|
||||
index 000000000..73ac3e4e4
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
|
||||
@@ -0,0 +1,100 @@
|
||||
+/* { dg-options "-O2 -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||
+
|
||||
+void f(int, ...);
|
||||
+void g();
|
||||
+
|
||||
+/*
|
||||
+** test1:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #4064
|
||||
+** str xzr, \[sp\]
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+** str x26, \[sp, #?4128\]
|
||||
+** ...
|
||||
+*/
|
||||
+int test1(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ asm volatile ("" :::
|
||||
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** test2:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #1040
|
||||
+** str xzr, \[sp\]
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+*/
|
||||
+int test2(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ asm volatile ("" :::
|
||||
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** test3:
|
||||
+** ...
|
||||
+** str x30, \[sp\]
|
||||
+** sub sp, sp, #1024
|
||||
+** cbnz w0, .*
|
||||
+** bl g
|
||||
+** ...
|
||||
+*/
|
||||
+int test3(int z) {
|
||||
+ __uint128_t x = 0;
|
||||
+ int y[0x400];
|
||||
+ if (z)
|
||||
+ {
|
||||
+ asm volatile ("" :::
|
||||
+ "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26");
|
||||
+ f(0, 0, 0, 0, 0, 0, 0, &y,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,
|
||||
+ x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x);
|
||||
+ }
|
||||
+ g();
|
||||
+ return 1;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
|
||||
new file mode 100644
|
||||
index 000000000..690aae8df
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
|
||||
@@ -0,0 +1,3 @@
|
||||
+/* { dg-options "-O2 -fstack-protector-all -fstack-clash-protection -fomit-frame-pointer --param stack-clash-protection-guard-size=12 -fsanitize=shadow-call-stack -ffixed-x18" } */
|
||||
+
|
||||
+#include "stack-check-prologue-19.c"
|
||||
--
|
||||
2.33.0
|
||||
|
||||
126
0191-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
Normal file
126
0191-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch
Normal file
@ -0,0 +1,126 @@
|
||||
From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:11 +0100
|
||||
Subject: [PATCH 099/157] [Backport][SME] aarch64: Simplify probe of final
|
||||
frame allocation
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae
|
||||
|
||||
Previous patches ensured that the final frame allocation only needs
|
||||
a probe when the size is strictly greater than 1KiB. It's therefore
|
||||
safe to use the normal 1024 probe offset in all cases.
|
||||
|
||||
The main motivation for doing this is to simplify the code and
|
||||
reduce the number of special cases.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_allocate_and_probe_stack_space):
|
||||
Always probe the residual allocation at offset 1024, asserting
|
||||
that that is in range.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/stack-check-prologue-17.c: Expect the probe
|
||||
to be at offset 1024 rather than offset 0.
|
||||
* gcc.target/aarch64/stack-check-prologue-18.c: Likewise.
|
||||
* gcc.target/aarch64/stack-check-prologue-19.c: Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 12 ++++--------
|
||||
.../gcc.target/aarch64/stack-check-prologue-17.c | 2 +-
|
||||
.../gcc.target/aarch64/stack-check-prologue-18.c | 4 ++--
|
||||
.../gcc.target/aarch64/stack-check-prologue-19.c | 4 ++--
|
||||
4 files changed, 9 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 1ba4c2f89..6d835dc8f 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
are still safe. */
|
||||
if (residual)
|
||||
{
|
||||
- HOST_WIDE_INT residual_probe_offset = guard_used_by_caller;
|
||||
+ gcc_assert (guard_used_by_caller + byte_sp_alignment <= size);
|
||||
+
|
||||
/* If we're doing final adjustments, and we've done any full page
|
||||
allocations then any residual needs to be probed. */
|
||||
if (final_adjustment_p && rounded_size != 0)
|
||||
min_probe_threshold = 0;
|
||||
- /* If doing a small final adjustment, we always probe at offset 0.
|
||||
- This is done to avoid issues when the final adjustment is smaller
|
||||
- than the probing offset. */
|
||||
- else if (final_adjustment_p && rounded_size == 0)
|
||||
- residual_probe_offset = 0;
|
||||
|
||||
aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
|
||||
if (residual >= min_probe_threshold)
|
||||
@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
|
||||
HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
|
||||
"\n", residual);
|
||||
|
||||
- emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
- residual_probe_offset));
|
||||
+ emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
|
||||
+ guard_used_by_caller));
|
||||
emit_insn (gen_blockage ());
|
||||
}
|
||||
}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
|
||||
index 0d8a25d73..f0ec13897 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
|
||||
@@ -33,7 +33,7 @@ int test1(int z) {
|
||||
** ...
|
||||
** str x30, \[sp\]
|
||||
** sub sp, sp, #1040
|
||||
-** str xzr, \[sp\]
|
||||
+** str xzr, \[sp, #?1024\]
|
||||
** cbnz w0, .*
|
||||
** bl g
|
||||
** ...
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
|
||||
index 82447d20f..6383bec5e 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
|
||||
@@ -9,7 +9,7 @@ void g();
|
||||
** ...
|
||||
** str x30, \[sp\]
|
||||
** sub sp, sp, #4064
|
||||
-** str xzr, \[sp\]
|
||||
+** str xzr, \[sp, #?1024\]
|
||||
** cbnz w0, .*
|
||||
** bl g
|
||||
** ...
|
||||
@@ -50,7 +50,7 @@ int test1(int z) {
|
||||
** ...
|
||||
** str x30, \[sp\]
|
||||
** sub sp, sp, #1040
|
||||
-** str xzr, \[sp\]
|
||||
+** str xzr, \[sp, #?1024\]
|
||||
** cbnz w0, .*
|
||||
** bl g
|
||||
** ...
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
|
||||
index 73ac3e4e4..562039b5e 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
|
||||
@@ -9,7 +9,7 @@ void g();
|
||||
** ...
|
||||
** str x30, \[sp\]
|
||||
** sub sp, sp, #4064
|
||||
-** str xzr, \[sp\]
|
||||
+** str xzr, \[sp, #?1024\]
|
||||
** cbnz w0, .*
|
||||
** bl g
|
||||
** ...
|
||||
@@ -50,7 +50,7 @@ int test1(int z) {
|
||||
** ...
|
||||
** str x30, \[sp\]
|
||||
** sub sp, sp, #1040
|
||||
-** str xzr, \[sp\]
|
||||
+** str xzr, \[sp, #?1024\]
|
||||
** cbnz w0, .*
|
||||
** bl g
|
||||
** ...
|
||||
--
|
||||
2.33.0
|
||||
|
||||
280
0192-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
Normal file
280
0192-Backport-SME-aarch64-Explicitly-record-probe-registe.patch
Normal file
@ -0,0 +1,280 @@
|
||||
From 1bf3e9a04411b483c89d2e2f9096ab66800c3b3f Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:12 +0100
|
||||
Subject: [PATCH 100/157] [Backport][SME] aarch64: Explicitly record probe
|
||||
registers in frame info
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce957484eea15f09503fcffa4dfdfb70ad82f8f
|
||||
|
||||
The stack frame is currently divided into three areas:
|
||||
|
||||
A: the area above the hard frame pointer
|
||||
B: the SVE saves below the hard frame pointer
|
||||
C: the outgoing arguments
|
||||
|
||||
If the stack frame is allocated in one chunk, the allocation needs a
|
||||
probe if the frame size is >= guard_size - 1KiB. In addition, if the
|
||||
function is not a leaf function, it must probe an address no more than
|
||||
1KiB above the outgoing SP. We ensured the second condition by
|
||||
|
||||
(1) using single-chunk allocations for non-leaf functions only if
|
||||
the link register save slot is within 512 bytes of the bottom
|
||||
of the frame; and
|
||||
|
||||
(2) using the link register save as a probe (meaning, for instance,
|
||||
that it can't be individually shrink wrapped)
|
||||
|
||||
If instead the stack is allocated in multiple chunks, then:
|
||||
|
||||
* an allocation involving only the outgoing arguments (C above) requires
|
||||
a probe if the allocation size is > 1KiB
|
||||
|
||||
* any other allocation requires a probe if the allocation size
|
||||
is >= guard_size - 1KiB
|
||||
|
||||
* second and subsequent allocations require the previous allocation
|
||||
to probe at the bottom of the allocated area, regardless of the size
|
||||
of that previous allocation
|
||||
|
||||
The final point means that, unlike for single allocations,
|
||||
it can be necessary to have both a non-SVE register probe and
|
||||
an SVE register probe. For example:
|
||||
|
||||
* allocate A, probe using a non-SVE register save
|
||||
* allocate B, probe using an SVE register save
|
||||
* allocate C
|
||||
|
||||
The non-SVE register used in this case was again the link register.
|
||||
It was previously used even if the link register save slot was some
|
||||
bytes above the bottom of the non-SVE register saves, but an earlier
|
||||
patch avoided that by putting the link register save slot first.
|
||||
|
||||
As a belt-and-braces fix, this patch explicitly records which
|
||||
probe registers we're using and allows the non-SVE probe to be
|
||||
whichever register comes first (as for SVE).
|
||||
|
||||
The patch also avoids unnecessary probes in sve/pcs/stack_clash_3.c.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::sve_save_and_probe)
|
||||
(aarch64_frame::hard_fp_save_and_probe): New fields.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize them.
|
||||
Rather than asserting that a leaf function saves LR, instead assert
|
||||
that a leaf function saves something.
|
||||
(aarch64_get_separate_components): Prevent the chosen probe
|
||||
registers from being individually shrink-wrapped.
|
||||
(aarch64_allocate_and_probe_stack_space): Remove workaround for
|
||||
probe registers that aren't at the bottom of the previous allocation.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_3.c: Avoid redundant probes.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 68 +++++++++++++++----
|
||||
gcc/config/aarch64/aarch64.h | 8 +++
|
||||
.../aarch64/sve/pcs/stack_clash_3.c | 6 +-
|
||||
3 files changed, 64 insertions(+), 18 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 6d835dc8f..dd80ceba8 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8810,15 +8810,11 @@ aarch64_layout_frame (void)
|
||||
&& !crtl->abi->clobbers_full_reg_p (regno))
|
||||
frame.reg_offset[regno] = SLOT_REQUIRED;
|
||||
|
||||
- /* With stack-clash, LR must be saved in non-leaf functions. The saving of
|
||||
- LR counts as an implicit probe which allows us to maintain the invariant
|
||||
- described in the comment at expand_prologue. */
|
||||
- gcc_assert (crtl->is_leaf
|
||||
- || maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
|
||||
|
||||
poly_int64 offset = crtl->outgoing_args_size;
|
||||
gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
frame.bytes_below_saved_regs = offset;
|
||||
+ frame.sve_save_and_probe = INVALID_REGNUM;
|
||||
|
||||
/* Now assign stack slots for the registers. Start with the predicate
|
||||
registers, since predicate LDR and STR have a relatively small
|
||||
@@ -8826,6 +8822,8 @@ aarch64_layout_frame (void)
|
||||
for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
{
|
||||
+ if (frame.sve_save_and_probe == INVALID_REGNUM)
|
||||
+ frame.sve_save_and_probe = regno;
|
||||
frame.reg_offset[regno] = offset;
|
||||
offset += BYTES_PER_SVE_PRED;
|
||||
}
|
||||
@@ -8863,6 +8861,8 @@ aarch64_layout_frame (void)
|
||||
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
{
|
||||
+ if (frame.sve_save_and_probe == INVALID_REGNUM)
|
||||
+ frame.sve_save_and_probe = regno;
|
||||
frame.reg_offset[regno] = offset;
|
||||
offset += vector_save_size;
|
||||
}
|
||||
@@ -8872,10 +8872,18 @@ aarch64_layout_frame (void)
|
||||
frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
bool saves_below_hard_fp_p
|
||||
= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
|
||||
+ gcc_assert (!saves_below_hard_fp_p
|
||||
+ || (frame.sve_save_and_probe != INVALID_REGNUM
|
||||
+ && known_eq (frame.reg_offset[frame.sve_save_and_probe],
|
||||
+ frame.bytes_below_saved_regs)));
|
||||
+
|
||||
frame.bytes_below_hard_fp = offset;
|
||||
+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
|
||||
|
||||
auto allocate_gpr_slot = [&](unsigned int regno)
|
||||
{
|
||||
+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
|
||||
+ frame.hard_fp_save_and_probe = regno;
|
||||
frame.reg_offset[regno] = offset;
|
||||
if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
||||
frame.wb_push_candidate1 = regno;
|
||||
@@ -8909,6 +8917,8 @@ aarch64_layout_frame (void)
|
||||
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
{
|
||||
+ if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
|
||||
+ frame.hard_fp_save_and_probe = regno;
|
||||
/* If there is an alignment gap between integer and fp callee-saves,
|
||||
allocate the last fp register to it if possible. */
|
||||
if (regno == last_fp_reg
|
||||
@@ -8932,6 +8942,17 @@ aarch64_layout_frame (void)
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
|
||||
frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
+ gcc_assert (known_eq (frame.saved_regs_size,
|
||||
+ frame.below_hard_fp_saved_regs_size)
|
||||
+ || (frame.hard_fp_save_and_probe != INVALID_REGNUM
|
||||
+ && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
|
||||
+ frame.bytes_below_hard_fp)));
|
||||
+
|
||||
+ /* With stack-clash, a register must be saved in non-leaf functions.
|
||||
+ The saving of the bottommost register counts as an implicit probe,
|
||||
+ which allows us to maintain the invariant described in the comment
|
||||
+ at expand_prologue. */
|
||||
+ gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
|
||||
|
||||
offset += get_frame_size ();
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
@@ -9062,6 +9083,25 @@ aarch64_layout_frame (void)
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
|
||||
+ /* The frame is allocated in pieces, with each non-final piece
|
||||
+ including a register save at offset 0 that acts as a probe for
|
||||
+ the following piece. In addition, the save of the bottommost register
|
||||
+ acts as a probe for callees and allocas. Roll back any probes that
|
||||
+ aren't needed.
|
||||
+
|
||||
+ A probe isn't needed if it is associated with the final allocation
|
||||
+ (including callees and allocas) that happens before the epilogue is
|
||||
+ executed. */
|
||||
+ if (crtl->is_leaf
|
||||
+ && !cfun->calls_alloca
|
||||
+ && known_eq (frame.final_adjust, 0))
|
||||
+ {
|
||||
+ if (maybe_ne (frame.sve_callee_adjust, 0))
|
||||
+ frame.sve_save_and_probe = INVALID_REGNUM;
|
||||
+ else
|
||||
+ frame.hard_fp_save_and_probe = INVALID_REGNUM;
|
||||
+ }
|
||||
+
|
||||
/* Make sure the individual adjustments add up to the full frame size. */
|
||||
gcc_assert (known_eq (frame.initial_adjust
|
||||
+ frame.callee_adjust
|
||||
@@ -9639,13 +9679,6 @@ aarch64_get_separate_components (void)
|
||||
|
||||
poly_int64 offset = frame.reg_offset[regno];
|
||||
|
||||
- /* If the register is saved in the first SVE save slot, we use
|
||||
- it as a stack probe for -fstack-clash-protection. */
|
||||
- if (flag_stack_clash_protection
|
||||
- && maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
|
||||
- && known_eq (offset, frame.bytes_below_saved_regs))
|
||||
- continue;
|
||||
-
|
||||
/* Get the offset relative to the register we'll use. */
|
||||
if (frame_pointer_needed)
|
||||
offset -= frame.bytes_below_hard_fp;
|
||||
@@ -9680,6 +9713,13 @@ aarch64_get_separate_components (void)
|
||||
|
||||
bitmap_clear_bit (components, LR_REGNUM);
|
||||
bitmap_clear_bit (components, SP_REGNUM);
|
||||
+ if (flag_stack_clash_protection)
|
||||
+ {
|
||||
+ if (frame.sve_save_and_probe != INVALID_REGNUM)
|
||||
+ bitmap_clear_bit (components, frame.sve_save_and_probe);
|
||||
+ if (frame.hard_fp_save_and_probe != INVALID_REGNUM)
|
||||
+ bitmap_clear_bit (components, frame.hard_fp_save_and_probe);
|
||||
+ }
|
||||
|
||||
return components;
|
||||
}
|
||||
@@ -10226,8 +10266,8 @@ aarch64_epilogue_uses (int regno)
|
||||
When probing is needed, we emit a probe at the start of the prologue
|
||||
and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
|
||||
|
||||
- We have to track how much space has been allocated and the only stores
|
||||
- to the stack we track as implicit probes are the FP/LR stores.
|
||||
+ We can also use register saves as probes. These are stored in
|
||||
+ sve_save_and_probe and hard_fp_save_and_probe.
|
||||
|
||||
For outgoing arguments we probe if the size is larger than 1KB, such that
|
||||
the ABI specified buffer is maintained for the next callee.
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index f340237d0..af480d9e8 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -887,6 +887,14 @@ struct GTY (()) aarch64_frame
|
||||
This is the register they should use. */
|
||||
unsigned spare_pred_reg;
|
||||
|
||||
+ /* An SVE register that is saved below the hard frame pointer and that acts
|
||||
+ as a probe for later allocations, or INVALID_REGNUM if none. */
|
||||
+ unsigned sve_save_and_probe;
|
||||
+
|
||||
+ /* A register that is saved at the hard frame pointer and that acts
|
||||
+ as a probe for later allocations, or INVALID_REGNUM if none. */
|
||||
+ unsigned hard_fp_save_and_probe;
|
||||
+
|
||||
bool laid_out;
|
||||
|
||||
/* True if shadow call stack should be enabled for the current function. */
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
|
||||
index 3e01ec36c..3530a0d50 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
|
||||
@@ -11,11 +11,10 @@
|
||||
** mov x11, sp
|
||||
** ...
|
||||
** sub sp, sp, x13
|
||||
-** str p4, \[sp\]
|
||||
** cbz w0, [^\n]*
|
||||
+** str p4, \[sp\]
|
||||
** ...
|
||||
** ptrue p0\.b, all
|
||||
-** ldr p4, \[sp\]
|
||||
** addvl sp, sp, #1
|
||||
** ldr x24, \[sp\], 32
|
||||
** ret
|
||||
@@ -39,13 +38,12 @@ test_1 (int n)
|
||||
** mov x11, sp
|
||||
** ...
|
||||
** sub sp, sp, x13
|
||||
-** str p4, \[sp\]
|
||||
** cbz w0, [^\n]*
|
||||
+** str p4, \[sp\]
|
||||
** str p5, \[sp, #1, mul vl\]
|
||||
** str p6, \[sp, #2, mul vl\]
|
||||
** ...
|
||||
** ptrue p0\.b, all
|
||||
-** ldr p4, \[sp\]
|
||||
** addvl sp, sp, #1
|
||||
** ldr x24, \[sp\], 32
|
||||
** ret
|
||||
--
|
||||
2.33.0
|
||||
|
||||
160
0193-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
Normal file
160
0193-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch
Normal file
@ -0,0 +1,160 @@
|
||||
From 5c33afb2173f68a0166bd180977cd1e547df22dc Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:12 +0100
|
||||
Subject: [PATCH 101/157] [Backport][SME] aarch64: Remove
|
||||
below_hard_fp_saved_regs_size
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2abfc867d3ba025ac2146bb21b92a93e6325dec1
|
||||
|
||||
After previous patches, it's no longer necessary to store
|
||||
saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
|
||||
All measurements instead use the top or bottom of the frame as
|
||||
reference points.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame::saved_regs_size)
|
||||
(aarch64_frame::below_hard_fp_saved_regs_size): Delete.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Update accordingly.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++++-------------------
|
||||
gcc/config/aarch64/aarch64.h | 7 ------
|
||||
2 files changed, 21 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index dd80ceba8..0894ed325 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8869,9 +8869,8 @@ aarch64_layout_frame (void)
|
||||
|
||||
/* OFFSET is now the offset of the hard frame pointer from the bottom
|
||||
of the callee save area. */
|
||||
- frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
- bool saves_below_hard_fp_p
|
||||
- = maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
|
||||
+ auto below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
+ bool saves_below_hard_fp_p = maybe_ne (below_hard_fp_saved_regs_size, 0);
|
||||
gcc_assert (!saves_below_hard_fp_p
|
||||
|| (frame.sve_save_and_probe != INVALID_REGNUM
|
||||
&& known_eq (frame.reg_offset[frame.sve_save_and_probe],
|
||||
@@ -8941,9 +8940,8 @@ aarch64_layout_frame (void)
|
||||
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
|
||||
- frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
- gcc_assert (known_eq (frame.saved_regs_size,
|
||||
- frame.below_hard_fp_saved_regs_size)
|
||||
+ auto saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
|
||||
|| (frame.hard_fp_save_and_probe != INVALID_REGNUM
|
||||
&& known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
|
||||
frame.bytes_below_hard_fp)));
|
||||
@@ -8952,7 +8950,7 @@ aarch64_layout_frame (void)
|
||||
The saving of the bottommost register counts as an implicit probe,
|
||||
which allows us to maintain the invariant described in the comment
|
||||
at expand_prologue. */
|
||||
- gcc_assert (crtl->is_leaf || maybe_ne (frame.saved_regs_size, 0));
|
||||
+ gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
|
||||
|
||||
offset += get_frame_size ();
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
@@ -9009,7 +9007,7 @@ aarch64_layout_frame (void)
|
||||
|
||||
HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
|
||||
HOST_WIDE_INT const_saved_regs_size;
|
||||
- if (known_eq (frame.saved_regs_size, 0))
|
||||
+ if (known_eq (saved_regs_size, 0))
|
||||
frame.initial_adjust = frame.frame_size;
|
||||
else if (frame.frame_size.is_constant (&const_size)
|
||||
&& const_size < max_push_offset
|
||||
@@ -9022,7 +9020,7 @@ aarch64_layout_frame (void)
|
||||
frame.callee_adjust = const_size;
|
||||
}
|
||||
else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
|
||||
- && frame.saved_regs_size.is_constant (&const_saved_regs_size)
|
||||
+ && saved_regs_size.is_constant (&const_saved_regs_size)
|
||||
&& const_below_saved_regs + const_saved_regs_size < 512
|
||||
/* We could handle this case even with data below the saved
|
||||
registers, provided that that data left us with valid offsets
|
||||
@@ -9041,8 +9039,7 @@ aarch64_layout_frame (void)
|
||||
frame.initial_adjust = frame.frame_size;
|
||||
}
|
||||
else if (saves_below_hard_fp_p
|
||||
- && known_eq (frame.saved_regs_size,
|
||||
- frame.below_hard_fp_saved_regs_size))
|
||||
+ && known_eq (saved_regs_size, below_hard_fp_saved_regs_size))
|
||||
{
|
||||
/* Frame in which all saves are SVE saves:
|
||||
|
||||
@@ -9064,7 +9061,7 @@ aarch64_layout_frame (void)
|
||||
[save SVE registers relative to SP]
|
||||
sub sp, sp, bytes_below_saved_regs */
|
||||
frame.callee_adjust = const_above_fp;
|
||||
- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
|
||||
+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
else
|
||||
@@ -9079,7 +9076,7 @@ aarch64_layout_frame (void)
|
||||
[save SVE registers relative to SP]
|
||||
sub sp, sp, bytes_below_saved_regs */
|
||||
frame.initial_adjust = frame.bytes_above_hard_fp;
|
||||
- frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
|
||||
+ frame.sve_callee_adjust = below_hard_fp_saved_regs_size;
|
||||
frame.final_adjust = frame.bytes_below_saved_regs;
|
||||
}
|
||||
|
||||
@@ -10231,17 +10228,17 @@ aarch64_epilogue_uses (int regno)
|
||||
| local variables | <-- frame_pointer_rtx
|
||||
| |
|
||||
+-------------------------------+
|
||||
- | padding | \
|
||||
- +-------------------------------+ |
|
||||
- | callee-saved registers | | frame.saved_regs_size
|
||||
- +-------------------------------+ |
|
||||
- | LR' | |
|
||||
- +-------------------------------+ |
|
||||
- | FP' | |
|
||||
- +-------------------------------+ |<- hard_frame_pointer_rtx (aligned)
|
||||
- | SVE vector registers | | \
|
||||
- +-------------------------------+ | | below_hard_fp_saved_regs_size
|
||||
- | SVE predicate registers | / /
|
||||
+ | padding |
|
||||
+ +-------------------------------+
|
||||
+ | callee-saved registers |
|
||||
+ +-------------------------------+
|
||||
+ | LR' |
|
||||
+ +-------------------------------+
|
||||
+ | FP' |
|
||||
+ +-------------------------------+ <-- hard_frame_pointer_rtx (aligned)
|
||||
+ | SVE vector registers |
|
||||
+ +-------------------------------+
|
||||
+ | SVE predicate registers |
|
||||
+-------------------------------+
|
||||
| dynamic allocation |
|
||||
+-------------------------------+
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index af480d9e8..292ef2eec 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -801,18 +801,11 @@ struct GTY (()) aarch64_frame
|
||||
STACK_BOUNDARY. */
|
||||
HOST_WIDE_INT saved_varargs_size;
|
||||
|
||||
- /* The size of the callee-save registers with a slot in REG_OFFSET. */
|
||||
- poly_int64 saved_regs_size;
|
||||
-
|
||||
/* The number of bytes between the bottom of the static frame (the bottom
|
||||
of the outgoing arguments) and the bottom of the register save area.
|
||||
This value is always a multiple of STACK_BOUNDARY. */
|
||||
poly_int64 bytes_below_saved_regs;
|
||||
|
||||
- /* The size of the callee-save registers with a slot in REG_OFFSET that
|
||||
- are saved below the hard frame pointer. */
|
||||
- poly_int64 below_hard_fp_saved_regs_size;
|
||||
-
|
||||
/* The number of bytes between the bottom of the static frame (the bottom
|
||||
of the outgoing arguments) and the hard frame pointer. This value is
|
||||
always a multiple of STACK_BOUNDARY. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
301
0194-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
Normal file
301
0194-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch
Normal file
@ -0,0 +1,301 @@
|
||||
From b225443d64481bc225e29bf119d99b719c69cd3c Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 12 Sep 2023 16:05:13 +0100
|
||||
Subject: [PATCH 102/157] [Backport][SME] aarch64: Make stack smash canary
|
||||
protect saved registers
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3e4afea3b192c205c9a9da99f4cac65c68087eaf
|
||||
|
||||
AArch64 normally puts the saved registers near the bottom of the frame,
|
||||
immediately above any dynamic allocations. But this means that a
|
||||
stack-smash attack on those dynamic allocations could overwrite the
|
||||
saved registers without needing to reach as far as the stack smash
|
||||
canary.
|
||||
|
||||
The same thing could also happen for variable-sized arguments that are
|
||||
passed by value, since those are allocated before a call and popped on
|
||||
return.
|
||||
|
||||
This patch avoids that by putting the locals (and thus the canary) below
|
||||
the saved registers when stack smash protection is active.
|
||||
|
||||
The patch fixes CVE-2023-4039.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.cc (aarch64_save_regs_above_locals_p):
|
||||
New function.
|
||||
(aarch64_layout_frame): Use it to decide whether locals should
|
||||
go above or below the saved registers.
|
||||
(aarch64_expand_prologue): Update stack layout comment.
|
||||
Emit a stack tie after the final adjustment.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/stack-protector-8.c: New test.
|
||||
* gcc.target/aarch64/stack-protector-9.c: Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 46 +++++++--
|
||||
.../gcc.target/aarch64/stack-protector-8.c | 95 +++++++++++++++++++
|
||||
.../gcc.target/aarch64/stack-protector-9.c | 33 +++++++
|
||||
3 files changed, 168 insertions(+), 6 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 0894ed325..8d4dd2891 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8733,6 +8733,20 @@ aarch64_needs_frame_chain (void)
|
||||
return aarch64_use_frame_pointer;
|
||||
}
|
||||
|
||||
+/* Return true if the current function should save registers above
|
||||
+ the locals area, rather than below it. */
|
||||
+
|
||||
+static bool
|
||||
+aarch64_save_regs_above_locals_p ()
|
||||
+{
|
||||
+ /* When using stack smash protection, make sure that the canary slot
|
||||
+ comes between the locals and the saved registers. Otherwise,
|
||||
+ it would be possible for a carefully sized smash attack to change
|
||||
+ the saved registers (particularly LR and FP) without reaching the
|
||||
+ canary. */
|
||||
+ return crtl->stack_protect_guard;
|
||||
+}
|
||||
+
|
||||
/* Mark the registers that need to be saved by the callee and calculate
|
||||
the size of the callee-saved registers area and frame record (both FP
|
||||
and LR may be omitted). */
|
||||
@@ -8744,6 +8758,7 @@ aarch64_layout_frame (void)
|
||||
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
|
||||
bool frame_related_fp_reg_p = false;
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
+ poly_int64 top_of_locals = -1;
|
||||
|
||||
frame.emit_frame_chain = aarch64_needs_frame_chain ();
|
||||
|
||||
@@ -8810,9 +8825,16 @@ aarch64_layout_frame (void)
|
||||
&& !crtl->abi->clobbers_full_reg_p (regno))
|
||||
frame.reg_offset[regno] = SLOT_REQUIRED;
|
||||
|
||||
+ bool regs_at_top_p = aarch64_save_regs_above_locals_p ();
|
||||
|
||||
poly_int64 offset = crtl->outgoing_args_size;
|
||||
gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
+ if (regs_at_top_p)
|
||||
+ {
|
||||
+ offset += get_frame_size ();
|
||||
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
+ top_of_locals = offset;
|
||||
+ }
|
||||
frame.bytes_below_saved_regs = offset;
|
||||
frame.sve_save_and_probe = INVALID_REGNUM;
|
||||
|
||||
@@ -8952,15 +8974,18 @@ aarch64_layout_frame (void)
|
||||
at expand_prologue. */
|
||||
gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
|
||||
|
||||
- offset += get_frame_size ();
|
||||
- offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
- auto top_of_locals = offset;
|
||||
-
|
||||
+ if (!regs_at_top_p)
|
||||
+ {
|
||||
+ offset += get_frame_size ();
|
||||
+ offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
+ top_of_locals = offset;
|
||||
+ }
|
||||
offset += frame.saved_varargs_size;
|
||||
gcc_assert (multiple_p (offset, STACK_BOUNDARY / BITS_PER_UNIT));
|
||||
frame.frame_size = offset;
|
||||
|
||||
frame.bytes_above_hard_fp = frame.frame_size - frame.bytes_below_hard_fp;
|
||||
+ gcc_assert (known_ge (top_of_locals, 0));
|
||||
frame.bytes_above_locals = frame.frame_size - top_of_locals;
|
||||
|
||||
frame.initial_adjust = 0;
|
||||
@@ -10225,10 +10250,10 @@ aarch64_epilogue_uses (int regno)
|
||||
| for register varargs |
|
||||
| |
|
||||
+-------------------------------+
|
||||
- | local variables | <-- frame_pointer_rtx
|
||||
+ | local variables (1) | <-- frame_pointer_rtx
|
||||
| |
|
||||
+-------------------------------+
|
||||
- | padding |
|
||||
+ | padding (1) |
|
||||
+-------------------------------+
|
||||
| callee-saved registers |
|
||||
+-------------------------------+
|
||||
@@ -10240,6 +10265,10 @@ aarch64_epilogue_uses (int regno)
|
||||
+-------------------------------+
|
||||
| SVE predicate registers |
|
||||
+-------------------------------+
|
||||
+ | local variables (2) |
|
||||
+ +-------------------------------+
|
||||
+ | padding (2) |
|
||||
+ +-------------------------------+
|
||||
| dynamic allocation |
|
||||
+-------------------------------+
|
||||
| padding |
|
||||
@@ -10249,6 +10278,9 @@ aarch64_epilogue_uses (int regno)
|
||||
+-------------------------------+
|
||||
| | <-- stack_pointer_rtx (aligned)
|
||||
|
||||
+ The regions marked (1) and (2) are mutually exclusive. (2) is used
|
||||
+ when aarch64_save_regs_above_locals_p is true.
|
||||
+
|
||||
Dynamic stack allocations via alloca() decrease stack_pointer_rtx
|
||||
but leave frame_pointer_rtx and hard_frame_pointer_rtx
|
||||
unchanged.
|
||||
@@ -10444,6 +10476,8 @@ aarch64_expand_prologue (void)
|
||||
gcc_assert (known_eq (bytes_below_sp, final_adjust));
|
||||
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
|
||||
!frame_pointer_needed, true);
|
||||
+ if (emit_frame_chain && maybe_ne (final_adjust, 0))
|
||||
+ aarch64_emit_stack_tie (hard_frame_pointer_rtx);
|
||||
}
|
||||
|
||||
/* Return TRUE if we can use a simple_return insn.
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
|
||||
new file mode 100644
|
||||
index 000000000..e71d820e3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
|
||||
@@ -0,0 +1,95 @@
|
||||
+/* { dg-options " -O -fstack-protector-strong -mstack-protector-guard=sysreg -mstack-protector-guard-reg=tpidr2_el0 -mstack-protector-guard-offset=16" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||
+
|
||||
+void g(void *);
|
||||
+__SVBool_t *h(void *);
|
||||
+
|
||||
+/*
|
||||
+** test1:
|
||||
+** sub sp, sp, #288
|
||||
+** stp x29, x30, \[sp, #?272\]
|
||||
+** add x29, sp, #?272
|
||||
+** mrs (x[0-9]+), tpidr2_el0
|
||||
+** ldr (x[0-9]+), \[\1, #?16\]
|
||||
+** str \2, \[sp, #?264\]
|
||||
+** mov \2, #?0
|
||||
+** add x0, sp, #?8
|
||||
+** bl g
|
||||
+** ...
|
||||
+** mrs .*
|
||||
+** ...
|
||||
+** bne .*
|
||||
+** ...
|
||||
+** ldp x29, x30, \[sp, #?272\]
|
||||
+** add sp, sp, #?288
|
||||
+** ret
|
||||
+** bl __stack_chk_fail
|
||||
+*/
|
||||
+int test1() {
|
||||
+ int y[0x40];
|
||||
+ g(y);
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+/*
|
||||
+** test2:
|
||||
+** stp x29, x30, \[sp, #?-16\]!
|
||||
+** mov x29, sp
|
||||
+** sub sp, sp, #1040
|
||||
+** mrs (x[0-9]+), tpidr2_el0
|
||||
+** ldr (x[0-9]+), \[\1, #?16\]
|
||||
+** str \2, \[sp, #?1032\]
|
||||
+** mov \2, #?0
|
||||
+** add x0, sp, #?8
|
||||
+** bl g
|
||||
+** ...
|
||||
+** mrs .*
|
||||
+** ...
|
||||
+** bne .*
|
||||
+** ...
|
||||
+** add sp, sp, #?1040
|
||||
+** ldp x29, x30, \[sp\], #?16
|
||||
+** ret
|
||||
+** bl __stack_chk_fail
|
||||
+*/
|
||||
+int test2() {
|
||||
+ int y[0x100];
|
||||
+ g(y);
|
||||
+ return 1;
|
||||
+}
|
||||
+
|
||||
+#pragma GCC target "+sve"
|
||||
+
|
||||
+/*
|
||||
+** test3:
|
||||
+** stp x29, x30, \[sp, #?-16\]!
|
||||
+** mov x29, sp
|
||||
+** addvl sp, sp, #-18
|
||||
+** ...
|
||||
+** str p4, \[sp\]
|
||||
+** ...
|
||||
+** sub sp, sp, #272
|
||||
+** mrs (x[0-9]+), tpidr2_el0
|
||||
+** ldr (x[0-9]+), \[\1, #?16\]
|
||||
+** str \2, \[sp, #?264\]
|
||||
+** mov \2, #?0
|
||||
+** add x0, sp, #?8
|
||||
+** bl h
|
||||
+** ...
|
||||
+** mrs .*
|
||||
+** ...
|
||||
+** bne .*
|
||||
+** ...
|
||||
+** add sp, sp, #?272
|
||||
+** ...
|
||||
+** ldr p4, \[sp\]
|
||||
+** ...
|
||||
+** addvl sp, sp, #18
|
||||
+** ldp x29, x30, \[sp\], #?16
|
||||
+** ret
|
||||
+** bl __stack_chk_fail
|
||||
+*/
|
||||
+__SVBool_t test3() {
|
||||
+ int y[0x40];
|
||||
+ return *h(y);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
|
||||
new file mode 100644
|
||||
index 000000000..58f322aa4
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
|
||||
@@ -0,0 +1,33 @@
|
||||
+/* { dg-options "-O2 -mcpu=neoverse-v1 -fstack-protector-all" } */
|
||||
+/* { dg-final { check-function-bodies "**" "" } } */
|
||||
+
|
||||
+/*
|
||||
+** main:
|
||||
+** ...
|
||||
+** stp x29, x30, \[sp, #?-[0-9]+\]!
|
||||
+** ...
|
||||
+** sub sp, sp, #[0-9]+
|
||||
+** ...
|
||||
+** str x[0-9]+, \[x29, #?-8\]
|
||||
+** ...
|
||||
+*/
|
||||
+int f(const char *);
|
||||
+void g(void *);
|
||||
+int main(int argc, char* argv[])
|
||||
+{
|
||||
+ int a;
|
||||
+ int b;
|
||||
+ char c[2+f(argv[1])];
|
||||
+ int d[0x100];
|
||||
+ char y;
|
||||
+
|
||||
+ y=42; a=4; b=10;
|
||||
+ c[0] = 'h'; c[1] = '\0';
|
||||
+
|
||||
+ c[f(argv[2])] = '\0';
|
||||
+
|
||||
+ __builtin_printf("%d %d\n%s\n", a, b, c);
|
||||
+ g(d);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
201
0195-Backport-SME-Handle-epilogues-that-contain-jumps.patch
Normal file
201
0195-Backport-SME-Handle-epilogues-that-contain-jumps.patch
Normal file
@ -0,0 +1,201 @@
|
||||
From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 17 Oct 2023 23:45:43 +0100
|
||||
Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e
|
||||
|
||||
The prologue/epilogue pass allows the prologue sequence to contain
|
||||
jumps. The sequence is then partitioned into basic blocks using
|
||||
find_many_sub_basic_blocks.
|
||||
|
||||
This patch treats epilogues in a similar way. Since only one block
|
||||
might need to be split, the patch (re)introduces a find_sub_basic_blocks
|
||||
routine to handle a single block.
|
||||
|
||||
The new routine hard-codes the assumption that split_block will chain
|
||||
the new block immediately after the original block. The routine doesn't
|
||||
try to replicate the fix for PR81030, since that was specific to
|
||||
gimple->rtl expansion.
|
||||
|
||||
The patch is needed for follow-on aarch64 patches that add conditional
|
||||
code to the epilogue. The tests are part of those patches.
|
||||
|
||||
gcc/
|
||||
* cfgbuild.h (find_sub_basic_blocks): Declare.
|
||||
* cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
|
||||
split out from...
|
||||
(find_many_sub_basic_blocks): ...here.
|
||||
(find_sub_basic_blocks): New function.
|
||||
* function.cc (thread_prologue_and_epilogue_insns): Handle
|
||||
epilogues that contain jumps.
|
||||
---
|
||||
gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
|
||||
gcc/cfgbuild.h | 1 +
|
||||
gcc/function.cc | 4 +++
|
||||
3 files changed, 70 insertions(+), 30 deletions(-)
|
||||
|
||||
diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
|
||||
index 646a06614..58b865f29 100644
|
||||
--- a/gcc/cfgbuild.cc
|
||||
+++ b/gcc/cfgbuild.cc
|
||||
@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
|
||||
}
|
||||
}
|
||||
|
||||
+/* Update the profile information for BB, which was created by splitting
|
||||
+ an RTL block that had a non-final jump. */
|
||||
+
|
||||
+static void
|
||||
+update_profile_for_new_sub_basic_block (basic_block bb)
|
||||
+{
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+
|
||||
+ bool initialized_src = false, uninitialized_src = false;
|
||||
+ bb->count = profile_count::zero ();
|
||||
+ FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
+ {
|
||||
+ if (e->count ().initialized_p ())
|
||||
+ {
|
||||
+ bb->count += e->count ();
|
||||
+ initialized_src = true;
|
||||
+ }
|
||||
+ else
|
||||
+ uninitialized_src = true;
|
||||
+ }
|
||||
+ /* When some edges are missing with read profile, this is
|
||||
+ most likely because RTL expansion introduced loop.
|
||||
+ When profile is guessed we may have BB that is reachable
|
||||
+ from unlikely path as well as from normal path.
|
||||
+
|
||||
+ TODO: We should handle loops created during BB expansion
|
||||
+ correctly here. For now we assume all those loop to cycle
|
||||
+ precisely once. */
|
||||
+ if (!initialized_src
|
||||
+ || (uninitialized_src
|
||||
+ && profile_status_for_fn (cfun) < PROFILE_GUESSED))
|
||||
+ bb->count = profile_count::uninitialized ();
|
||||
+
|
||||
+ compute_outgoing_frequencies (bb);
|
||||
+}
|
||||
+
|
||||
/* Assume that some pass has inserted labels or control flow
|
||||
instructions within a basic block. Split basic blocks as needed
|
||||
and create edges. */
|
||||
@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
|
||||
if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
|
||||
FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
|
||||
{
|
||||
- edge e;
|
||||
- edge_iterator ei;
|
||||
-
|
||||
if (STATE (bb) == BLOCK_ORIGINAL)
|
||||
continue;
|
||||
if (STATE (bb) == BLOCK_NEW)
|
||||
{
|
||||
- bool initialized_src = false, uninitialized_src = false;
|
||||
- bb->count = profile_count::zero ();
|
||||
- FOR_EACH_EDGE (e, ei, bb->preds)
|
||||
- {
|
||||
- if (e->count ().initialized_p ())
|
||||
- {
|
||||
- bb->count += e->count ();
|
||||
- initialized_src = true;
|
||||
- }
|
||||
- else
|
||||
- uninitialized_src = true;
|
||||
- }
|
||||
- /* When some edges are missing with read profile, this is
|
||||
- most likely because RTL expansion introduced loop.
|
||||
- When profile is guessed we may have BB that is reachable
|
||||
- from unlikely path as well as from normal path.
|
||||
-
|
||||
- TODO: We should handle loops created during BB expansion
|
||||
- correctly here. For now we assume all those loop to cycle
|
||||
- precisely once. */
|
||||
- if (!initialized_src
|
||||
- || (uninitialized_src
|
||||
- && profile_status_for_fn (cfun) < PROFILE_GUESSED))
|
||||
- bb->count = profile_count::uninitialized ();
|
||||
+ update_profile_for_new_sub_basic_block (bb);
|
||||
+ continue;
|
||||
}
|
||||
- /* If nothing changed, there is no need to create new BBs. */
|
||||
- else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
|
||||
+ /* If nothing changed, there is no need to create new BBs. */
|
||||
+ if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
|
||||
{
|
||||
/* In rare occassions RTL expansion might have mistakely assigned
|
||||
a probabilities different from what is in CFG. This happens
|
||||
@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
|
||||
update_br_prob_note (bb);
|
||||
continue;
|
||||
}
|
||||
-
|
||||
compute_outgoing_frequencies (bb);
|
||||
}
|
||||
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
SET_STATE (bb, 0);
|
||||
}
|
||||
+
|
||||
+/* Like find_many_sub_basic_blocks, but look only within BB. */
|
||||
+
|
||||
+void
|
||||
+find_sub_basic_blocks (basic_block bb)
|
||||
+{
|
||||
+ basic_block end_bb = bb->next_bb;
|
||||
+ find_bb_boundaries (bb);
|
||||
+ if (bb->next_bb == end_bb)
|
||||
+ return;
|
||||
+
|
||||
+ /* Re-scan and wire in all edges. This expects simple (conditional)
|
||||
+ jumps at the end of each new basic blocks. */
|
||||
+ make_edges (bb, end_bb->prev_bb, 1);
|
||||
+
|
||||
+ /* Update branch probabilities. Expect only (un)conditional jumps
|
||||
+ to be created with only the forward edges. */
|
||||
+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
|
||||
+ {
|
||||
+ compute_outgoing_frequencies (bb);
|
||||
+ for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
|
||||
+ update_profile_for_new_sub_basic_block (bb);
|
||||
+ }
|
||||
+}
|
||||
diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
|
||||
index 85145da7f..53543bb75 100644
|
||||
--- a/gcc/cfgbuild.h
|
||||
+++ b/gcc/cfgbuild.h
|
||||
@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
|
||||
extern bool control_flow_insn_p (const rtx_insn *);
|
||||
extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
|
||||
extern void find_many_sub_basic_blocks (sbitmap);
|
||||
+extern void find_sub_basic_blocks (basic_block);
|
||||
|
||||
#endif /* GCC_CFGBUILD_H */
|
||||
diff --git a/gcc/function.cc b/gcc/function.cc
|
||||
index ddab43ca4..f4fc211a0 100644
|
||||
--- a/gcc/function.cc
|
||||
+++ b/gcc/function.cc
|
||||
@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
|
||||
&& returnjump_p (BB_END (e->src)))
|
||||
e->flags &= ~EDGE_FALLTHRU;
|
||||
}
|
||||
+
|
||||
+ find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
|
||||
}
|
||||
else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
|
||||
{
|
||||
@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
|
||||
set_insn_locations (seq, epilogue_location);
|
||||
|
||||
emit_insn_before (seq, insn);
|
||||
+
|
||||
+ find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
|
||||
}
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
709
0196-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
Normal file
709
0196-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch
Normal file
@ -0,0 +1,709 @@
|
||||
From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001
|
||||
From: Richard Sandiford <richard.sandiford@arm.com>
|
||||
Date: Tue, 17 Oct 2023 23:46:33 +0100
|
||||
Subject: [PATCH 104/157] [Backport][SME] aarch64: Use vecs to store register
|
||||
save order
|
||||
|
||||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0
|
||||
|
||||
aarch64_save/restore_callee_saves looped over registers in register
|
||||
number order. This in turn meant that we could only use LDP and STP
|
||||
for registers that were consecutive both number-wise and
|
||||
offset-wise (after unsaved registers are excluded).
|
||||
|
||||
This patch instead builds lists of the registers that we've decided to
|
||||
save, in offset order. We can then form LDP/STP pairs regardless of
|
||||
register number order, which in turn means that we can put the LR save
|
||||
slot first without losing LDP/STP opportunities.
|
||||
|
||||
gcc/
|
||||
* config/aarch64/aarch64.h (aarch64_frame): Add vectors that
|
||||
store the list of saved GPRs, FPRs and predicate registers.
|
||||
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize
|
||||
the lists of saved registers. Use them to choose push candidates.
|
||||
Invalidate pop candidates if we're not going to do a pop.
|
||||
(aarch64_next_callee_save): Delete.
|
||||
(aarch64_save_callee_saves): Take a list of registers,
|
||||
rather than a range. Make !skip_wb select only write-back
|
||||
candidates.
|
||||
(aarch64_expand_prologue): Update calls accordingly.
|
||||
(aarch64_restore_callee_saves): Take a list of registers,
|
||||
rather than a range. Always skip pop candidates. Also skip
|
||||
LR if shadow call stacks are enabled.
|
||||
(aarch64_expand_epilogue): Update calls accordingly.
|
||||
|
||||
gcc/testsuite/
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores
|
||||
to happen in offset order.
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise.
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise.
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise.
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise.
|
||||
* gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise.
|
||||
---
|
||||
gcc/config/aarch64/aarch64.cc | 203 +++++++++---------
|
||||
gcc/config/aarch64/aarch64.h | 9 +-
|
||||
.../aarch64/sve/pcs/stack_clash_2.c | 6 +-
|
||||
.../aarch64/sve/pcs/stack_clash_2_1024.c | 6 +-
|
||||
.../aarch64/sve/pcs/stack_clash_2_128.c | 6 +-
|
||||
.../aarch64/sve/pcs/stack_clash_2_2048.c | 6 +-
|
||||
.../aarch64/sve/pcs/stack_clash_2_256.c | 6 +-
|
||||
.../aarch64/sve/pcs/stack_clash_2_512.c | 6 +-
|
||||
8 files changed, 128 insertions(+), 120 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index 8d4dd2891..e10c9d763 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p ()
|
||||
static void
|
||||
aarch64_layout_frame (void)
|
||||
{
|
||||
- int regno, last_fp_reg = INVALID_REGNUM;
|
||||
+ unsigned regno, last_fp_reg = INVALID_REGNUM;
|
||||
machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
|
||||
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
|
||||
bool frame_related_fp_reg_p = false;
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
poly_int64 top_of_locals = -1;
|
||||
|
||||
+ vec_safe_truncate (frame.saved_gprs, 0);
|
||||
+ vec_safe_truncate (frame.saved_fprs, 0);
|
||||
+ vec_safe_truncate (frame.saved_prs, 0);
|
||||
+
|
||||
frame.emit_frame_chain = aarch64_needs_frame_chain ();
|
||||
|
||||
/* Adjust the outgoing arguments size if required. Keep it in sync with what
|
||||
@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void)
|
||||
for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
{
|
||||
+ vec_safe_push (frame.saved_prs, regno);
|
||||
if (frame.sve_save_and_probe == INVALID_REGNUM)
|
||||
frame.sve_save_and_probe = regno;
|
||||
frame.reg_offset[regno] = offset;
|
||||
@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void)
|
||||
If we don't have any vector registers to save, and we know how
|
||||
big the predicate save area is, we can just round it up to the
|
||||
next 16-byte boundary. */
|
||||
- if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ())
|
||||
+ if (last_fp_reg == INVALID_REGNUM && offset.is_constant ())
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
else
|
||||
{
|
||||
@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void)
|
||||
}
|
||||
|
||||
/* If we need to save any SVE vector registers, add them next. */
|
||||
- if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
|
||||
+ if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
|
||||
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
{
|
||||
+ vec_safe_push (frame.saved_fprs, regno);
|
||||
if (frame.sve_save_and_probe == INVALID_REGNUM)
|
||||
frame.sve_save_and_probe = regno;
|
||||
frame.reg_offset[regno] = offset;
|
||||
@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void)
|
||||
|
||||
auto allocate_gpr_slot = [&](unsigned int regno)
|
||||
{
|
||||
- if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
|
||||
- frame.hard_fp_save_and_probe = regno;
|
||||
+ vec_safe_push (frame.saved_gprs, regno);
|
||||
frame.reg_offset[regno] = offset;
|
||||
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
||||
- frame.wb_push_candidate1 = regno;
|
||||
- else if (frame.wb_push_candidate2 == INVALID_REGNUM)
|
||||
- frame.wb_push_candidate2 = regno;
|
||||
offset += UNITS_PER_WORD;
|
||||
};
|
||||
|
||||
@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void)
|
||||
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
|
||||
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
|
||||
{
|
||||
- if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
|
||||
- frame.hard_fp_save_and_probe = regno;
|
||||
+ vec_safe_push (frame.saved_fprs, regno);
|
||||
/* If there is an alignment gap between integer and fp callee-saves,
|
||||
allocate the last fp register to it if possible. */
|
||||
if (regno == last_fp_reg
|
||||
@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void)
|
||||
}
|
||||
|
||||
frame.reg_offset[regno] = offset;
|
||||
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
|
||||
- frame.wb_push_candidate1 = regno;
|
||||
- else if (frame.wb_push_candidate2 == INVALID_REGNUM
|
||||
- && frame.wb_push_candidate1 >= V0_REGNUM)
|
||||
- frame.wb_push_candidate2 = regno;
|
||||
offset += vector_save_size;
|
||||
}
|
||||
|
||||
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
|
||||
-
|
||||
auto saved_regs_size = offset - frame.bytes_below_saved_regs;
|
||||
- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
|
||||
- || (frame.hard_fp_save_and_probe != INVALID_REGNUM
|
||||
- && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
|
||||
- frame.bytes_below_hard_fp)));
|
||||
+
|
||||
+ array_slice<unsigned int> push_regs = (!vec_safe_is_empty (frame.saved_gprs)
|
||||
+ ? frame.saved_gprs
|
||||
+ : frame.saved_fprs);
|
||||
+ if (!push_regs.empty ()
|
||||
+ && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))
|
||||
+ {
|
||||
+ frame.hard_fp_save_and_probe = push_regs[0];
|
||||
+ frame.wb_push_candidate1 = push_regs[0];
|
||||
+ if (push_regs.size () > 1)
|
||||
+ frame.wb_push_candidate2 = push_regs[1];
|
||||
+ }
|
||||
+ else
|
||||
+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size));
|
||||
|
||||
/* With stack-clash, a register must be saved in non-leaf functions.
|
||||
The saving of the bottommost register counts as an implicit probe,
|
||||
@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void)
|
||||
+ frame.sve_callee_adjust
|
||||
+ frame.final_adjust, frame.frame_size));
|
||||
|
||||
- if (!frame.emit_frame_chain && frame.callee_adjust == 0)
|
||||
+ if (frame.callee_adjust == 0)
|
||||
{
|
||||
- /* We've decided not to associate any register saves with the initial
|
||||
- stack allocation. */
|
||||
- frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM;
|
||||
- frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM;
|
||||
+ /* We've decided not to do a "real" push and pop. However,
|
||||
+ setting up the frame chain is treated as being essentially
|
||||
+ a multi-instruction push. */
|
||||
+ frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM;
|
||||
+ if (!frame.emit_frame_chain)
|
||||
+ frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM;
|
||||
}
|
||||
|
||||
frame.laid_out = true;
|
||||
@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno)
|
||||
return known_ge (cfun->machine->frame.reg_offset[regno], 0);
|
||||
}
|
||||
|
||||
-/* Return the next register up from REGNO up to LIMIT for the callee
|
||||
- to save. */
|
||||
-
|
||||
-static unsigned
|
||||
-aarch64_next_callee_save (unsigned regno, unsigned limit)
|
||||
-{
|
||||
- while (regno <= limit && !aarch64_register_saved_on_entry (regno))
|
||||
- regno ++;
|
||||
- return regno;
|
||||
-}
|
||||
-
|
||||
/* Push the register number REGNO of mode MODE to the stack with write-back
|
||||
adjusting the stack by ADJUSTMENT. */
|
||||
|
||||
@@ -9424,41 +9419,46 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
|
||||
add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
|
||||
}
|
||||
|
||||
-/* Emit code to save the callee-saved registers from register number START
|
||||
- to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
|
||||
- bytes above the bottom of the static frame. Skip any write-back
|
||||
- candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
|
||||
- frame pointer has been set up. */
|
||||
+/* Emit code to save the callee-saved registers in REGS. Skip any
|
||||
+ write-back candidates if SKIP_WB is true, otherwise consider only
|
||||
+ write-back candidates.
|
||||
+
|
||||
+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom
|
||||
+ of the static frame. HARD_FP_VALID_P is true if the hard frame pointer
|
||||
+ has been set up. */
|
||||
|
||||
static void
|
||||
aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
||||
- unsigned start, unsigned limit, bool skip_wb,
|
||||
+ array_slice<unsigned int> regs, bool skip_wb,
|
||||
bool hard_fp_valid_p)
|
||||
{
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
rtx_insn *insn;
|
||||
- unsigned regno;
|
||||
- unsigned regno2;
|
||||
rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
|
||||
|
||||
- for (regno = aarch64_next_callee_save (start, limit);
|
||||
- regno <= limit;
|
||||
- regno = aarch64_next_callee_save (regno + 1, limit))
|
||||
+ auto skip_save_p = [&](unsigned int regno)
|
||||
+ {
|
||||
+ if (cfun->machine->reg_is_wrapped_separately[regno])
|
||||
+ return true;
|
||||
+
|
||||
+ if (skip_wb == (regno == frame.wb_push_candidate1
|
||||
+ || regno == frame.wb_push_candidate2))
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+ };
|
||||
+
|
||||
+ for (unsigned int i = 0; i < regs.size (); ++i)
|
||||
{
|
||||
- rtx reg, mem;
|
||||
+ unsigned int regno = regs[i];
|
||||
poly_int64 offset;
|
||||
bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
||||
|
||||
- if (skip_wb
|
||||
- && (regno == frame.wb_push_candidate1
|
||||
- || regno == frame.wb_push_candidate2))
|
||||
- continue;
|
||||
-
|
||||
- if (cfun->machine->reg_is_wrapped_separately[regno])
|
||||
+ if (skip_save_p (regno))
|
||||
continue;
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
- reg = gen_rtx_REG (mode, regno);
|
||||
+ rtx reg = gen_rtx_REG (mode, regno);
|
||||
offset = frame.reg_offset[regno] - bytes_below_sp;
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
poly_int64 sp_offset = offset;
|
||||
@@ -9485,12 +9485,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
||||
}
|
||||
offset -= fp_offset;
|
||||
}
|
||||
- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
||||
+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
||||
bool need_cfa_note_p = (base_rtx != stack_pointer_rtx);
|
||||
|
||||
+ unsigned int regno2;
|
||||
if (!aarch64_sve_mode_p (mode)
|
||||
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
|
||||
- && !cfun->machine->reg_is_wrapped_separately[regno2]
|
||||
+ && i + 1 < regs.size ()
|
||||
+ && (regno2 = regs[i + 1], !skip_save_p (regno2))
|
||||
&& known_eq (GET_MODE_SIZE (mode),
|
||||
frame.reg_offset[regno2] - frame.reg_offset[regno]))
|
||||
{
|
||||
@@ -9516,6 +9517,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
||||
}
|
||||
|
||||
regno = regno2;
|
||||
+ ++i;
|
||||
}
|
||||
else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
{
|
||||
@@ -9533,49 +9535,57 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
|
||||
}
|
||||
}
|
||||
|
||||
-/* Emit code to restore the callee registers from register number START
|
||||
- up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
|
||||
- bytes above the bottom of the static frame. Skip any write-back
|
||||
- candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
|
||||
- notes into CFI_OPS. */
|
||||
+/* Emit code to restore the callee registers in REGS, ignoring pop candidates
|
||||
+ and any other registers that are handled separately. Write the appropriate
|
||||
+ REG_CFA_RESTORE notes into CFI_OPS.
|
||||
+
|
||||
+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom
|
||||
+ of the static frame. */
|
||||
|
||||
static void
|
||||
-aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
|
||||
- unsigned limit, bool skip_wb, rtx *cfi_ops)
|
||||
+aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
|
||||
+ array_slice<unsigned int> regs, rtx *cfi_ops)
|
||||
{
|
||||
aarch64_frame &frame = cfun->machine->frame;
|
||||
- unsigned regno;
|
||||
- unsigned regno2;
|
||||
poly_int64 offset;
|
||||
rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
|
||||
|
||||
- for (regno = aarch64_next_callee_save (start, limit);
|
||||
- regno <= limit;
|
||||
- regno = aarch64_next_callee_save (regno + 1, limit))
|
||||
+ auto skip_restore_p = [&](unsigned int regno)
|
||||
{
|
||||
- bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
||||
if (cfun->machine->reg_is_wrapped_separately[regno])
|
||||
- continue;
|
||||
+ return true;
|
||||
+
|
||||
+ if (regno == frame.wb_pop_candidate1
|
||||
+ || regno == frame.wb_pop_candidate2)
|
||||
+ return true;
|
||||
|
||||
- rtx reg, mem;
|
||||
+ /* The shadow call stack code restores LR separately. */
|
||||
+ if (frame.is_scs_enabled && regno == LR_REGNUM)
|
||||
+ return true;
|
||||
|
||||
- if (skip_wb
|
||||
- && (regno == frame.wb_pop_candidate1
|
||||
- || regno == frame.wb_pop_candidate2))
|
||||
+ return false;
|
||||
+ };
|
||||
+
|
||||
+ for (unsigned int i = 0; i < regs.size (); ++i)
|
||||
+ {
|
||||
+ unsigned int regno = regs[i];
|
||||
+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
|
||||
+ if (skip_restore_p (regno))
|
||||
continue;
|
||||
|
||||
machine_mode mode = aarch64_reg_save_mode (regno);
|
||||
- reg = gen_rtx_REG (mode, regno);
|
||||
+ rtx reg = gen_rtx_REG (mode, regno);
|
||||
offset = frame.reg_offset[regno] - bytes_below_sp;
|
||||
rtx base_rtx = stack_pointer_rtx;
|
||||
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
|
||||
offset, ptrue);
|
||||
- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
||||
+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
|
||||
|
||||
+ unsigned int regno2;
|
||||
if (!aarch64_sve_mode_p (mode)
|
||||
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
|
||||
- && !cfun->machine->reg_is_wrapped_separately[regno2]
|
||||
+ && i + 1 < regs.size ()
|
||||
+ && (regno2 = regs[i + 1], !skip_restore_p (regno2))
|
||||
&& known_eq (GET_MODE_SIZE (mode),
|
||||
frame.reg_offset[regno2] - frame.reg_offset[regno]))
|
||||
{
|
||||
@@ -9588,6 +9598,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
|
||||
|
||||
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
|
||||
regno = regno2;
|
||||
+ ++i;
|
||||
}
|
||||
else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
|
||||
emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem));
|
||||
@@ -10409,13 +10420,10 @@ aarch64_expand_prologue (void)
|
||||
- frame.bytes_above_hard_fp);
|
||||
gcc_assert (known_ge (chain_offset, 0));
|
||||
|
||||
+ gcc_assert (reg1 == R29_REGNUM && reg2 == R30_REGNUM);
|
||||
if (callee_adjust == 0)
|
||||
- {
|
||||
- reg1 = R29_REGNUM;
|
||||
- reg2 = R30_REGNUM;
|
||||
- aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
|
||||
- false, false);
|
||||
- }
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs,
|
||||
+ false, false);
|
||||
else
|
||||
gcc_assert (known_eq (chain_offset, 0));
|
||||
aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
|
||||
@@ -10453,8 +10461,7 @@ aarch64_expand_prologue (void)
|
||||
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
|
||||
}
|
||||
|
||||
- aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
|
||||
- callee_adjust != 0 || emit_frame_chain,
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true,
|
||||
emit_frame_chain);
|
||||
if (maybe_ne (sve_callee_adjust, 0))
|
||||
{
|
||||
@@ -10465,10 +10472,9 @@ aarch64_expand_prologue (void)
|
||||
!frame_pointer_needed, false);
|
||||
bytes_below_sp -= sve_callee_adjust;
|
||||
}
|
||||
- aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
|
||||
- false, emit_frame_chain);
|
||||
- aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
|
||||
- callee_adjust != 0 || emit_frame_chain,
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_prs, true,
|
||||
+ emit_frame_chain);
|
||||
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_fprs, true,
|
||||
emit_frame_chain);
|
||||
|
||||
/* We may need to probe the final adjustment if it is larger than the guard
|
||||
@@ -10514,8 +10520,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
|
||||
unsigned reg1 = frame.wb_pop_candidate1;
|
||||
unsigned reg2 = frame.wb_pop_candidate2;
|
||||
- unsigned int last_gpr = (frame.is_scs_enabled
|
||||
- ? R29_REGNUM : R30_REGNUM);
|
||||
rtx cfi_ops = NULL;
|
||||
rtx_insn *insn;
|
||||
/* A stack clash protection prologue may not have left EP0_REGNUM or
|
||||
@@ -10579,10 +10583,8 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
|
||||
/* Restore the vector registers before the predicate registers,
|
||||
so that we can use P4 as a temporary for big-endian SVE frames. */
|
||||
- aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
|
||||
- callee_adjust != 0, &cfi_ops);
|
||||
- aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
|
||||
- false, &cfi_ops);
|
||||
+ aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops);
|
||||
+ aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops);
|
||||
if (maybe_ne (sve_callee_adjust, 0))
|
||||
aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
|
||||
|
||||
@@ -10590,8 +10592,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|
||||
restore x30, we don't need to restore x30 again in the traditional
|
||||
way. */
|
||||
aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
|
||||
- R0_REGNUM, last_gpr,
|
||||
- callee_adjust != 0, &cfi_ops);
|
||||
+ frame.saved_gprs, &cfi_ops);
|
||||
|
||||
if (need_barrier_p)
|
||||
aarch64_emit_stack_tie (stack_pointer_rtx);
|
||||
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
|
||||
index 292ef2eec..1591cde8b 100644
|
||||
--- a/gcc/config/aarch64/aarch64.h
|
||||
+++ b/gcc/config/aarch64/aarch64.h
|
||||
@@ -787,7 +787,7 @@ extern enum aarch64_processor aarch64_tune;
|
||||
|
||||
#define DEFAULT_PCC_STRUCT_RETURN 0
|
||||
|
||||
-#ifdef HAVE_POLY_INT_H
|
||||
+#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
|
||||
struct GTY (()) aarch64_frame
|
||||
{
|
||||
/* The offset from the bottom of the static frame (the bottom of the
|
||||
@@ -795,6 +795,13 @@ struct GTY (()) aarch64_frame
|
||||
needed. */
|
||||
poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
|
||||
|
||||
+ /* The list of GPRs, FPRs and predicate registers that have nonnegative
|
||||
+ entries in reg_offset. The registers are listed in order of
|
||||
+ increasing offset (rather than increasing register number). */
|
||||
+ vec<unsigned, va_gc_atomic> *saved_gprs;
|
||||
+ vec<unsigned, va_gc_atomic> *saved_fprs;
|
||||
+ vec<unsigned, va_gc_atomic> *saved_prs;
|
||||
+
|
||||
/* The number of extra stack bytes taken up by register varargs.
|
||||
This area is allocated by the callee at the very top of the
|
||||
frame. This value is rounded up to a multiple of
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
|
||||
index 4622a1eed..bbb45d266 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
|
||||
@@ -215,9 +215,9 @@ test_7 (void)
|
||||
** add sp, sp, #?16
|
||||
** ldr p4, \[sp\]
|
||||
** addvl sp, sp, #1
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -283,9 +283,9 @@ test_9 (int n)
|
||||
** addvl sp, x29, #-1
|
||||
** ldr p4, \[sp\]
|
||||
** addvl sp, sp, #1
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -319,9 +319,9 @@ test_10 (int n)
|
||||
** addvl sp, x29, #-1
|
||||
** ldr p4, \[sp\]
|
||||
** addvl sp, sp, #1
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** add sp, sp, #?3008
|
||||
** add sp, sp, #?126976
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
|
||||
index e31200fc2..9437c7a85 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
|
||||
@@ -176,9 +176,9 @@ test_7 (void)
|
||||
** add sp, sp, #?16
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?128
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -234,9 +234,9 @@ test_9 (int n)
|
||||
** sub sp, x29, #128
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?128
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -268,9 +268,9 @@ test_10 (int n)
|
||||
** sub sp, x29, #128
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?128
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** add sp, sp, #?3008
|
||||
** add sp, sp, #?126976
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
|
||||
index 41193b411..b4e1627fa 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
|
||||
@@ -176,9 +176,9 @@ test_7 (void)
|
||||
** add sp, sp, #?16
|
||||
** ldr p4, \[sp\]
|
||||
** add sp, sp, #?16
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -234,9 +234,9 @@ test_9 (int n)
|
||||
** sub sp, x29, #16
|
||||
** ldr p4, \[sp\]
|
||||
** add sp, sp, #?16
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -267,9 +267,9 @@ test_10 (int n)
|
||||
** sub sp, x29, #16
|
||||
** ldr p4, \[sp\]
|
||||
** add sp, sp, #?16
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** add sp, sp, #?3008
|
||||
** add sp, sp, #?126976
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
|
||||
index f63751678..921209379 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
|
||||
@@ -176,9 +176,9 @@ test_7 (void)
|
||||
** add sp, sp, #?16
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?256
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -234,9 +234,9 @@ test_9 (int n)
|
||||
** sub sp, x29, #256
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?256
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -268,9 +268,9 @@ test_10 (int n)
|
||||
** sub sp, x29, #256
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?256
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** add sp, sp, #?3008
|
||||
** add sp, sp, #?126976
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
|
||||
index 6bcbb5772..bd8bef0f0 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
|
||||
@@ -176,9 +176,9 @@ test_7 (void)
|
||||
** add sp, sp, #?16
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?32
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -234,9 +234,9 @@ test_9 (int n)
|
||||
** sub sp, x29, #32
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?32
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -267,9 +267,9 @@ test_10 (int n)
|
||||
** sub sp, x29, #32
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?32
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** add sp, sp, #?3008
|
||||
** add sp, sp, #?126976
|
||||
** ret
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
|
||||
index dc7df8e6b..2c76ccecd 100644
|
||||
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
|
||||
@@ -176,9 +176,9 @@ test_7 (void)
|
||||
** add sp, sp, #?16
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?64
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -234,9 +234,9 @@ test_9 (int n)
|
||||
** sub sp, x29, #64
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?64
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** mov x12, #?4144
|
||||
** add sp, sp, x12
|
||||
** ret
|
||||
@@ -268,9 +268,9 @@ test_10 (int n)
|
||||
** sub sp, x29, #64
|
||||
** ldr z16, \[sp\]
|
||||
** add sp, sp, #?64
|
||||
+** ldp x29, x30, \[sp\]
|
||||
** ldp x24, x25, \[sp, 16\]
|
||||
** ldr x26, \[sp, 32\]
|
||||
-** ldp x29, x30, \[sp\]
|
||||
** add sp, sp, #?3008
|
||||
** add sp, sp, #?126976
|
||||
** ret
|
||||
--
|
||||
2.33.0
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user