!498 Apply SME patches

From: @eastb233 
Reviewed-by: @li-yancheng 
Signed-off-by: @li-yancheng
openeuler-ci-bot 2024-11-19 08:18:24 +00:00 committed by Gitee
commit 6b3f4b97d9
152 changed files with 68279 additions and 270 deletions

From 9a36ca4e9188ee402327ec908d4f6860f2ee67eb Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Wed, 18 May 2022 16:02:12 +0100
Subject: [PATCH 005/157] [Backport][SME] AArch64: Cleanup CPU option
processing code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1be715f31605976d8e4336973d3b81c5b7cea79f
The --with-cpu/--with-arch configure option processing not only checks valid
arguments but also sets TARGET_CPU_DEFAULT with a CPU and extension bitmask.
This isn't used, however, since a --with-cpu is translated into a -mcpu option,
which is processed as if it were written on the command line (so
TARGET_CPU_DEFAULT is never accessed).

So remove all the complex processing and the bitmask, and just validate the
option. Also fix a bug that always reports valid architecture extensions as
invalid. As a result, the CPU processing in aarch64.cc can be simplified.
gcc/
* config.gcc (aarch64*-*-*): Simplify --with-cpu and --with-arch
processing. Add support for architectural extensions.
* config/aarch64/aarch64.h (TARGET_CPU_DEFAULT): Remove
AARCH64_CPU_DEFAULT_FLAGS.
(TARGET_CPU_NBITS): Remove.
(TARGET_CPU_MASK): Remove.
* config/aarch64/aarch64.cc (AARCH64_CPU_DEFAULT_FLAGS): Remove define.
(get_tune_cpu): Assert CPU is always valid.
(get_arch): Assert architecture is always valid.
(aarch64_override_options): Cleanup CPU selection code and simplify logic.
(aarch64_option_restore): Remove unnecessary checks on tune.
---
gcc/config.gcc | 43 +------------
gcc/config/aarch64/aarch64.cc | 115 +++++++++-------------------------
gcc/config/aarch64/aarch64.h | 9 +--
3 files changed, 32 insertions(+), 135 deletions(-)
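
For orientation before the diff: the old scheme packed two things into
TARGET_CPU_DEFAULT, and that packing is what this patch deletes. A condensed
sketch of old versus new, using only names that appear in the hunks below:

  /* Old scheme: configure packed a CPU id plus its extension flags into
     one constant, and consumers unpacked it with shifts and masks:

       TARGET_CPU_DEFAULT = TARGET_CPU_generic
                            | (AARCH64_CPU_DEFAULT_FLAGS << TARGET_CPU_NBITS);

       cpu_id    = TARGET_CPU_DEFAULT & TARGET_CPU_MASK;    /* low 8 bits */
       isa_flags = TARGET_CPU_DEFAULT >> TARGET_CPU_NBITS;  /* the rest */

     New scheme: TARGET_CPU_DEFAULT is just the enum value, and the default
     ISA flags are read from all_cores[TARGET_CPU_DEFAULT].flags.  */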
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 8fdde1576..3be450471 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4190,8 +4190,6 @@ case "${target}" in
pattern=AARCH64_CORE
fi
- ext_mask=AARCH64_CPU_DEFAULT_FLAGS
-
# Find the base CPU or ARCH id in aarch64-cores.def or
# aarch64-arches.def
if [ x"$base_val" = x ] \
@@ -4199,23 +4197,6 @@ case "${target}" in
${srcdir}/config/aarch64/$def \
> /dev/null; then
- if [ $which = arch ]; then
- base_id=`grep "^$pattern(\"$base_val\"," \
- ${srcdir}/config/aarch64/$def | \
- sed -e 's/^[^,]*,[ ]*//' | \
- sed -e 's/,.*$//'`
- # Extract the architecture flags from aarch64-arches.def
- ext_mask=`grep "^$pattern(\"$base_val\"," \
- ${srcdir}/config/aarch64/$def | \
- sed -e 's/)$//' | \
- sed -e 's/^.*,//'`
- else
- base_id=`grep "^$pattern(\"$base_val\"," \
- ${srcdir}/config/aarch64/$def | \
- sed -e 's/^[^,]*,[ ]*//' | \
- sed -e 's/,.*$//'`
- fi
-
# Disallow extensions in --with-tune=cortex-a53+crc.
if [ $which = tune ] && [ x"$ext_val" != x ]; then
echo "Architecture extensions not supported in --with-$which=$val" 1>&2
@@ -4246,25 +4227,7 @@ case "${target}" in
grep "^\"$base_ext\""`
if [ x"$base_ext" = x ] \
- || [[ -n $opt_line ]]; then
-
- # These regexp extract the elements based on
- # their group match index in the regexp.
- ext_canon=`echo -e "$opt_line" | \
- sed -e "s/$sed_patt/\2/"`
- ext_on=`echo -e "$opt_line" | \
- sed -e "s/$sed_patt/\3/"`
- ext_off=`echo -e "$opt_line" | \
- sed -e "s/$sed_patt/\4/"`
-
- if [ $ext = $base_ext ]; then
- # Adding extension
- ext_mask="("$ext_mask") | ("$ext_on" | "$ext_canon")"
- else
- # Removing extension
- ext_mask="("$ext_mask") & ~("$ext_off" | "$ext_canon")"
- fi
-
+ || [ x"$opt_line" != x ]; then
true
else
echo "Unknown extension used in --with-$which=$val" 1>&2
@@ -4273,10 +4236,6 @@ case "${target}" in
ext_val=`echo $ext_val | sed -e 's/[a-z0-9]\+//'`
done
- ext_mask="(("$ext_mask") << TARGET_CPU_NBITS)"
- if [ x"$base_id" != x ]; then
- target_cpu_cname="TARGET_CPU_$base_id | $ext_mask"
- fi
true
else
# Allow --with-$which=native.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 7c62ddb2a..ba888beb0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3014,8 +3014,6 @@ static const struct attribute_spec aarch64_attribute_table[] =
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
};
-#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
-
/* An ISA extension in the co-processor and main instruction set space. */
struct aarch64_option_extension
{
@@ -18411,39 +18409,24 @@ aarch64_validate_mtune (const char *str, const struct processor **res)
return false;
}
-static_assert (TARGET_CPU_generic < TARGET_CPU_MASK,
- "TARGET_CPU_NBITS is big enough");
-
-/* Return the CPU corresponding to the enum CPU.
- If it doesn't specify a cpu, return the default. */
+/* Return the CPU corresponding to the enum CPU. */
static const struct processor *
aarch64_get_tune_cpu (enum aarch64_processor cpu)
{
- if (cpu != aarch64_none)
- return &all_cores[cpu];
+ gcc_assert (cpu != aarch64_none);
- /* The & TARGET_CPU_MASK is to extract the bottom TARGET_CPU_NBITS bits that
- encode the default cpu as selected by the --with-cpu GCC configure option
- in config.gcc.
- ???: The whole TARGET_CPU_DEFAULT and AARCH64_CPU_DEFAULT_FLAGS
- flags mechanism should be reworked to make it more sane. */
- return &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
+ return &all_cores[cpu];
}
-/* Return the architecture corresponding to the enum ARCH.
- If it doesn't specify a valid architecture, return the default. */
+/* Return the architecture corresponding to the enum ARCH. */
static const struct processor *
aarch64_get_arch (enum aarch64_arch arch)
{
- if (arch != aarch64_no_arch)
- return &all_architectures[arch];
-
- const struct processor *cpu
- = &all_cores[TARGET_CPU_DEFAULT & TARGET_CPU_MASK];
+ gcc_assert (arch != aarch64_no_arch);
- return &all_architectures[cpu->arch];
+ return &all_architectures[arch];
}
/* Return the VG value associated with -msve-vector-bits= value VALUE. */
@@ -18481,10 +18464,6 @@ aarch64_override_options (void)
uint64_t arch_isa = 0;
aarch64_isa_flags = 0;
- bool valid_cpu = true;
- bool valid_tune = true;
- bool valid_arch = true;
-
selected_cpu = NULL;
selected_arch = NULL;
selected_tune = NULL;
@@ -18499,77 +18478,56 @@ aarch64_override_options (void)
If either of -march or -mtune is given, they override their
respective component of -mcpu. */
if (aarch64_cpu_string)
- valid_cpu = aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu,
- &cpu_isa);
+ aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa);
if (aarch64_arch_string)
- valid_arch = aarch64_validate_march (aarch64_arch_string, &selected_arch,
- &arch_isa);
+ aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa);
if (aarch64_tune_string)
- valid_tune = aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
+ aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
- /* If the user did not specify a processor, choose the default
- one for them. This will be the CPU set during configuration using
- --with-cpu, otherwise it is "generic". */
- if (!selected_cpu)
- {
- if (selected_arch)
- {
- selected_cpu = &all_cores[selected_arch->ident];
- aarch64_isa_flags = arch_isa;
- explicit_arch = selected_arch->arch;
- }
- else
- {
- /* Get default configure-time CPU. */
- selected_cpu = aarch64_get_tune_cpu (aarch64_none);
- aarch64_isa_flags = TARGET_CPU_DEFAULT >> TARGET_CPU_NBITS;
- }
-
- if (selected_tune)
- explicit_tune_core = selected_tune->ident;
- }
- /* If both -mcpu and -march are specified check that they are architecturally
- compatible, warn if they're not and prefer the -march ISA flags. */
- else if (selected_arch)
+ if (selected_cpu && selected_arch)
{
+ /* If both -mcpu and -march are specified, warn if they are not
+ architecturally compatible and prefer the -march ISA flags. */
if (selected_arch->arch != selected_cpu->arch)
{
warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
aarch64_cpu_string,
aarch64_arch_string);
}
+
aarch64_isa_flags = arch_isa;
- explicit_arch = selected_arch->arch;
- explicit_tune_core = selected_tune ? selected_tune->ident
- : selected_cpu->ident;
}
- else
+ else if (selected_cpu)
{
- /* -mcpu but no -march. */
- aarch64_isa_flags = cpu_isa;
- explicit_tune_core = selected_tune ? selected_tune->ident
- : selected_cpu->ident;
- gcc_assert (selected_cpu);
selected_arch = &all_architectures[selected_cpu->arch];
- explicit_arch = selected_arch->arch;
+ aarch64_isa_flags = cpu_isa;
}
-
- /* Set the arch as well as we will need it when outputing
- the .arch directive in assembly. */
- if (!selected_arch)
+ else if (selected_arch)
{
- gcc_assert (selected_cpu);
+ selected_cpu = &all_cores[selected_arch->ident];
+ aarch64_isa_flags = arch_isa;
+ }
+ else
+ {
+ /* No -mcpu or -march specified, so use the default CPU. */
+ selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
selected_arch = &all_architectures[selected_cpu->arch];
+ aarch64_isa_flags = selected_cpu->flags;
}
+ explicit_arch = selected_arch->arch;
if (!selected_tune)
selected_tune = selected_cpu;
+ explicit_tune_core = selected_tune->ident;
+
+ gcc_assert (explicit_tune_core != aarch64_none);
+ gcc_assert (explicit_arch != aarch64_no_arch);
if (aarch64_enable_bti == 2)
{
@@ -18605,15 +18563,6 @@ aarch64_override_options (void)
if (aarch64_ra_sign_scope != AARCH64_FUNCTION_NONE && TARGET_ILP32)
sorry ("return address signing is only supported for %<-mabi=lp64%>");
- /* Make sure we properly set up the explicit options. */
- if ((aarch64_cpu_string && valid_cpu)
- || (aarch64_tune_string && valid_tune))
- gcc_assert (explicit_tune_core != aarch64_none);
-
- if ((aarch64_cpu_string && valid_cpu)
- || (aarch64_arch_string && valid_arch))
- gcc_assert (explicit_arch != aarch64_no_arch);
-
/* The pass to insert speculation tracking runs before
shrink-wrapping and the latter does not know how to update the
tracking status. So disable it in this case. */
@@ -18719,11 +18668,7 @@ aarch64_option_restore (struct gcc_options *opts,
opts->x_explicit_arch = ptr->x_explicit_arch;
selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
- if (opts->x_explicit_tune_core == aarch64_none
- && opts->x_explicit_arch != aarch64_no_arch)
- selected_tune = &all_cores[selected_arch->ident];
- else
- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
+ selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
opts->x_aarch64_branch_protection_string
= ptr->x_aarch64_branch_protection_string;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 6834c3e99..14e2af054 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -811,16 +811,9 @@ enum target_cpus
TARGET_CPU_generic
};
-/* Define how many bits are used to represent the CPU in TARGET_CPU_DEFAULT.
- This needs to be big enough to fit the value of TARGET_CPU_generic.
- All bits after this are used to represent the AARCH64_CPU_DEFAULT_FLAGS. */
-#define TARGET_CPU_NBITS 8
-#define TARGET_CPU_MASK ((1 << TARGET_CPU_NBITS) - 1)
-
/* If there is no CPU defined at configure, use generic as default. */
#ifndef TARGET_CPU_DEFAULT
-#define TARGET_CPU_DEFAULT \
- (TARGET_CPU_generic | (AARCH64_CPU_DEFAULT_FLAGS << TARGET_CPU_NBITS))
+# define TARGET_CPU_DEFAULT TARGET_CPU_generic
#endif
/* If inserting NOP before a mult-accumulate insn remember to adjust the
--
2.33.0

From ba32885874fc6caa90f6ae5e264bc3d51f64a26e Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Wed, 1 Jun 2022 16:46:36 +0100
Subject: [PATCH 006/157] [Backport][SME] AArch64: Cleanup option processing
code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ae54c1b09963779c5c3914782324ff48af32e2f1
Further cleanup option processing. Remove the duplication of global
variables for CPU and tune settings so that CPU option processing is
simplified even further. Move global variables that need save and
restore due to target option processing into aarch64.opt. This removes
the need for explicit saving/restoring and unnecessary reparsing of
options.
gcc/
* config/aarch64/aarch64.opt (explicit_tune_core): Rename to
selected_tune.
(explicit_arch): Rename to selected_arch.
(x_aarch64_override_tune_string): Remove.
(aarch64_ra_sign_key): Add as TargetVariable so it gets saved/restored.
(aarch64_override_tune_string): Add Save so it gets saved/restored.
* config/aarch64/aarch64.h (aarch64_architecture_version): Remove.
* config/aarch64/aarch64.cc (aarch64_architecture_version): Remove.
(processor): Remove architecture_version field.
(selected_arch): Remove global.
(selected_cpu): Remove global.
(selected_tune): Remove global.
(aarch64_ra_sign_key): Move global to aarch64.opt so it is saved.
(aarch64_override_options_internal): Use aarch64_get_tune_cpu.
(aarch64_override_options): Further simplify code to only set
selected_arch and selected_tune globals.
(aarch64_option_save): Remove now that target options are saved.
(aarch64_option_restore): Remove redundant target option restores.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Use
AARCH64_ISA_V9.
* config/aarch64/aarch64-opts.h (aarch64_key_type): Add, moved from...
* config/aarch64/aarch64-protos.h (aarch64_key_type): Remove.
(aarch64_ra_sign_key): Remove.
---
gcc/config/aarch64/aarch64-c.cc | 2 +-
gcc/config/aarch64/aarch64-opts.h | 6 +
gcc/config/aarch64/aarch64-protos.h | 8 --
gcc/config/aarch64/aarch64.cc | 183 ++++++++++------------------
gcc/config/aarch64/aarch64.h | 3 -
gcc/config/aarch64/aarch64.opt | 12 +-
6 files changed, 76 insertions(+), 138 deletions(-)
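
A note on why the explicit save/restore hooks can be deleted: state declared
as TargetVariable (or marked Save) in aarch64.opt is copied in and out of
struct cl_target_option by code that GCC's options machinery generates, so
the backend reads the saved copy back directly instead of hand-copying
fields. A minimal sketch of the mapping, assuming only the generated field
names visible in the hunks below:

  /* aarch64.opt:   TargetVariable
                    enum aarch64_processor selected_tune = aarch64_none

     generates      opts->x_selected_tune   (the live option state)
     and            ptr->x_selected_tune    (the copy saved inside
                                             struct cl_target_option),

     with the copying between the two emitted automatically -- which is why
     aarch64_option_save disappears and aarch64_option_restore shrinks to a
     single call of aarch64_override_options_internal.  */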
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index a4c407724..90d45e45d 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
{
aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile);
- builtin_define_with_int_value ("__ARM_ARCH", aarch64_architecture_version);
+ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8);
builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
flag_short_enums ? 1 : 4);
diff --git a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h
index 93572fe83..421648a15 100644
--- a/gcc/config/aarch64/aarch64-opts.h
+++ b/gcc/config/aarch64/aarch64-opts.h
@@ -98,4 +98,10 @@ enum stack_protector_guard {
SSP_GLOBAL /* global canary */
};
+/* The key type that -msign-return-address should use. */
+enum aarch64_key_type {
+ AARCH64_KEY_A,
+ AARCH64_KEY_B
+};
+
#endif
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 475d174dd..e60ce3c36 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -672,14 +672,6 @@ enum simd_immediate_check {
AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
};
-/* The key type that -msign-return-address should use. */
-enum aarch64_key_type {
- AARCH64_KEY_A,
- AARCH64_KEY_B
-};
-
-extern enum aarch64_key_type aarch64_ra_sign_key;
-
extern struct tune_params aarch64_tune_params;
/* The available SVE predicate patterns, known in the ACLE as "svpattern". */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ba888beb0..254ecfaa2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -306,9 +306,6 @@ static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
aarch64_addr_query_type);
static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);
-/* Major revision number of the ARM Architecture implemented by the target. */
-unsigned aarch64_architecture_version;
-
/* The processor for which instructions should be scheduled. */
enum aarch64_processor aarch64_tune = cortexa53;
@@ -2931,7 +2928,6 @@ struct processor
enum aarch64_processor ident;
enum aarch64_processor sched_core;
enum aarch64_arch arch;
- unsigned architecture_version;
const uint64_t flags;
const struct tune_params *const tune;
};
@@ -2940,9 +2936,9 @@ struct processor
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
- {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, ARCH_REV, FLAGS, NULL},
+ {NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, FLAGS, NULL},
#include "aarch64-arches.def"
- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
};
/* Processor cores implementing AArch64. */
@@ -2950,23 +2946,13 @@ static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
{NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
- all_architectures[AARCH64_ARCH_##ARCH].architecture_version, \
FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
- {"generic", generic, cortexa53, AARCH64_ARCH_8A, 8,
+ {"generic", generic, cortexa53, AARCH64_ARCH_8A,
AARCH64_FL_FOR_ARCH8, &generic_tunings},
- {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, 0, NULL}
+ {NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
};
-
-/* Target specification. These are populated by the -march, -mtune, -mcpu
- handling code or by target attributes. */
-static const struct processor *selected_arch;
-static const struct processor *selected_cpu;
-static const struct processor *selected_tune;
-
-enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A;
-
/* The current tuning set. */
struct tune_params aarch64_tune_params = generic_tunings;
@@ -10633,8 +10619,8 @@ aarch64_case_values_threshold (void)
/* Use the specified limit for the number of cases before using jump
tables at higher optimization levels. */
if (optimize > 2
- && selected_cpu->tune->max_case_values != 0)
- return selected_cpu->tune->max_case_values;
+ && aarch64_tune_params.max_case_values != 0)
+ return aarch64_tune_params.max_case_values;
else
return optimize_size ? 8 : 11;
}
@@ -17769,6 +17755,26 @@ initialize_aarch64_tls_size (struct gcc_options *opts)
return;
}
+/* Return the CPU corresponding to the enum CPU. */
+
+static const struct processor *
+aarch64_get_tune_cpu (enum aarch64_processor cpu)
+{
+ gcc_assert (cpu != aarch64_none);
+
+ return &all_cores[cpu];
+}
+
+/* Return the architecture corresponding to the enum ARCH. */
+
+static const struct processor *
+aarch64_get_arch (enum aarch64_arch arch)
+{
+ gcc_assert (arch != aarch64_no_arch);
+
+ return &all_architectures[arch];
+}
+
/* Parse STRING looking for options in the format:
string :: option:string
option :: name=substring
@@ -17879,18 +17885,18 @@ aarch64_override_options_after_change_1 (struct gcc_options *opts)
void
aarch64_override_options_internal (struct gcc_options *opts)
{
- aarch64_tune_flags = selected_tune->flags;
- aarch64_tune = selected_tune->sched_core;
+ const struct processor *tune = aarch64_get_tune_cpu (opts->x_selected_tune);
+ aarch64_tune_flags = tune->flags;
+ aarch64_tune = tune->sched_core;
/* Make a copy of the tuning parameters attached to the core, which
we may later overwrite. */
- aarch64_tune_params = *(selected_tune->tune);
- aarch64_architecture_version = selected_arch->architecture_version;
- if (selected_tune->tune == &generic_tunings)
+ aarch64_tune_params = *(tune->tune);
+ if (tune->tune == &generic_tunings)
aarch64_adjust_generic_arch_tuning (aarch64_tune_params);
if (opts->x_aarch64_override_tune_string)
aarch64_parse_override_string (opts->x_aarch64_override_tune_string,
- &aarch64_tune_params);
+ &aarch64_tune_params);
/* This target defaults to strict volatile bitfields. */
if (opts->x_flag_strict_volatile_bitfields < 0 && abi_version_at_least (2))
@@ -18051,13 +18057,6 @@ aarch64_override_options_internal (struct gcc_options *opts)
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
opts->x_flag_prefetch_loop_arrays = 1;
- if (opts->x_aarch64_arch_string == NULL)
- opts->x_aarch64_arch_string = selected_arch->name;
- if (opts->x_aarch64_cpu_string == NULL)
- opts->x_aarch64_cpu_string = selected_cpu->name;
- if (opts->x_aarch64_tune_string == NULL)
- opts->x_aarch64_tune_string = selected_tune->name;
-
aarch64_override_options_after_change_1 (opts);
}
@@ -18409,26 +18408,6 @@ aarch64_validate_mtune (const char *str, const struct processor **res)
return false;
}
-/* Return the CPU corresponding to the enum CPU. */
-
-static const struct processor *
-aarch64_get_tune_cpu (enum aarch64_processor cpu)
-{
- gcc_assert (cpu != aarch64_none);
-
- return &all_cores[cpu];
-}
-
-/* Return the architecture corresponding to the enum ARCH. */
-
-static const struct processor *
-aarch64_get_arch (enum aarch64_arch arch)
-{
- gcc_assert (arch != aarch64_no_arch);
-
- return &all_architectures[arch];
-}
-
/* Return the VG value associated with -msve-vector-bits= value VALUE. */
static poly_uint16
@@ -18464,9 +18443,9 @@ aarch64_override_options (void)
uint64_t arch_isa = 0;
aarch64_isa_flags = 0;
- selected_cpu = NULL;
- selected_arch = NULL;
- selected_tune = NULL;
+ const struct processor *cpu = NULL;
+ const struct processor *arch = NULL;
+ const struct processor *tune = NULL;
if (aarch64_harden_sls_string)
aarch64_validate_sls_mitigation (aarch64_harden_sls_string);
@@ -18478,56 +18457,52 @@ aarch64_override_options (void)
If either of -march or -mtune is given, they override their
respective component of -mcpu. */
if (aarch64_cpu_string)
- aarch64_validate_mcpu (aarch64_cpu_string, &selected_cpu, &cpu_isa);
+ aarch64_validate_mcpu (aarch64_cpu_string, &cpu, &cpu_isa);
if (aarch64_arch_string)
- aarch64_validate_march (aarch64_arch_string, &selected_arch, &arch_isa);
+ aarch64_validate_march (aarch64_arch_string, &arch, &arch_isa);
if (aarch64_tune_string)
- aarch64_validate_mtune (aarch64_tune_string, &selected_tune);
+ aarch64_validate_mtune (aarch64_tune_string, &tune);
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
- if (selected_cpu && selected_arch)
+ if (cpu && arch)
{
/* If both -mcpu and -march are specified, warn if they are not
architecturally compatible and prefer the -march ISA flags. */
- if (selected_arch->arch != selected_cpu->arch)
+ if (arch->arch != cpu->arch)
{
warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
aarch64_cpu_string,
aarch64_arch_string);
}
+ selected_arch = arch->arch;
aarch64_isa_flags = arch_isa;
}
- else if (selected_cpu)
+ else if (cpu)
{
- selected_arch = &all_architectures[selected_cpu->arch];
+ selected_arch = cpu->arch;
aarch64_isa_flags = cpu_isa;
}
- else if (selected_arch)
+ else if (arch)
{
- selected_cpu = &all_cores[selected_arch->ident];
+ cpu = &all_cores[arch->ident];
+ selected_arch = arch->arch;
aarch64_isa_flags = arch_isa;
}
else
{
/* No -mcpu or -march specified, so use the default CPU. */
- selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
- selected_arch = &all_architectures[selected_cpu->arch];
- aarch64_isa_flags = selected_cpu->flags;
+ cpu = &all_cores[TARGET_CPU_DEFAULT];
+ selected_arch = cpu->arch;
+ aarch64_isa_flags = cpu->flags;
}
- explicit_arch = selected_arch->arch;
- if (!selected_tune)
- selected_tune = selected_cpu;
- explicit_tune_core = selected_tune->ident;
-
- gcc_assert (explicit_tune_core != aarch64_none);
- gcc_assert (explicit_arch != aarch64_no_arch);
+ selected_tune = tune ? tune->ident : cpu->ident;
if (aarch64_enable_bti == 2)
{
@@ -18646,38 +18621,14 @@ initialize_aarch64_code_model (struct gcc_options *opts)
}
}
-/* Implement TARGET_OPTION_SAVE. */
-
-static void
-aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts,
- struct gcc_options */* opts_set */)
-{
- ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string;
- ptr->x_aarch64_branch_protection_string
- = opts->x_aarch64_branch_protection_string;
-}
-
/* Implements TARGET_OPTION_RESTORE. Restore the backend codegen decisions
using the information saved in PTR. */
static void
aarch64_option_restore (struct gcc_options *opts,
- struct gcc_options */* opts_set */,
- struct cl_target_option *ptr)
+ struct gcc_options * /* opts_set */,
+ struct cl_target_option * /* ptr */)
{
- opts->x_explicit_arch = ptr->x_explicit_arch;
- selected_arch = aarch64_get_arch (ptr->x_explicit_arch);
- opts->x_explicit_tune_core = ptr->x_explicit_tune_core;
- selected_tune = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
- opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string;
- opts->x_aarch64_branch_protection_string
- = ptr->x_aarch64_branch_protection_string;
- if (opts->x_aarch64_branch_protection_string)
- {
- aarch64_parse_branch_protection (opts->x_aarch64_branch_protection_string,
- NULL);
- }
-
aarch64_override_options_internal (opts);
}
@@ -18687,11 +18638,11 @@ static void
aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
{
const struct processor *cpu
- = aarch64_get_tune_cpu (ptr->x_explicit_tune_core);
- uint64_t isa_flags = ptr->x_aarch64_isa_flags;
- const struct processor *arch = aarch64_get_arch (ptr->x_explicit_arch);
+ = aarch64_get_tune_cpu (ptr->x_selected_tune);
+ const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch);
std::string extension
- = aarch64_get_extension_string_for_isa_flags (isa_flags, arch->flags);
+ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags,
+ arch->flags);
fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
fprintf (file, "%*sselected arch = %s%s\n", indent, "",
@@ -18804,8 +18755,7 @@ aarch64_handle_attr_arch (const char *str)
if (parse_res == AARCH64_PARSE_OK)
{
gcc_assert (tmp_arch);
- selected_arch = tmp_arch;
- explicit_arch = selected_arch->arch;
+ selected_arch = tmp_arch->arch;
return true;
}
@@ -18843,11 +18793,8 @@ aarch64_handle_attr_cpu (const char *str)
if (parse_res == AARCH64_PARSE_OK)
{
gcc_assert (tmp_cpu);
- selected_tune = tmp_cpu;
- explicit_tune_core = selected_tune->ident;
-
- selected_arch = &all_architectures[tmp_cpu->arch];
- explicit_arch = selected_arch->arch;
+ selected_tune = tmp_cpu->ident;
+ selected_arch = tmp_cpu->arch;
return true;
}
@@ -18915,8 +18862,7 @@ aarch64_handle_attr_tune (const char *str)
if (parse_res == AARCH64_PARSE_OK)
{
gcc_assert (tmp_tune);
- selected_tune = tmp_tune;
- explicit_tune_core = selected_tune->ident;
+ selected_tune = tmp_tune->ident;
return true;
}
@@ -22821,7 +22767,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
gcc_assert (targ_options);
const struct processor *this_arch
- = aarch64_get_arch (targ_options->x_explicit_arch);
+ = aarch64_get_arch (targ_options->x_selected_arch);
uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
std::string extension
@@ -22840,7 +22786,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
useful to readers of the generated asm. Do it only when it changes
from function to function and verbose assembly is requested. */
const struct processor *this_tune
- = aarch64_get_tune_cpu (targ_options->x_explicit_tune_core);
+ = aarch64_get_tune_cpu (targ_options->x_selected_tune);
if (flag_debug_asm && aarch64_last_printed_tune_string != this_tune->name)
{
@@ -22952,7 +22898,7 @@ aarch64_start_file (void)
= TREE_TARGET_OPTION (target_option_default_node);
const struct processor *default_arch
- = aarch64_get_arch (default_options->x_explicit_arch);
+ = aarch64_get_arch (default_options->x_selected_arch);
uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
std::string extension
= aarch64_get_extension_string_for_isa_flags (default_isa_flags,
@@ -27950,9 +27896,6 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_OFFLOAD_OPTIONS
#define TARGET_OFFLOAD_OPTIONS aarch64_offload_options
-#undef TARGET_OPTION_SAVE
-#define TARGET_OPTION_SAVE aarch64_option_save
-
#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE aarch64_option_restore
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 14e2af054..7d73689e4 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -144,9 +144,6 @@
#define PCC_BITFIELD_TYPE_MATTERS 1
-/* Major revision number of the ARM Architecture implemented by the target. */
-extern unsigned aarch64_architecture_version;
-
/* Instruction tuning/selection flags. */
/* Bit values used to identify processor capabilities. */
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 101664c7c..836a3c784 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -22,13 +22,10 @@ HeaderInclude
config/aarch64/aarch64-opts.h
TargetVariable
-enum aarch64_processor explicit_tune_core = aarch64_none
+enum aarch64_processor selected_tune = aarch64_none
TargetVariable
-enum aarch64_arch explicit_arch = aarch64_no_arch
-
-TargetSave
-const char *x_aarch64_override_tune_string
+enum aarch64_arch selected_arch = aarch64_no_arch
TargetVariable
uint64_t aarch64_isa_flags = 0
@@ -36,6 +33,9 @@ uint64_t aarch64_isa_flags = 0
TargetVariable
unsigned aarch64_enable_bti = 2
+TargetVariable
+enum aarch64_key_type aarch64_ra_sign_key = AARCH64_KEY_A
+
; The TLS dialect names to use with -mtls-dialect.
Enum
@@ -139,7 +139,7 @@ Target RejectNegative Joined Enum(aarch64_abi) Var(aarch64_abi) Init(AARCH64_ABI
Generate code that conforms to the specified ABI.
moverride=
-Target RejectNegative ToLower Joined Var(aarch64_override_tune_string)
+Target RejectNegative ToLower Joined Var(aarch64_override_tune_string) Save
-moverride=<string> Power users only! Override CPU optimization parameters.
Enum
--
2.33.0

From 0bfb7b0b745d0a9af13772ad48ccc102e557f95a Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Mon, 26 Sep 2022 10:10:25 +0100
Subject: [PATCH 007/157] [Backport][SME] aarch64: Add -march support for
Armv9.1-A, Armv9.2-A, Armv9.3-A
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c33e12fa479c01848f4a288883bf1ef848c94ca3
This is a straightforward patch that allows targeting the architecture revisions mentioned in the subject
through -march. These are already supported in binutils.
Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/ChangeLog:
* config/aarch64/aarch64-arches.def (armv9.1-a): Define.
(armv9.2-a): Likewise.
(armv9.3-a): Likewise.
* config/aarch64/aarch64.h (AARCH64_FL_V9_1): Likewise.
(AARCH64_FL_V9_2): Likewise.
(AARCH64_FL_V9_3): Likewise.
(AARCH64_FL_FOR_ARCH9_1): Likewise.
(AARCH64_FL_FOR_ARCH9_2): Likewise.
(AARCH64_FL_FOR_ARCH9_3): Likewise.
(AARCH64_ISA_V9_1): Likewise.
(AARCH64_ISA_V9_2): Likewise.
(AARCH64_ISA_V9_3): Likewise.
* doc/invoke.texi (AArch64 Options): Document armv9.1-a, armv9.2-a,
armv9.3-a values to -march.
---
gcc/config/aarch64/aarch64-arches.def | 3 +++
gcc/config/aarch64/aarch64.h | 18 ++++++++++++++++++
gcc/doc/invoke.texi | 3 +++
3 files changed, 24 insertions(+)
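
A user-visible consequence, sketched here rather than taken from the patch:
once GCC carries this change, code compiled with one of the new -march values
can already distinguish the level through the ACLE architecture macro.

  /* Compiled with e.g. -march=armv9.2-a, __ARM_ARCH reports 9, and the
     Armv8.7-A features folded in by AARCH64_FL_FOR_ARCH9_2 (such as +ls64)
     are available.  */
  #if defined(__ARM_ARCH) && __ARM_ARCH >= 9
  /* Armv9.x path: SVE2 is part of the architecture baseline here.  */
  #else
  /* Armv8.x fallback.  */
  #endif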
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index 3c2b16588..6150448dc 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -41,5 +41,8 @@ AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8
AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8)
AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R)
AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9)
+AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1)
+AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2)
+AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3)
#undef AARCH64_ARCH
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7d73689e4..42aae37ef 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -239,6 +239,15 @@
/* Armv8.8-a architecture extensions. */
#define AARCH64_FL_V8_8 (1ULL << 45)
+/* Armv9.1-A. */
+#define AARCH64_FL_V9_1 (1ULL << 46)
+
+/* Armv9.2-A. */
+#define AARCH64_FL_V9_2 (1ULL << 47)
+
+/* Armv9.3-A. */
+#define AARCH64_FL_V9_3 (1ULL << 48)
+
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
@@ -274,6 +283,12 @@
#define AARCH64_FL_FOR_ARCH9 \
(AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \
| AARCH64_FL_F16)
+#define AARCH64_FL_FOR_ARCH9_1 \
+ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1)
+#define AARCH64_FL_FOR_ARCH9_2 \
+ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2)
+#define AARCH64_FL_FOR_ARCH9_3 \
+ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3)
/* Macros to test ISA flags. */
@@ -314,6 +329,9 @@
#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R)
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9)
+#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1)
+#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2)
+#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3)
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 17d9e4126..53709b246 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19176,6 +19176,9 @@ and the features that they enable by default:
@item @samp{armv8.7-a} @tab Armv8.7-A @tab @samp{armv8.6-a}, @samp{+ls64}
@item @samp{armv8.8-a} @tab Armv8.8-a @tab @samp{armv8.7-a}, @samp{+mops}
@item @samp{armv9-a} @tab Armv9-A @tab @samp{armv8.5-a}, @samp{+sve}, @samp{+sve2}
+@item @samp{armv9.1-a} @tab Armv9.1-A @tab @samp{armv9-a}, @samp{+bf16}, @samp{+i8mm}
+@item @samp{armv9.2-a} @tab Armv9.2-A @tab @samp{armv9.1-a}, @samp{+ls64}
+@item @samp{armv9.3-a} @tab Armv9.3-A @tab @samp{armv9.2-a}, @samp{+mops}
@item @samp{armv8-r} @tab Armv8-R @tab @samp{armv8-r}
@end multitable
--
2.33.0

From b36c8c41cab42d3df45197bb287f06381d660001 Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Mon, 19 Feb 2024 19:27:29 +0800
Subject: [PATCH 008/157] [Backport][SME] Revert "aarch64: Define
__ARM_FEATURE_RCPC"
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=40a727379f3e8e6a83aea4e94c38dfa5dd8ef33d
Revert this commit to avoid conflicts with later patches;
it will be re-applied later.
---
gcc/config/aarch64/aarch64-c.cc | 1 -
gcc/config/aarch64/aarch64-cores.def | 10 +++++-----
gcc/config/aarch64/aarch64.h | 4 +---
.../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 -------------------
4 files changed, 6 insertions(+), 29 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 90d45e45d..3d2fb5ec2 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -202,7 +202,6 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
- aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b11eb80..842d64932 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -134,17 +134,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_
/* ARMv8.3-A Architecture Processors. */
/* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
/* ARMv8.4-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO, saphira, 0x51, 0xC01, -1)
+AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
/* ARMv8-A big.LITTLE implementations. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 42aae37ef..7c090c8f2 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -262,8 +262,7 @@
#define AARCH64_FL_FOR_ARCH8_2 \
(AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
#define AARCH64_FL_FOR_ARCH8_3 \
- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH \
- | AARCH64_FL_RCPC)
+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH)
#define AARCH64_FL_FOR_ARCH8_4 \
(AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \
| AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
@@ -314,7 +313,6 @@
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
-#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC)
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5)
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
index 307fa3d67..bfb044f5d 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
@@ -248,26 +248,6 @@
#error "__ARM_FEATURE_CRC32 is not defined but should be!"
#endif
-#pragma GCC target ("arch=armv8.2-a")
-#ifdef __ARM_FEATURE_RCPC
-#error "__ARM_FEATURE_RCPC is defined but should not be!"
-#endif
-
-#pragma GCC target ("arch=armv8.2-a+rcpc")
-#ifndef __ARM_FEATURE_RCPC
-#error "__ARM_FEATURE_RCPC is not defined but should be!"
-#endif
-
-#pragma GCC target ("+norcpc")
-#ifdef __ARM_FEATURE_RCPC
-#error "__ARM_FEATURE_RCPC is defined but should not be!"
-#endif
-
-#pragma GCC target ("arch=armv8.3-a")
-#ifndef __ARM_FEATURE_RCPC
-#error "__ARM_FEATURE_RCPC is not defined but should be!"
-#endif
-
int
foo (int a)
{
--
2.33.0

From 34374de5edde59f27a1b3b443e8a163fc5b528d7 Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Tue, 20 Feb 2024 10:13:06 +0800
Subject: [PATCH 009/157] [Backport][SME] Revert "Ampere-1 and Ampere-1A core
definition in aarch64-cores.def"
Revert it to avoid conflicts with later patches; it will be re-applied
later. It was introduced by commits 3668a59ae22a and e9f0d974600e.
---
gcc/config/aarch64/aarch64-cores.def | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 842d64932..0402bfb74 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -69,8 +69,7 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH
AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
/* Ampere Computing ('\xC0') cores. */
-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3, ampere1, 0xC0, 0xac3, -1)
-AARCH64_CORE("ampere1a", ampere1a, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_RNG | AARCH64_FL_SHA3 | AARCH64_FL_MEMTAG, ampere1a, 0xC0, 0xac4, -1)
+AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1)
/* Do not swap around "emag" and "xgene1",
this order is required to handle variant correctly. */
AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
@@ -164,8 +163,7 @@ AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cor
/* Armv9.0-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG
- | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
--
2.33.0

From 244780570ebc85c44806559ba165d4a70a2333d1 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:50 +0100
Subject: [PATCH 010/157] [Backport][SME] aarch64: Rename AARCH64_ISA
architecture-level macros
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a4788ac3bae1467b0379852d5a6690a8496d0c9
All AARCH64_ISA_* architecture-level macros except AARCH64_ISA_V8_R
are for the A profile: they cause __ARM_ARCH_PROFILE to be set to
'A' and they are associated with architecture names like armv8.4-a.
It's convenient for later patches if we make this explicit
by adding an "A" to the name. Also, rather than add an underscore
(as for V8_R) it's more convenient to add the profile directly
to the number, like we already do in the ARCH_IDENT field of the
aarch64-arches.def entries.
gcc/
* config/aarch64/aarch64.h (AARCH64_ISA_V8_2, AARCH64_ISA_V8_3)
(AARCH64_ISA_V8_4, AARCH64_ISA_V8_5, AARCH64_ISA_V8_6)
(AARCH64_ISA_V9, AARCH64_ISA_V9_1, AARCH64_ISA_V9_2)
(AARCH64_ISA_V9_3): Add "A" to the end of the name.
(AARCH64_ISA_V8_R): Rename to AARCH64_ISA_V8R.
(TARGET_ARMV8_3, TARGET_JSCVT, TARGET_FRINT, TARGET_MEMTAG): Update
accordingly.
* common/config/aarch64/aarch64-common.cc
(aarch64_get_extension_string_for_isa_flags): Likewise.
* config/aarch64/aarch64-c.cc
(aarch64_define_unconditional_macros): Likewise.
---
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
gcc/config/aarch64/aarch64-c.cc | 4 +--
gcc/config/aarch64/aarch64.h | 28 ++++++++++-----------
3 files changed, 17 insertions(+), 17 deletions(-)
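
The A/R distinction that motivates the rename is observable from user code
through __ARM_ARCH_PROFILE, which the aarch64-c.cc hunk below computes from
AARCH64_ISA_V8R. A small illustration of assumed usage, not from the patch:

  #if defined(__ARM_ARCH_PROFILE) && __ARM_ARCH_PROFILE == 'R'
  /* Built with -march=armv8-r: the internal AARCH64_ISA_V8R test fired.  */
  #else
  /* Every armv8.x-a / armv9.x-a level reports the 'A' profile.  */
  #endif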
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 85ce8133b..3dc020f0c 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -506,7 +506,7 @@ aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
Note that assemblers with Armv8-R AArch64 support should not have this
issue, so we don't need this fix when targeting Armv8-R. */
- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8_R)
+ if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
isa_flag_bits |= AARCH64_ISA_CRC;
/* Pass Two:
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 3d2fb5ec2..18c9b975b 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -64,7 +64,7 @@ aarch64_define_unconditional_macros (cpp_reader *pfile)
builtin_define ("__ARM_ARCH_8A");
builtin_define_with_int_value ("__ARM_ARCH_PROFILE",
- AARCH64_ISA_V8_R ? 'R' : 'A');
+ AARCH64_ISA_V8R ? 'R' : 'A');
builtin_define ("__ARM_FEATURE_CLZ");
builtin_define ("__ARM_FEATURE_IDIV");
builtin_define ("__ARM_FEATURE_UNALIGNED");
@@ -82,7 +82,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
{
aarch64_def_or_undef (flag_unsafe_math_optimizations, "__ARM_FP_FAST", pfile);
- builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9 ? 9 : 8);
+ builtin_define_with_int_value ("__ARM_ARCH", AARCH64_ISA_V9A ? 9 : 8);
builtin_define_with_int_value ("__ARM_SIZEOF_MINIMAL_ENUM",
flag_short_enums ? 1 : 4);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 7c090c8f2..356a263b2 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -297,7 +297,7 @@
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
-#define AARCH64_ISA_V8_2 (aarch64_isa_flags & AARCH64_FL_V8_2)
+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2)
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
#define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE)
#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2)
@@ -305,31 +305,31 @@
#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
-#define AARCH64_ISA_V8_3 (aarch64_isa_flags & AARCH64_FL_V8_3)
+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3)
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
#define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2)
-#define AARCH64_ISA_V8_4 (aarch64_isa_flags & AARCH64_FL_V8_4)
+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4)
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
-#define AARCH64_ISA_V8_5 (aarch64_isa_flags & AARCH64_FL_V8_5)
+#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5)
#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME)
#define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG)
-#define AARCH64_ISA_V8_6 (aarch64_isa_flags & AARCH64_FL_V8_6)
+#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6)
#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM)
#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
-#define AARCH64_ISA_V8_R (aarch64_isa_flags & AARCH64_FL_V8_R)
+#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R)
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
-#define AARCH64_ISA_V9 (aarch64_isa_flags & AARCH64_FL_V9)
-#define AARCH64_ISA_V9_1 (aarch64_isa_flags & AARCH64_FL_V9_1)
-#define AARCH64_ISA_V9_2 (aarch64_isa_flags & AARCH64_FL_V9_2)
-#define AARCH64_ISA_V9_3 (aarch64_isa_flags & AARCH64_FL_V9_3)
+#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9)
+#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1)
+#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2)
+#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3)
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
@@ -383,16 +383,16 @@
#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4)
/* ARMv8.3-A features. */
-#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3)
+#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
/* Javascript conversion instruction from Armv8.3-a. */
-#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3)
+#define TARGET_JSCVT (TARGET_FLOAT && AARCH64_ISA_V8_3A)
/* Armv8.3-a Complex number extension to AdvSIMD extensions. */
#define TARGET_COMPLEX (TARGET_SIMD && TARGET_ARMV8_3)
/* Floating-point rounding instructions from Armv8.5-a. */
-#define TARGET_FRINT (AARCH64_ISA_V8_5 && TARGET_FLOAT)
+#define TARGET_FRINT (AARCH64_ISA_V8_5A && TARGET_FLOAT)
/* TME instructions are enabled. */
#define TARGET_TME (AARCH64_ISA_TME)
@@ -401,7 +401,7 @@
#define TARGET_RNG (AARCH64_ISA_RNG)
/* Memory Tagging instructions optional to Armv8.5 enabled through +memtag. */
-#define TARGET_MEMTAG (AARCH64_ISA_V8_5 && AARCH64_ISA_MEMTAG)
+#define TARGET_MEMTAG (AARCH64_ISA_V8_5A && AARCH64_ISA_MEMTAG)
/* I8MM instructions are enabled through +i8mm. */
#define TARGET_I8MM (AARCH64_ISA_I8MM)
--
2.33.0

From e1b067871c4c39565bf6059b4924a810923c6eeb Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:51 +0100
Subject: [PATCH 011/157] [Backport][SME] aarch64: Rename AARCH64_FL
architecture-level macros
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=78aaafc3d4dc0ef997b4747349d3836ca2f7e301
Following on from the previous AARCH64_ISA patch, this one adds the
profile name directly to the end of architecture-level AARCH64_FL_*
macros.
gcc/
* config/aarch64/aarch64.h (AARCH64_FL_V8_1, AARCH64_FL_V8_2)
(AARCH64_FL_V8_3, AARCH64_FL_V8_4, AARCH64_FL_V8_5, AARCH64_FL_V8_6)
(AARCH64_FL_V9, AARCH64_FL_V8_7, AARCH64_FL_V8_8, AARCH64_FL_V9_1)
(AARCH64_FL_V9_2, AARCH64_FL_V9_3): Add "A" to the end of the name.
(AARCH64_FL_V8_R): Rename to AARCH64_FL_V8R.
(AARCH64_FL_FOR_ARCH8_1, AARCH64_FL_FOR_ARCH8_2): Update accordingly.
(AARCH64_FL_FOR_ARCH8_3, AARCH64_FL_FOR_ARCH8_4): Likewise.
(AARCH64_FL_FOR_ARCH8_5, AARCH64_FL_FOR_ARCH8_6): Likewise.
(AARCH64_FL_FOR_ARCH8_7, AARCH64_FL_FOR_ARCH8_8): Likewise.
(AARCH64_FL_FOR_ARCH8_R, AARCH64_FL_FOR_ARCH9): Likewise.
(AARCH64_FL_FOR_ARCH9_1, AARCH64_FL_FOR_ARCH9_2): Likewise.
(AARCH64_FL_FOR_ARCH9_3, AARCH64_ISA_V8_2A, AARCH64_ISA_V8_3A)
(AARCH64_ISA_V8_4A, AARCH64_ISA_V8_5A, AARCH64_ISA_V8_6A): Likewise.
(AARCH64_ISA_V8R, AARCH64_ISA_V9A, AARCH64_ISA_V9_1A): Likewise.
(AARCH64_ISA_V9_2A, AARCH64_ISA_V9_3A): Likewise.
---
gcc/config/aarch64/aarch64.h | 72 ++++++++++++++++++------------------
1 file changed, 36 insertions(+), 36 deletions(-)
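
To make the naming pattern concrete, here is a worked expansion using only
definitions from the hunk below: each AARCH64_FL_FOR_ARCH* mask is the
previous level's mask plus a per-level marker bit, so a single bit test
identifies a level.

  /* One renamed mask expands as:

       AARCH64_FL_FOR_ARCH9_1 = AARCH64_FL_FOR_ARCH9
                                | AARCH64_FL_FOR_ARCH8_6
                                | AARCH64_FL_V9_1A;

     and the matching ISA test is a single bitwise AND:

       #define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1A)  */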
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 356a263b2..5a91dfdd2 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -154,22 +154,22 @@
/* ARMv8.1-A architecture extensions. */
#define AARCH64_FL_LSE (1 << 4) /* Has Large System Extensions. */
#define AARCH64_FL_RDMA (1 << 5) /* Has Round Double Multiply Add. */
-#define AARCH64_FL_V8_1 (1 << 6) /* Has ARMv8.1-A extensions. */
+#define AARCH64_FL_V8_1A (1 << 6) /* Has ARMv8.1-A extensions. */
/* Armv8-R. */
-#define AARCH64_FL_V8_R (1 << 7) /* Armv8-R AArch64. */
+#define AARCH64_FL_V8R (1 << 7) /* Armv8-R AArch64. */
/* ARMv8.2-A architecture extensions. */
-#define AARCH64_FL_V8_2 (1 << 8) /* Has ARMv8.2-A features. */
+#define AARCH64_FL_V8_2A (1 << 8) /* Has ARMv8.2-A features. */
#define AARCH64_FL_F16 (1 << 9) /* Has ARMv8.2-A FP16 extensions. */
#define AARCH64_FL_SVE (1 << 10) /* Has Scalable Vector Extensions. */
/* ARMv8.3-A architecture extensions. */
-#define AARCH64_FL_V8_3 (1 << 11) /* Has ARMv8.3-A features. */
+#define AARCH64_FL_V8_3A (1 << 11) /* Has ARMv8.3-A features. */
#define AARCH64_FL_RCPC (1 << 12) /* Has support for RCpc model. */
#define AARCH64_FL_DOTPROD (1 << 13) /* Has ARMv8.2-A Dot Product ins. */
/* New flags to split crypto into aes and sha2. */
#define AARCH64_FL_AES (1 << 14) /* Has Crypto AES. */
#define AARCH64_FL_SHA2 (1 << 15) /* Has Crypto SHA2. */
/* ARMv8.4-A architecture extensions. */
-#define AARCH64_FL_V8_4 (1 << 16) /* Has ARMv8.4-A features. */
+#define AARCH64_FL_V8_4A (1 << 16) /* Has ARMv8.4-A features. */
#define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */
#define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */
#define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */
@@ -179,7 +179,7 @@
#define AARCH64_FL_PROFILE (1 << 21)
/* ARMv8.5-A architecture extensions. */
-#define AARCH64_FL_V8_5 (1 << 22) /* Has ARMv8.5-A features. */
+#define AARCH64_FL_V8_5A (1 << 22) /* Has ARMv8.5-A features. */
#define AARCH64_FL_RNG (1 << 23) /* ARMv8.5-A Random Number Insns. */
#define AARCH64_FL_MEMTAG (1 << 24) /* ARMv8.5-A Memory Tagging
Extensions. */
@@ -204,7 +204,7 @@
#define AARCH64_FL_TME (1ULL << 33) /* Has TME instructions. */
/* Armv8.6-A architecture extensions. */
-#define AARCH64_FL_V8_6 (1ULL << 34)
+#define AARCH64_FL_V8_6A (1ULL << 34)
/* 8-bit Integer Matrix Multiply (I8MM) extensions. */
#define AARCH64_FL_I8MM (1ULL << 35)
@@ -225,28 +225,28 @@
#define AARCH64_FL_PAUTH (1ULL << 40)
/* Armv9.0-A. */
-#define AARCH64_FL_V9 (1ULL << 41) /* Armv9.0-A Architecture. */
+#define AARCH64_FL_V9A (1ULL << 41) /* Armv9.0-A Architecture. */
/* 64-byte atomic load/store extensions. */
#define AARCH64_FL_LS64 (1ULL << 42)
/* Armv8.7-a architecture extensions. */
-#define AARCH64_FL_V8_7 (1ULL << 43)
+#define AARCH64_FL_V8_7A (1ULL << 43)
/* Hardware memory operation instructions. */
#define AARCH64_FL_MOPS (1ULL << 44)
/* Armv8.8-a architecture extensions. */
-#define AARCH64_FL_V8_8 (1ULL << 45)
+#define AARCH64_FL_V8_8A (1ULL << 45)
/* Armv9.1-A. */
-#define AARCH64_FL_V9_1 (1ULL << 46)
+#define AARCH64_FL_V9_1A (1ULL << 46)
/* Armv9.2-A. */
-#define AARCH64_FL_V9_2 (1ULL << 47)
+#define AARCH64_FL_V9_2A (1ULL << 47)
/* Armv9.3-A. */
-#define AARCH64_FL_V9_3 (1ULL << 48)
+#define AARCH64_FL_V9_3A (1ULL << 48)
/* Has FP and SIMD. */
#define AARCH64_FL_FPSIMD (AARCH64_FL_FP | AARCH64_FL_SIMD)
@@ -258,36 +258,36 @@
#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
#define AARCH64_FL_FOR_ARCH8_1 \
(AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
- | AARCH64_FL_RDMA | AARCH64_FL_V8_1)
+ | AARCH64_FL_RDMA | AARCH64_FL_V8_1A)
#define AARCH64_FL_FOR_ARCH8_2 \
- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2)
+ (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A)
#define AARCH64_FL_FOR_ARCH8_3 \
- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3 | AARCH64_FL_PAUTH)
+ (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
#define AARCH64_FL_FOR_ARCH8_4 \
- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4 | AARCH64_FL_F16FML \
+ (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
| AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
#define AARCH64_FL_FOR_ARCH8_5 \
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5 \
+ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
#define AARCH64_FL_FOR_ARCH8_6 \
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6 | AARCH64_FL_FPSIMD \
+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \
| AARCH64_FL_I8MM | AARCH64_FL_BF16)
#define AARCH64_FL_FOR_ARCH8_7 \
- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7 | AARCH64_FL_LS64)
+ (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64)
#define AARCH64_FL_FOR_ARCH8_8 \
- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8 | AARCH64_FL_MOPS)
+ (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS)
#define AARCH64_FL_FOR_ARCH8_R \
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_R)
+ (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R)
#define AARCH64_FL_FOR_ARCH9 \
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9 \
+ (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \
| AARCH64_FL_F16)
#define AARCH64_FL_FOR_ARCH9_1 \
- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1)
+ (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A)
#define AARCH64_FL_FOR_ARCH9_2 \
- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2)
+ (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A)
#define AARCH64_FL_FOR_ARCH9_3 \
- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3)
+ (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A)
/* Macros to test ISA flags. */
@@ -297,7 +297,7 @@
#define AARCH64_ISA_SIMD (aarch64_isa_flags & AARCH64_FL_SIMD)
#define AARCH64_ISA_LSE (aarch64_isa_flags & AARCH64_FL_LSE)
#define AARCH64_ISA_RDMA (aarch64_isa_flags & AARCH64_FL_RDMA)
-#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2)
+#define AARCH64_ISA_V8_2A (aarch64_isa_flags & AARCH64_FL_V8_2A)
#define AARCH64_ISA_F16 (aarch64_isa_flags & AARCH64_FL_F16)
#define AARCH64_ISA_SVE (aarch64_isa_flags & AARCH64_FL_SVE)
#define AARCH64_ISA_SVE2 (aarch64_isa_flags & AARCH64_FL_SVE2)
@@ -305,31 +305,31 @@
#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
-#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3)
+#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A)
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
#define AARCH64_ISA_SHA2 (aarch64_isa_flags & AARCH64_FL_SHA2)
-#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4)
+#define AARCH64_ISA_V8_4A (aarch64_isa_flags & AARCH64_FL_V8_4A)
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
-#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5)
+#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME)
#define AARCH64_ISA_MEMTAG (aarch64_isa_flags & AARCH64_FL_MEMTAG)
-#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6)
+#define AARCH64_ISA_V8_6A (aarch64_isa_flags & AARCH64_FL_V8_6A)
#define AARCH64_ISA_I8MM (aarch64_isa_flags & AARCH64_FL_I8MM)
#define AARCH64_ISA_F32MM (aarch64_isa_flags & AARCH64_FL_F32MM)
#define AARCH64_ISA_F64MM (aarch64_isa_flags & AARCH64_FL_F64MM)
#define AARCH64_ISA_BF16 (aarch64_isa_flags & AARCH64_FL_BF16)
#define AARCH64_ISA_SB (aarch64_isa_flags & AARCH64_FL_SB)
-#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8_R)
+#define AARCH64_ISA_V8R (aarch64_isa_flags & AARCH64_FL_V8R)
#define AARCH64_ISA_PAUTH (aarch64_isa_flags & AARCH64_FL_PAUTH)
-#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9)
-#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1)
-#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2)
-#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3)
+#define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A)
+#define AARCH64_ISA_V9_1A (aarch64_isa_flags & AARCH64_FL_V9_1A)
+#define AARCH64_ISA_V9_2A (aarch64_isa_flags & AARCH64_FL_V9_2A)
+#define AARCH64_ISA_V9_3A (aarch64_isa_flags & AARCH64_FL_V9_3A)
#define AARCH64_ISA_MOPS (aarch64_isa_flags & AARCH64_FL_MOPS)
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
--
2.33.0
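
For readers following the flag renames in the patch above, here is a minimal
standalone sketch of the define/test pattern (bit positions taken from the
hunk; aarch64_isa_flags is a global set up during GCC option processing,
modelled here as a local):

    #include <stdint.h>
    #include <stdio.h>

    #define AARCH64_FL_V8_5A (1ULL << 22)   /* Has ARMv8.5-A features.  */
    #define AARCH64_FL_V9A   (1ULL << 41)   /* Armv9.0-A Architecture.  */

    int main (void)
    {
      /* In GCC this is a global filled in during option processing.  */
      uint64_t aarch64_isa_flags = AARCH64_FL_V8_5A | AARCH64_FL_V9A;

    #define AARCH64_ISA_V9A (aarch64_isa_flags & AARCH64_FL_V9A)

      if (AARCH64_ISA_V9A)
        printf ("Armv9.0-A features enabled\n");
      return 0;
    }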

From 7da27deb7413d7d1fd2c543617640e2de5b10db0 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:51 +0100
Subject: [PATCH 012/157] [Backport][SME] aarch64: Rename AARCH64_FL_FOR_ARCH
macros
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f833d1900176509e16b6f5563cfe58508fef5d2
This patch renames AARCH64_FL_FOR_ARCH* macros to follow the
same V<number><profile> names that we (now) use elsewhere.
The names are only temporary -- a later patch will move the
information to the .def file instead. However, it helps with
the sequencing to do this first.
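A condensed before/after of the naming pattern, taken from the aarch64.h
hunk in this patch (the layered composition is unchanged; only the names
move to the V<number><profile> scheme):

    /* Before.  */
    #define AARCH64_FL_FOR_ARCH8   (AARCH64_FL_FPSIMD)
    #define AARCH64_FL_FOR_ARCH8_1 \
      (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
       | AARCH64_FL_RDMA | AARCH64_FL_V8_1A)

    /* After.  */
    #define AARCH64_FL_FOR_V8A     (AARCH64_FL_FPSIMD)
    #define AARCH64_FL_FOR_V8_1A \
      (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \
       | AARCH64_FL_RDMA | AARCH64_FL_V8_1A)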
gcc/
* config/aarch64/aarch64.h (AARCH64_FL_FOR_ARCH8): Rename to...
(AARCH64_FL_FOR_V8A): ...this.
(AARCH64_FL_FOR_ARCH8_1): Rename to...
(AARCH64_FL_FOR_V8_1A): ...this.
(AARCH64_FL_FOR_ARCH8_2): Rename to...
(AARCH64_FL_FOR_V8_2A): ...this.
(AARCH64_FL_FOR_ARCH8_3): Rename to...
(AARCH64_FL_FOR_V8_3A): ...this.
(AARCH64_FL_FOR_ARCH8_4): Rename to...
(AARCH64_FL_FOR_V8_4A): ...this.
(AARCH64_FL_FOR_ARCH8_5): Rename to...
(AARCH64_FL_FOR_V8_5A): ...this.
(AARCH64_FL_FOR_ARCH8_6): Rename to...
(AARCH64_FL_FOR_V8_6A): ...this.
(AARCH64_FL_FOR_ARCH8_7): Rename to...
(AARCH64_FL_FOR_V8_7A): ...this.
(AARCH64_FL_FOR_ARCH8_8): Rename to...
(AARCH64_FL_FOR_V8_8A): ...this.
(AARCH64_FL_FOR_ARCH8_R): Rename to...
(AARCH64_FL_FOR_V8R): ...this.
(AARCH64_FL_FOR_ARCH9): Rename to...
(AARCH64_FL_FOR_V9A): ...this.
(AARCH64_FL_FOR_ARCH9_1): Rename to...
(AARCH64_FL_FOR_V9_1A): ...this.
(AARCH64_FL_FOR_ARCH9_2): Rename to...
(AARCH64_FL_FOR_V9_2A): ...this.
(AARCH64_FL_FOR_ARCH9_3): Rename to...
(AARCH64_FL_FOR_V9_3A): ...this.
* common/config/aarch64/aarch64-common.cc (all_cores): Update
accordingly.
* config/aarch64/aarch64-arches.def: Likewise.
* config/aarch64/aarch64-cores.def: Likewise.
* config/aarch64/aarch64.cc (all_cores): Likewise.
---
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
gcc/config/aarch64/aarch64-arches.def | 28 ++---
gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
gcc/config/aarch64/aarch64.cc | 2 +-
gcc/config/aarch64/aarch64.h | 56 ++++-----
5 files changed, 109 insertions(+), 109 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 3dc020f0c..0461201a5 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] =
#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
{NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
#include "config/aarch64/aarch64-cores.def"
- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_ARCH8},
+ {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
{"", aarch64_no_arch, 0}
};
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index 6150448dc..c6bf7d82c 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -30,19 +30,19 @@
Due to the assumptions about the positions of these fields in config.gcc,
the NAME should be kept as the first argument and FLAGS as the last. */
-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_ARCH8)
-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_ARCH8_1)
-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_ARCH8_2)
-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_ARCH8_3)
-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_ARCH8_4)
-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_ARCH8_5)
-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_ARCH8_6)
-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_ARCH8_7)
-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_ARCH8_8)
-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_ARCH8_R)
-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_ARCH9)
-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_ARCH9_1)
-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_ARCH9_2)
-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_ARCH9_3)
+AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A)
+AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A)
+AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A)
+AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A)
+AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A)
+AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A)
+AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A)
+AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A)
+AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A)
+AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R)
+AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A)
+AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A)
+AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A)
+AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A)
#undef AARCH64_ARCH
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 0402bfb74..c4038c641 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -46,132 +46,132 @@
/* ARMv8-A Architecture Processors. */
/* ARM ('A') cores. */
-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
/* Do not swap around "thunderxt88p1" and "thunderxt88",
this order is required to handle variant correctly. */
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
/* OcteonTX is the official name for T81/T83. */
-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
/* Ampere Computing ('\xC0') cores. */
-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_ARCH8_6, ampere1, 0xC0, 0xac3, -1)
+AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
/* Do not swap around "emag" and "xgene1",
this order is required to handle variant correctly. */
-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
+AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
/* APM ('P') cores. */
-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xgene1, 0x50, 0x000, -1)
+AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
/* Samsung ('S') cores. */
-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
/* HXT ('h') cores. */
-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
+AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
/* ARMv8.1-A Architecture Processors. */
/* Broadcom ('B') cores. */
-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
/* ARMv8.2-A Architecture Processors. */
/* ARM ('A') cores. */
-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
+AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
/* Fujitsu ('F') cores. */
-AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
+AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.3-A Architecture Processors. */
/* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
/* ARMv8.4-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
+AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
/* ARMv8-A big.LITTLE implementations. */
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
/* ARM DynamIQ big.LITTLE configurations. */
-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
/* Armv8-R Architecture Processors. */
-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_ARCH8_R, cortexa53, 0x41, 0xd15, -1)
+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
/* Armv9.0-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
#undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 254ecfaa2..3714c1047 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2949,7 +2949,7 @@ static const struct processor all_cores[] =
FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
{"generic", generic, cortexa53, AARCH64_ARCH_8A,
- AARCH64_FL_FOR_ARCH8, &generic_tunings},
+ AARCH64_FL_FOR_V8A, &generic_tunings},
{NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
};
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 5a91dfdd2..918a14193 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -255,39 +255,39 @@
#define AARCH64_FL_FPQ16 (AARCH64_FL_FP & ~AARCH64_FL_SIMD)
/* Architecture flags that effect instruction selection. */
-#define AARCH64_FL_FOR_ARCH8 (AARCH64_FL_FPSIMD)
-#define AARCH64_FL_FOR_ARCH8_1 \
- (AARCH64_FL_FOR_ARCH8 | AARCH64_FL_LSE | AARCH64_FL_CRC \
+#define AARCH64_FL_FOR_V8A (AARCH64_FL_FPSIMD)
+#define AARCH64_FL_FOR_V8_1A \
+ (AARCH64_FL_FOR_V8A | AARCH64_FL_LSE | AARCH64_FL_CRC \
| AARCH64_FL_RDMA | AARCH64_FL_V8_1A)
-#define AARCH64_FL_FOR_ARCH8_2 \
- (AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_V8_2A)
-#define AARCH64_FL_FOR_ARCH8_3 \
- (AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
-#define AARCH64_FL_FOR_ARCH8_4 \
- (AARCH64_FL_FOR_ARCH8_3 | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
+#define AARCH64_FL_FOR_V8_2A \
+ (AARCH64_FL_FOR_V8_1A | AARCH64_FL_V8_2A)
+#define AARCH64_FL_FOR_V8_3A \
+ (AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
+#define AARCH64_FL_FOR_V8_4A \
+ (AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
| AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
-#define AARCH64_FL_FOR_ARCH8_5 \
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8_5A \
+#define AARCH64_FL_FOR_V8_5A \
+ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
-#define AARCH64_FL_FOR_ARCH8_6 \
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \
+#define AARCH64_FL_FOR_V8_6A \
+ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_V8_6A | AARCH64_FL_FPSIMD \
| AARCH64_FL_I8MM | AARCH64_FL_BF16)
-#define AARCH64_FL_FOR_ARCH8_7 \
- (AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V8_7A | AARCH64_FL_LS64)
-#define AARCH64_FL_FOR_ARCH8_8 \
- (AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V8_8A | AARCH64_FL_MOPS)
-
-#define AARCH64_FL_FOR_ARCH8_R \
- (AARCH64_FL_FOR_ARCH8_4 | AARCH64_FL_V8R)
-#define AARCH64_FL_FOR_ARCH9 \
- (AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \
+#define AARCH64_FL_FOR_V8_7A \
+ (AARCH64_FL_FOR_V8_6A | AARCH64_FL_V8_7A | AARCH64_FL_LS64)
+#define AARCH64_FL_FOR_V8_8A \
+ (AARCH64_FL_FOR_V8_7A | AARCH64_FL_V8_8A | AARCH64_FL_MOPS)
+
+#define AARCH64_FL_FOR_V8R \
+ (AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8R)
+#define AARCH64_FL_FOR_V9A \
+ (AARCH64_FL_FOR_V8_5A | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_V9A \
| AARCH64_FL_F16)
-#define AARCH64_FL_FOR_ARCH9_1 \
- (AARCH64_FL_FOR_ARCH9 | AARCH64_FL_FOR_ARCH8_6 | AARCH64_FL_V9_1A)
-#define AARCH64_FL_FOR_ARCH9_2 \
- (AARCH64_FL_FOR_ARCH9_1 | AARCH64_FL_FOR_ARCH8_7 | AARCH64_FL_V9_2A)
-#define AARCH64_FL_FOR_ARCH9_3 \
- (AARCH64_FL_FOR_ARCH9_2 | AARCH64_FL_FOR_ARCH8_8 | AARCH64_FL_V9_3A)
+#define AARCH64_FL_FOR_V9_1A \
+ (AARCH64_FL_FOR_V9A | AARCH64_FL_FOR_V8_6A | AARCH64_FL_V9_1A)
+#define AARCH64_FL_FOR_V9_2A \
+ (AARCH64_FL_FOR_V9_1A | AARCH64_FL_FOR_V8_7A | AARCH64_FL_V9_2A)
+#define AARCH64_FL_FOR_V9_3A \
+ (AARCH64_FL_FOR_V9_2A | AARCH64_FL_FOR_V8_8A | AARCH64_FL_V9_3A)
/* Macros to test ISA flags. */
--
2.33.0

From ed8ce0b31f2b608f0360af1ffd5375ea7809aba7 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:52 +0100
Subject: [PATCH 013/157] [Backport][SME] aarch64: Add "V" to
aarch64-arches.def names
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=00c22ba69d8e738a4789b30165ff9c925c508fc1
This patch completes the renaming of architecture-level identifiers
by adding "V" to the name of the architecture in
aarch64-arches.def. Since the "V" is predictable, we can easily
drop it when we don't need it (as when matching /proc/cpuinfo).
Having a valid C identifier is necessary for later patches.
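A minimal sketch of why the predictable leading "V" is convenient
(a standalone illustration, not the actual driver-aarch64.cc code; the
cpuinfo value below is hypothetical): stringifying the identifier and
skipping one character recovers the bare name.

    #include <stdio.h>
    #include <string.h>

    /* Stringify an ARCH_IDENT such as V8_2A and drop the leading "V".  */
    #define ARCH_IDENT_NAME(ident) (#ident + 1)

    int main (void)
    {
      const char *cpuinfo_arch = "8_2A";   /* hypothetical parsed value */
      if (strcmp (ARCH_IDENT_NAME (V8_2A), cpuinfo_arch) == 0)
        printf ("matched armv8.2-a\n");
      return 0;
    }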
gcc/
* config/aarch64/aarch64-arches.def: Add a leading "V" to the
ARCH_IDENT fields.
* config/aarch64/aarch64-cores.def: Update accordingly.
* common/config/aarch64/aarch64-common.cc (all_cores): Likewise.
* config/aarch64/aarch64.cc (all_cores): Likewise.
* config/aarch64/driver-aarch64.cc (aarch64_arches): Skip the
leading "V".
---
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
gcc/config/aarch64/aarch64-arches.def | 28 ++---
gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
gcc/config/aarch64/aarch64.cc | 2 +-
gcc/config/aarch64/driver-aarch64.cc | 3 +-
5 files changed, 83 insertions(+), 82 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 0461201a5..6ca89d31f 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -253,7 +253,7 @@ static const struct processor_name_to_arch all_cores[] =
#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
{NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
#include "config/aarch64/aarch64-cores.def"
- {"generic", AARCH64_ARCH_8A, AARCH64_FL_FOR_V8A},
+ {"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
{"", aarch64_no_arch, 0}
};
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index c6bf7d82c..e42202822 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -30,19 +30,19 @@
Due to the assumptions about the positions of these fields in config.gcc,
the NAME should be kept as the first argument and FLAGS as the last. */
-AARCH64_ARCH("armv8-a", generic, 8A, 8, AARCH64_FL_FOR_V8A)
-AARCH64_ARCH("armv8.1-a", generic, 8_1A, 8, AARCH64_FL_FOR_V8_1A)
-AARCH64_ARCH("armv8.2-a", generic, 8_2A, 8, AARCH64_FL_FOR_V8_2A)
-AARCH64_ARCH("armv8.3-a", generic, 8_3A, 8, AARCH64_FL_FOR_V8_3A)
-AARCH64_ARCH("armv8.4-a", generic, 8_4A, 8, AARCH64_FL_FOR_V8_4A)
-AARCH64_ARCH("armv8.5-a", generic, 8_5A, 8, AARCH64_FL_FOR_V8_5A)
-AARCH64_ARCH("armv8.6-a", generic, 8_6A, 8, AARCH64_FL_FOR_V8_6A)
-AARCH64_ARCH("armv8.7-a", generic, 8_7A, 8, AARCH64_FL_FOR_V8_7A)
-AARCH64_ARCH("armv8.8-a", generic, 8_8A, 8, AARCH64_FL_FOR_V8_8A)
-AARCH64_ARCH("armv8-r", generic, 8R , 8, AARCH64_FL_FOR_V8R)
-AARCH64_ARCH("armv9-a", generic, 9A , 9, AARCH64_FL_FOR_V9A)
-AARCH64_ARCH("armv9.1-a", generic, 9_1A, 9, AARCH64_FL_FOR_V9_1A)
-AARCH64_ARCH("armv9.2-a", generic, 9_2A, 9, AARCH64_FL_FOR_V9_2A)
-AARCH64_ARCH("armv9.3-a", generic, 9_3A, 9, AARCH64_FL_FOR_V9_3A)
+AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A)
+AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A)
+AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, AARCH64_FL_FOR_V8_2A)
+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, AARCH64_FL_FOR_V8_3A)
+AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, AARCH64_FL_FOR_V8_4A)
+AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, AARCH64_FL_FOR_V8_5A)
+AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, AARCH64_FL_FOR_V8_6A)
+AARCH64_ARCH("armv8.7-a", generic, V8_7A, 8, AARCH64_FL_FOR_V8_7A)
+AARCH64_ARCH("armv8.8-a", generic, V8_8A, 8, AARCH64_FL_FOR_V8_8A)
+AARCH64_ARCH("armv8-r", generic, V8R , 8, AARCH64_FL_FOR_V8R)
+AARCH64_ARCH("armv9-a", generic, V9A , 9, AARCH64_FL_FOR_V9A)
+AARCH64_ARCH("armv9.1-a", generic, V9_1A, 9, AARCH64_FL_FOR_V9_1A)
+AARCH64_ARCH("armv9.2-a", generic, V9_2A, 9, AARCH64_FL_FOR_V9_2A)
+AARCH64_ARCH("armv9.3-a", generic, V9_3A, 9, AARCH64_FL_FOR_V9_3A)
#undef AARCH64_ARCH
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index c4038c641..f4c2f4ea4 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -46,132 +46,132 @@
/* ARMv8-A Architecture Processors. */
/* ARM ('A') cores. */
-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("thunderx", thunderx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
/* Do not swap around "thunderxt88p1" and "thunderxt88",
this order is required to handle variant correctly. */
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
/* OcteonTX is the official name for T81/T83. */
-AARCH64_CORE("octeontx", octeontx, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
-AARCH64_CORE("octeontx81", octeontxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
-AARCH64_CORE("octeontx83", octeontxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
/* Ampere Computing ('\xC0') cores. */
-AARCH64_CORE("ampere1", ampere1, cortexa57, 8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
/* Do not swap around "emag" and "xgene1",
this order is required to handle variant correctly. */
-AARCH64_CORE("emag", emag, xgene1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
+AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
/* APM ('P') cores. */
-AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("falkor", falkor, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
-AARCH64_CORE("qdf24xx", qdf24xx, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
/* Samsung ('S') cores. */
-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
/* HXT ('h') cores. */
-AARCH64_CORE("phecda", phecda, falkor, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
+AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
/* ARMv8.1-A Architecture Processors. */
/* Broadcom ('B') cores. */
-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
-AARCH64_CORE("vulcan", vulcan, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, 8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
/* ARMv8.2-A Architecture Processors. */
/* ARM ('A') cores. */
-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
-AARCH64_CORE("ares", ares, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
+AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("octeontx2", octeontx2, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
/* Fujitsu ('F') cores. */
-AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.3-A Architecture Processors. */
/* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, 8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
/* ARMv8.4-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("zeus", zeus, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
+AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
/* ARMv8-A big.LITTLE implementations. */
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, 8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
/* ARM DynamIQ big.LITTLE configurations. */
-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, 8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
/* Armv8-R Architecture Processors. */
-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, 8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
/* Armv9.0-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
-AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
#undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3714c1047..22b51e12f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2948,7 +2948,7 @@ static const struct processor all_cores[] =
{NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
- {"generic", generic, cortexa53, AARCH64_ARCH_8A,
+ {"generic", generic, cortexa53, AARCH64_ARCH_V8A,
AARCH64_FL_FOR_V8A, &generic_tunings},
{NULL, aarch64_none, aarch64_none, aarch64_no_arch, 0, NULL}
};
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index d714a8bda..644780ef2 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -78,8 +78,9 @@ struct aarch64_arch_driver_info
const uint64_t flags;
};
+/* Skip the leading "V" in the architecture name. */
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
- { #ARCH_IDENT, NAME, FLAGS },
+ { #ARCH_IDENT + 1, NAME, FLAGS },
static struct aarch64_arch_driver_info aarch64_arches[] =
{
--
2.33.0
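The "#ARCH_IDENT + 1" change above works by pointer arithmetic on a stringized token. A minimal standalone sketch of the idiom (the macro and identifier here are invented for illustration, not the GCC source):

#include <stdio.h>

/* Stringizing the identifier V8_2A yields the literal "V8_2A"; adding 1
   to the array-decayed string pointer skips the leading 'V', so the
   stored name is "8_2A", matching what the driver expects to read. */
#define ARCH_NAME(IDENT) (#IDENT + 1)

int main (void)
{
  printf ("%s\n", ARCH_NAME (V8_2A)); /* prints "8_2A" */
  return 0;
}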


@ -0,0 +1,55 @@
From aac2b2d4191d08a107c3ff8d98602355988a5558 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:52 +0100
Subject: [PATCH 014/157] [Backport][SME] aarch64: Small config.gcc cleanups
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0af214b447529453b356e8e480d7d35b3e642f0e
The aarch64-option-extensions.def parsing in config.gcc had
some code left over from when it tried to parse the whole
macro definition. Also, config.gcc now only looks at the
first fields of the aarch64-arches.def entries.
gcc/
* config.gcc: Remove dead aarch64-option-extensions.def code.
* config/aarch64/aarch64-arches.def: Update comment.
---
gcc/config.gcc | 8 --------
gcc/config/aarch64/aarch64-arches.def | 2 +-
2 files changed, 1 insertion(+), 9 deletions(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 3be450471..da66603cd 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -4210,14 +4210,6 @@ case "${target}" in
options_parsed="`$ac_cv_prog_CPP -D"$opt_macro" -x c \
${srcdir}/config/aarch64/aarch64-option-extensions.def`"
- # Match one element inside AARCH64_OPT_EXTENSION, we
- # consume anything that's not a ,.
- elem="[ ]*\([^,]\+\)[ ]*"
-
- # Repeat the pattern for the number of entries in the
- # AARCH64_OPT_EXTENSION, currently 6 times.
- sed_patt="^$elem,$elem,$elem,$elem,$elem,$elem"
-
while [ x"$ext_val" != x ]
do
ext_val=`echo $ext_val | sed -e 's/\+//'`
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index e42202822..ece96e22a 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -28,7 +28,7 @@
ARCH_REV is an integer specifying the architecture major revision.
FLAGS are the flags implied by the architecture.
Due to the assumptions about the positions of these fields in config.gcc,
- the NAME should be kept as the first argument and FLAGS as the last. */
+ NAME should be kept as the first argument. */
AARCH64_ARCH("armv8-a", generic, V8A, 8, AARCH64_FL_FOR_V8A)
AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, AARCH64_FL_FOR_V8_1A)
--
2.33.0


@ -0,0 +1,273 @@
From f6f28c50045f672a35f5b7344b556fc45dc0b3a1 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:53 +0100
Subject: [PATCH 015/157] [Backport][SME] aarch64: Avoid redundancy in
aarch64-cores.def
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=198bb6ed327c74eb2b0450bf978e4e6a64a6406c
The flags fields of the aarch64-cores.def always start with
AARCH64_FL_FOR_<ARCH>. After previous changes, <ARCH> is always
identical to the previous field, so we can drop the explicit
AARCH64_FL_FOR_<ARCH> and derive it programmatically.
This isn't a big saving in itself, but it helps with later patches.
gcc/
* config/aarch64/aarch64-cores.def: Remove AARCH64_FL_FOR_<ARCH>
from the flags field.
* common/config/aarch64/aarch64-common.cc (all_cores): Add it
here instead.
* config/aarch64/aarch64.cc (all_cores): Likewise.
* config/aarch64/driver-aarch64.cc (all_cores): Likewise.
---
gcc/common/config/aarch64/aarch64-common.cc | 2 +-
gcc/config/aarch64/aarch64-cores.def | 130 ++++++++++----------
gcc/config/aarch64/aarch64.cc | 2 +-
gcc/config/aarch64/driver-aarch64.cc | 2 +-
4 files changed, 68 insertions(+), 68 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 6ca89d31f..a965ac660 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -251,7 +251,7 @@ struct arch_to_arch_name
static const struct processor_name_to_arch all_cores[] =
{
#define AARCH64_CORE(NAME, X, IDENT, ARCH_IDENT, FLAGS, COSTS, IMP, PART, VARIANT) \
- {NAME, AARCH64_ARCH_##ARCH_IDENT, FLAGS},
+ {NAME, AARCH64_ARCH_##ARCH_IDENT, AARCH64_FL_FOR_##ARCH_IDENT | FLAGS},
#include "config/aarch64/aarch64-cores.def"
{"generic", AARCH64_ARCH_V8A, AARCH64_FL_FOR_V8A},
{"", aarch64_no_arch, 0}
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f4c2f4ea4..008b0b8c1 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -46,132 +46,132 @@
/* ARMv8-A Architecture Processors. */
/* ARM ('A') cores. */
-AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
-AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
-AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
-AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
-AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
-AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
+AARCH64_CORE("cortex-a34", cortexa34, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd02, -1)
+AARCH64_CORE("cortex-a35", cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa35, 0x41, 0xd04, -1)
+AARCH64_CORE("cortex-a53", cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa53, 0x41, 0xd03, -1)
+AARCH64_CORE("cortex-a57", cortexa57, cortexa57, V8A, AARCH64_FL_CRC, cortexa57, 0x41, 0xd07, -1)
+AARCH64_CORE("cortex-a72", cortexa72, cortexa57, V8A, AARCH64_FL_CRC, cortexa72, 0x41, 0xd08, -1)
+AARCH64_CORE("cortex-a73", cortexa73, cortexa57, V8A, AARCH64_FL_CRC, cortexa73, 0x41, 0xd09, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+AARCH64_CORE("thunderx", thunderx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
/* Do not swap around "thunderxt88p1" and "thunderxt88",
this order is required to handle variant correctly. */
-AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
-AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
+AARCH64_CORE("thunderxt88p1", thunderxt88p1, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, 0)
+AARCH64_CORE("thunderxt88", thunderxt88, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderxt88, 0x43, 0x0a1, -1)
/* OcteonTX is the official name for T81/T83. */
-AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
-AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
-AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+AARCH64_CORE("octeontx", octeontx, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a0, -1)
+AARCH64_CORE("octeontx81", octeontxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+AARCH64_CORE("octeontx83", octeontxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
-AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
-AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
+AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a2, -1)
+AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx, 0x43, 0x0a3, -1)
/* Ampere Computing ('\xC0') cores. */
-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, AARCH64_FL_FOR_V8_6A, ampere1, 0xC0, 0xac3, -1)
+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, 0, ampere1, 0xC0, 0xac3, -1)
/* Do not swap around "emag" and "xgene1",
this order is required to handle variant correctly. */
-AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
+AARCH64_CORE("emag", emag, xgene1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, emag, 0x50, 0x000, 3)
/* APM ('P') cores. */
-AARCH64_CORE("xgene1", xgene1, xgene1, V8A, AARCH64_FL_FOR_V8A, xgene1, 0x50, 0x000, -1)
+AARCH64_CORE("xgene1", xgene1, xgene1, V8A, 0, xgene1, 0x50, 0x000, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
-AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("falkor", falkor, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
+AARCH64_CORE("qdf24xx", qdf24xx, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO | AARCH64_FL_RDMA, qdf24xx, 0x51, 0xC00, -1)
/* Samsung ('S') cores. */
-AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
+AARCH64_CORE("exynos-m1", exynosm1, exynosm1, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1, 0x53, 0x001, -1)
/* HXT ('h') cores. */
-AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
+AARCH64_CORE("phecda", phecda, falkor, V8A, AARCH64_FL_CRC | AARCH64_FL_CRYPTO, qdf24xx, 0x68, 0x000, -1)
/* ARMv8.1-A Architecture Processors. */
/* Broadcom ('B') cores. */
-AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
-AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("thunderx2t99p1", thunderx2t99p1, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
+AARCH64_CORE("vulcan", vulcan, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x42, 0x516, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_FOR_V8_1A | AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
+AARCH64_CORE("thunderx2t99", thunderx2t99, thunderx2t99, V8_1A, AARCH64_FL_CRYPTO, thunderx2t99, 0x43, 0x0af, -1)
/* ARMv8.2-A Architecture Processors. */
/* ARM ('A') cores. */
-AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
-AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
-AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
-AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
-AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
-AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
-AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
-AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
-AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
-AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
-AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
-AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
-AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
+AARCH64_CORE("cortex-a55", cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa53, 0x41, 0xd05, -1)
+AARCH64_CORE("cortex-a75", cortexa75, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, 0xd0a, -1)
+AARCH64_CORE("cortex-a76", cortexa76, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, 0xd0b, -1)
+AARCH64_CORE("cortex-a76ae", cortexa76ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0e, -1)
+AARCH64_CORE("cortex-a77", cortexa77, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, neoversen1, 0x41, 0xd0d, -1)
+AARCH64_CORE("cortex-a78", cortexa78, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd41, -1)
+AARCH64_CORE("cortex-a78ae", cortexa78ae, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd42, -1)
+AARCH64_CORE("cortex-a78c", cortexa78c, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE | AARCH64_FL_FLAGM | AARCH64_FL_PAUTH, neoversen1, 0x41, 0xd4b, -1)
+AARCH64_CORE("cortex-a65", cortexa65, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd06, -1)
+AARCH64_CORE("cortex-a65ae", cortexa65ae, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd43, -1)
+AARCH64_CORE("cortex-x1", cortexx1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd44, -1)
+AARCH64_CORE("ares", ares, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-n1", neoversen1, cortexa57, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
+AARCH64_CORE("neoverse-e1", neoversee1, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD | AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
/* Cavium ('C') cores. */
-AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
-AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
-AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2", octeontx2, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b0, -1)
+AARCH64_CORE("octeontx2t98", octeontx2t98, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b1, -1)
+AARCH64_CORE("octeontx2t96", octeontx2t96, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
-AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
-AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
-AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
-AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
+AARCH64_CORE("octeontx2t93", octeontx2t93, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2f95", octeontx2f95, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b3, -1)
+AARCH64_CORE("octeontx2f95n", octeontx2f95n, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b4, -1)
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 0x43, 0x0b5, -1)
/* Fujitsu ('F') cores. */
-AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
+AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.3-A Architecture Processors. */
/* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_FOR_V8_3A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
/* ARMv8.4-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, AARCH64_FL_SVE | AARCH64_FL_RCPC | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_PROFILE | AARCH64_FL_SSBS | AARCH64_FL_RNG, neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_FOR_V8_4A | AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
+AARCH64_CORE("saphira", saphira, saphira, V8_4A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC, saphira, 0x51, 0xC01, -1)
/* ARMv8-A big.LITTLE implementations. */
-AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
-AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
-AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
-AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_FOR_V8A | AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
+AARCH64_CORE("cortex-a57.cortex-a53", cortexa57cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa57, 0x41, AARCH64_BIG_LITTLE (0xd07, 0xd03), -1)
+AARCH64_CORE("cortex-a72.cortex-a53", cortexa72cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa72, 0x41, AARCH64_BIG_LITTLE (0xd08, 0xd03), -1)
+AARCH64_CORE("cortex-a73.cortex-a35", cortexa73cortexa35, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd04), -1)
+AARCH64_CORE("cortex-a73.cortex-a53", cortexa73cortexa53, cortexa53, V8A, AARCH64_FL_CRC, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd09, 0xd03), -1)
/* ARM DynamIQ big.LITTLE configurations. */
-AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
-AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_FOR_V8_2A | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
+AARCH64_CORE("cortex-a75.cortex-a55", cortexa75cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, cortexa73, 0x41, AARCH64_BIG_LITTLE (0xd0a, 0xd05), -1)
+AARCH64_CORE("cortex-a76.cortex-a55", cortexa76cortexa55, cortexa53, V8_2A, AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD, neoversen1, 0x41, AARCH64_BIG_LITTLE (0xd0b, 0xd05), -1)
/* Armv8-R Architecture Processors. */
-AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, AARCH64_FL_FOR_V8R, cortexa53, 0x41, 0xd15, -1)
+AARCH64_CORE("cortex-r82", cortexr82, cortexa53, V8R, 0, cortexa53, 0x41, 0xd15, -1)
/* Armv9.0-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
+AARCH64_CORE("cortex-a510", cortexa510, cortexa55, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, cortexa53, 0x41, 0xd46, -1)
-AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
+AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd47, -1)
-AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
+AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
-AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
+AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
-AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
-AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_FOR_V9A | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("demeter", demeter, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
+AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
#undef AARCH64_CORE
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 22b51e12f..f975aad07 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2946,7 +2946,7 @@ static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
{NAME, IDENT, SCHED, AARCH64_ARCH_##ARCH, \
- FLAGS, &COSTS##_tunings},
+ AARCH64_FL_FOR_##ARCH | FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
{"generic", generic, cortexa53, AARCH64_ARCH_V8A,
AARCH64_FL_FOR_V8A, &generic_tunings},
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index 644780ef2..97690de62 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -62,7 +62,7 @@ struct aarch64_core_data
#define DEFAULT_ARCH "8A"
#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
- { CORE_NAME, #ARCH, IMP, PART, VARIANT, FLAGS },
+ { CORE_NAME, #ARCH, IMP, PART, VARIANT, AARCH64_FL_FOR_##ARCH | FLAGS },
static struct aarch64_core_data aarch64_cpu_data[] =
{
--
2.33.0
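A simplified sketch of the derivation described above (the flag values and struct are invented for illustration): the ARCH field is token-pasted onto AARCH64_FL_FOR_, so each core entry no longer repeats its architecture's baseline flags.

#include <stdio.h>
#include <stdint.h>

#define AARCH64_FL_CRC     (UINT64_C (1) << 0) /* stand-in feature bit */
#define AARCH64_FL_FOR_V8A (UINT64_C (1) << 1) /* stand-in Armv8-A baseline */

/* Paste the ARCH argument onto AARCH64_FL_FOR_ to recover the baseline. */
#define AARCH64_CORE(NAME, ARCH, FLAGS) \
  { NAME, AARCH64_FL_FOR_##ARCH | (FLAGS) },

struct core { const char *name; uint64_t flags; };

static const struct core cores[] = {
  AARCH64_CORE ("cortex-a53", V8A, AARCH64_FL_CRC)
};

int main (void)
{
  /* Both the baseline and the per-core CRC bit are set: prints 0x3. */
  printf ("%s: %#llx\n", cores[0].name,
          (unsigned long long) cores[0].flags);
  return 0;
}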


@ -0,0 +1,83 @@
From f6137d5be2761caea75dcc1c98d941ceec161456 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:53 +0100
Subject: [PATCH 016/157] [Backport][SME] aarch64: Remove AARCH64_FL_RCPC8_4
[PR107025]
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0f244d848cffeda68f0eb4c5bb9c7e629bf2e957
AARCH64_FL_RCPC8_4 is an odd-one-out in that it has no associated
entry in aarch64-option-extensions.def. This means that, although
it is internally separated from AARCH64_FL_V8_4A, there is no
mechanism for turning it on and off individually, independently
of armv8.4-a.
The only place that the flag was used independently was in the
entry for thunderx3t110, which enabled it alongside V8_3A.
As noted in PR107025, this means that any use of the extension
will fail to assemble.
In the PR trail, Andrew suggested removing the core entry.
That might be best long-term, but since the barrier for removing
command-line options without a deprecation period is very high,
this patch instead just drops the flag from the core entry.
We'll still produce correct code.
gcc/
PR target/107025
* config/aarch64/aarch64.h (AARCH64_FL_RCPC8_4): Delete.
(AARCH64_FL_FOR_V8_4A): Update accordingly.
(AARCH64_ISA_RCPC8_4): Use AARCH64_FL_V8_4A directly.
* config/aarch64/aarch64-cores.def (thunderx3t110): Remove
AARCH64_FL_RCPC8_4.
---
gcc/config/aarch64/aarch64-cores.def | 2 +-
gcc/config/aarch64/aarch64.h | 5 ++---
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 008b0b8c1..cf500d0a9 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -133,7 +133,7 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, AARCH64_FL_CRYPTO | AARCH64_FL_F
/* ARMv8.3-A Architecture Processors. */
/* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML | AARCH64_FL_RCPC8_4, thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, AARCH64_FL_CRYPTO | AARCH64_FL_RCPC | AARCH64_FL_SM4 | AARCH64_FL_SHA3 | AARCH64_FL_F16FML, thunderx3t110, 0x43, 0x0b8, 0x0a)
/* ARMv8.4-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 918a14193..f4e0cd148 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -173,7 +173,6 @@
#define AARCH64_FL_SM4 (1 << 17) /* Has ARMv8.4-A SM3 and SM4. */
#define AARCH64_FL_SHA3 (1 << 18) /* Has ARMv8.4-a SHA3 and SHA512. */
#define AARCH64_FL_F16FML (1 << 19) /* Has ARMv8.4-a FP16 extensions. */
-#define AARCH64_FL_RCPC8_4 (1 << 20) /* Has ARMv8.4-a RCPC extensions. */
/* Statistical Profiling extensions. */
#define AARCH64_FL_PROFILE (1 << 21)
@@ -265,7 +264,7 @@
(AARCH64_FL_FOR_V8_2A | AARCH64_FL_V8_3A | AARCH64_FL_PAUTH)
#define AARCH64_FL_FOR_V8_4A \
(AARCH64_FL_FOR_V8_3A | AARCH64_FL_V8_4A | AARCH64_FL_F16FML \
- | AARCH64_FL_DOTPROD | AARCH64_FL_RCPC8_4 | AARCH64_FL_FLAGM)
+ | AARCH64_FL_DOTPROD | AARCH64_FL_FLAGM)
#define AARCH64_FL_FOR_V8_5A \
(AARCH64_FL_FOR_V8_4A | AARCH64_FL_V8_5A \
| AARCH64_FL_SB | AARCH64_FL_SSBS | AARCH64_FL_PREDRES)
@@ -313,7 +312,7 @@
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
-#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_RCPC8_4)
+#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A)
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
#define AARCH64_ISA_TME (aarch64_isa_flags & AARCH64_FL_TME)
--
2.33.0


@ -0,0 +1,154 @@
From c6698a5feb07fc0cda89a54a0ee4006295ac6dbe Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:53 +0100
Subject: [PATCH 017/157] [Backport][SME] aarch64: Fix transitive closure of
features
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=b754d32d3053a4ba2a82361ac0f2739797a811f1
aarch64-option-extensions.def requires us to maintain the transitive
closure of options by hand. This patch fixes a few cases where a
flag was missed.
+noaes and +nosha2 now disable +crypto, which IMO makes more
sense and is consistent with the Clang behaviour.
gcc/
* config/aarch64/aarch64-option-extensions.def (dotprod): Depend
on fp as well as simd.
(sha3): Likewise.
(aes): Likewise. Make +noaes disable crypto.
(sha2): Likewise +nosha2. Also make +nosha2 disable sha3 and
sve2-sha3.
(sve2-sha3): Depend on sha2 as well as sha3.
gcc/testsuite/
* gcc.target/aarch64/options_set_6.c: Expect +crypto+nosha2 to
disable crypto but keep aes.
* gcc.target/aarch64/pragma_cpp_predefs_4.c: New test.
---
.../aarch64/aarch64-option-extensions.def | 16 ++++---
.../gcc.target/aarch64/options_set_6.c | 5 +-
.../gcc.target/aarch64/pragma_cpp_predefs_4.c | 47 +++++++++++++++++++
3 files changed, 58 insertions(+), 10 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index b4d0ac8b6..b98008127 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -113,28 +113,29 @@ AARCH64_OPT_EXTENSION("rdma", AARCH64_FL_RDMA, \
/* Enabling "dotprod" also enables "simd".
Disabling "dotprod" only disables "dotprod". */
-AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_SIMD, 0, \
+AARCH64_OPT_EXTENSION("dotprod", AARCH64_FL_DOTPROD, AARCH64_FL_FPSIMD, 0, \
false, "asimddp")
/* Enabling "aes" also enables "simd".
Disabling "aes" disables "aes" and "sve2-aes'. */
-AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_SIMD, \
- AARCH64_FL_SVE2_AES, false, "aes")
+AARCH64_OPT_EXTENSION("aes", AARCH64_FL_AES, AARCH64_FL_FPSIMD, \
+ AARCH64_FL_SVE2_AES | AARCH64_FL_CRYPTO, false, "aes")
/* Enabling "sha2" also enables "simd".
Disabling "sha2" just disables "sha2". */
-AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_SIMD, 0, false, \
- "sha1 sha2")
+AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \
+ AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \
+ AARCH64_FL_SVE2_SHA3, false, "sha1 sha2")
/* Enabling "sha3" enables "simd" and "sha2".
Disabling "sha3" disables "sha3" and "sve2-sha3". */
-AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_SIMD | \
+AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \
AARCH64_FL_SHA2, AARCH64_FL_SVE2_SHA3, false, \
"sha3 sha512")
/* Enabling "sm4" also enables "simd".
Disabling "sm4" disables "sm4" and "sve2-sm4". */
-AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_SIMD, \
+AARCH64_OPT_EXTENSION("sm4", AARCH64_FL_SM4, AARCH64_FL_FPSIMD, \
AARCH64_FL_SVE2_SM4, false, "sm3 sm4")
/* Enabling "fp16fml" also enables "fp" and "fp16".
@@ -192,6 +193,7 @@ AARCH64_OPT_EXTENSION("sve2-aes", AARCH64_FL_SVE2_AES, AARCH64_FL_AES | \
/* Enabling "sve2-sha3" also enables "sha3", "simd", "fp16", "fp", "sve", and
"sve2". Disabling "sve2-sha3" just disables "sve2-sha3". */
AARCH64_OPT_EXTENSION("sve2-sha3", AARCH64_FL_SVE2_SHA3, AARCH64_FL_SHA3 | \
+ AARCH64_FL_SHA2 | \
AARCH64_FL_SIMD | AARCH64_FL_F16 | AARCH64_FL_FP | \
AARCH64_FL_SVE | AARCH64_FL_SVE2, 0, false, "svesha3")
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_6.c b/gcc/testsuite/gcc.target/aarch64/options_set_6.c
index 90a055928..2a1d7fe5b 100644
--- a/gcc/testsuite/gcc.target/aarch64/options_set_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_6.c
@@ -6,7 +6,6 @@ int main ()
return 0;
}
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+aes} 1 } } */
-/* Group as a whole was requested to be turned on, crypto itself is a bit and so
- just turning off one feature can't turn it off. */
+/* +crypto turns on +aes and +sha2, but +nosha2 disables +crypto. */
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
new file mode 100644
index 000000000..0e6461fa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -0,0 +1,47 @@
+#pragma GCC target "+nothing+dotprod"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+aes"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sha2"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sha3"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sm4"
+#ifndef __ARM_FEATURE_FMA
+#error Foo
+#endif
+
+#pragma GCC target "+crypto+noaes"
+#ifdef __ARM_FEATURE_CRYPTO
+#error Foo
+#endif
+
+#pragma GCC target "+crypto+nosha2"
+#ifdef __ARM_FEATURE_CRYPTO
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sve2-sha3"
+#ifndef __ARM_FEATURE_SHA2
+#error Foo
+#endif
+
+#pragma GCC target "+sve2-sha3+nosha2"
+#ifdef __ARM_FEATURE_SHA3
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_SVE2_SHA3
+#error Foo
+#endif
--
2.33.0
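A minimal sketch of the hand-maintained closure described above (the flag bits and ext table are invented for illustration): each extension records both the flags it switches on and the dependent flags that must be switched off with it.

#include <stdio.h>
#include <stdint.h>

#define FL_FP     (UINT64_C (1) << 0)
#define FL_SIMD   (UINT64_C (1) << 1)
#define FL_SHA2   (UINT64_C (1) << 2)
#define FL_SHA3   (UINT64_C (1) << 3)
#define FL_CRYPTO (UINT64_C (1) << 4)

struct ext { const char *name; uint64_t on; uint64_t off; };

/* +sha2 pulls in fp and simd; +nosha2 must also drop crypto, sha3 and
   anything else that depends on sha2, mirroring the fixes above. */
static const struct ext sha2 =
  { "sha2", FL_SHA2 | FL_FP | FL_SIMD, FL_SHA2 | FL_CRYPTO | FL_SHA3 };

int main (void)
{
  uint64_t flags = 0;
  flags |= sha2.on;   /* +sha2: fp, simd, sha2 */
  flags &= ~sha2.off; /* +nosha2: sha2 goes, and so do crypto and sha3 */
  printf ("%#llx\n", (unsigned long long) flags); /* fp|simd = 0x3 */
  return 0;
}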


@ -0,0 +1,194 @@
From 4a2d0bdf5c9a5f4ee615c1d0768cb2e8a3dfef4a Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:54 +0100
Subject: [PATCH 018/157] [Backport][SME] aarch64: Reorder an entry in
aarch64-option-extensions.def
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c067c474f85b1e9c56fb34dd51ef0eec9221b766
aarch64-option-extensions.def was topologically sorted except
for one case: crypto came before its aes and sha2 dependencies.
This patch moves crypto after sha2 instead.
gcc/
* config/aarch64/aarch64-option-extensions.def: Move crypto
after sha2.
gcc/testsuite/
* gcc.target/aarch64/cpunative/native_cpu_0.c: Expect +crypto
to come after +crc.
* gcc.target/aarch64/cpunative/native_cpu_13.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_16.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_17.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_6.c: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_7.c: Likewise.
* gcc.target/aarch64/options_set_2.c: Likewise.
* gcc.target/aarch64/options_set_3.c: Likewise.
* gcc.target/aarch64/options_set_4.c: Likewise.
---
.../aarch64/aarch64-option-extensions.def | 20 +++++++++----------
.../aarch64/cpunative/native_cpu_0.c | 2 +-
.../aarch64/cpunative/native_cpu_13.c | 2 +-
.../aarch64/cpunative/native_cpu_16.c | 2 +-
.../aarch64/cpunative/native_cpu_17.c | 2 +-
.../aarch64/cpunative/native_cpu_6.c | 2 +-
.../aarch64/cpunative/native_cpu_7.c | 2 +-
.../gcc.target/aarch64/options_set_2.c | 2 +-
.../gcc.target/aarch64/options_set_3.c | 2 +-
.../gcc.target/aarch64/options_set_4.c | 4 ++--
10 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index b98008127..df2c8d19b 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -76,16 +76,6 @@ AARCH64_OPT_EXTENSION("simd", AARCH64_FL_SIMD, AARCH64_FL_FP, \
AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM, \
false, "asimd")
-/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
- Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
- "sve2-aes", "sve2-sha3", "sve2-sm4". */
-AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \
- AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \
- AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
- AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \
- AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \
- "aes pmull sha1 sha2")
-
/* Enabling or disabling "crc" only changes "crc". */
AARCH64_OPT_EXTENSION("crc", AARCH64_FL_CRC, 0, 0, false, "crc32")
@@ -127,6 +117,16 @@ AARCH64_OPT_EXTENSION("sha2", AARCH64_FL_SHA2, AARCH64_FL_FPSIMD, \
AARCH64_FL_CRYPTO | AARCH64_FL_SHA3 | \
AARCH64_FL_SVE2_SHA3, false, "sha1 sha2")
+/* Enabling "crypto" also enables "fp", "simd", "aes" and "sha2".
+ Disabling "crypto" disables "crypto", "aes", "sha2", "sha3" and "sm3/sm4",
+ "sve2-aes", "sve2-sha3", "sve2-sm4". */
+AARCH64_OPT_EXTENSION("crypto", AARCH64_FL_CRYPTO, AARCH64_FL_FP | \
+ AARCH64_FL_SIMD | AARCH64_FL_AES | AARCH64_FL_SHA2, \
+ AARCH64_FL_AES | AARCH64_FL_SHA2 | AARCH64_FL_SHA3 | \
+ AARCH64_FL_SM4 | AARCH64_FL_SVE2_AES | \
+ AARCH64_FL_SVE2_SHA3 | AARCH64_FL_SVE2_SM4, true, \
+ "aes pmull sha1 sha2")
+
/* Enabling "sha3" enables "simd" and "sha2".
Disabling "sha3" disables "sha3" and "sve2-sha3". */
AARCH64_OPT_EXTENSION("sha3", AARCH64_FL_SHA3, AARCH64_FL_FPSIMD | \
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
index f155f51ba..8499f87c3 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_0.c
@@ -7,6 +7,6 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */
/* Test a normal looking procinfo. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
index b7b3a8e13..551669091 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_13.c
@@ -7,6 +7,6 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto} } } */
/* Test one with mixed order of feature bits. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
index a424e7c56..2f963bb23 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_16.c
@@ -7,6 +7,6 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */
/* Test a normal looking procinfo. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c
index c269c5fef..c68a697aa 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_17.c
@@ -7,6 +7,6 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+crc\+dotprod\+sve2} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+crc\+dotprod\+crypto\+sve2} } } */
/* Test a normal looking procinfo. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c
index da72052e6..7608e8845 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_6.c
@@ -7,7 +7,7 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */
/* Test one where the feature bits for crypto and fp16 are given in
same order as declared in options file. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c
index 96ad4c14d..72b14b4f6 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_7.c
@@ -7,7 +7,7 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+crypto\+fp16} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+fp16\+crypto} } } */
/* Test one where the crypto and fp16 options are specified in different
order from what is in the options file. */
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_2.c b/gcc/testsuite/gcc.target/aarch64/options_set_2.c
index 3476febce..f82cb5f78 100644
--- a/gcc/testsuite/gcc.target/aarch64/options_set_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_2.c
@@ -6,6 +6,6 @@ int main ()
return 0;
}
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */
/* Check to see if crc and crypto are maintained if crypto specified. */
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_3.c b/gcc/testsuite/gcc.target/aarch64/options_set_3.c
index 4558339f1..7d350cfa3 100644
--- a/gcc/testsuite/gcc.target/aarch64/options_set_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_3.c
@@ -6,6 +6,6 @@ int main ()
return 0;
}
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */
/* Check if smallest set is maintained when outputting. */
diff --git a/gcc/testsuite/gcc.target/aarch64/options_set_4.c b/gcc/testsuite/gcc.target/aarch64/options_set_4.c
index 15514bfe9..5370e02e1 100644
--- a/gcc/testsuite/gcc.target/aarch64/options_set_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/options_set_4.c
@@ -6,7 +6,7 @@ int main ()
return 0;
}
-/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crypto\+crc} 1 } } */
+/* { dg-final { scan-assembler-times {\.arch armv8\.2\-a\+crc\+crypto} 1 } } */
/* Check if individual bits that make up a grouping is specified that only the
- grouping is kept. */
\ No newline at end of file
+ grouping is kept. */
--
2.33.0
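A sketch of why the file must stay topologically sorted (the flags and selection loop are invented, but mirror the rewrite in a later patch in this series): a reverse scan over a topologically ordered table visits each extension only after everything it turns on, so a single pass can pick a minimal set of "+ext" strings.

#include <stdio.h>
#include <stdint.h>

#define FL_FP     (UINT64_C (1) << 0)
#define FL_SIMD   (UINT64_C (1) << 1)
#define FL_AES    (UINT64_C (1) << 2)
#define FL_SHA2   (UINT64_C (1) << 3)
#define FL_CRYPTO (UINT64_C (1) << 4)

struct ext { const char *name; uint64_t canonical; uint64_t on; };

/* Definition order is topological: every entry follows its dependencies,
   which is what moving crypto after sha2 restores. */
static const struct ext exts[] = {
  { "fp",     FL_FP,     FL_FP },
  { "simd",   FL_SIMD,   FL_SIMD | FL_FP },
  { "aes",    FL_AES,    FL_AES | FL_SIMD | FL_FP },
  { "sha2",   FL_SHA2,   FL_SHA2 | FL_SIMD | FL_FP },
  { "crypto", FL_CRYPTO, FL_CRYPTO | FL_AES | FL_SHA2 | FL_SIMD | FL_FP },
};

int main (void)
{
  uint64_t want = FL_CRYPTO | FL_AES | FL_SHA2 | FL_SIMD | FL_FP;
  uint64_t added = 0;
  for (int i = (int) (sizeof exts / sizeof exts[0]) - 1; i >= 0; i--)
    if ((want & exts[i].canonical) && !(added & exts[i].canonical))
      {
        printf ("+%s\n", exts[i].name); /* prints just "+crypto" */
        added |= exts[i].on;
      }
  return 0;
}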

File diff suppressed because it is too large.


@ -0,0 +1,467 @@
From e7ebc54e809e8647ff054a02fbaf946b41414004 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:55 +0100
Subject: [PATCH 020/157] [Backport][SME] aarch64: Simplify generation of .arch
strings
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4ebf56f283ae5a98ae4c43079b7e8459945ef18d
aarch64-common.cc has two arrays, one maintaining the original
definition order and one sorted by population count. Sorting
by population count was a way of ensuring topological ordering,
taking advantage of the fact that the entries are partially
ordered by the subset relation. However, the sorting is not
needed now that the .def file is forced to have topological
order from the outset.
Other changes are:
(1) The population count used:
uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
where I think the & was supposed to be |. This meant that the
counts would always be 1 in practice, since flag_canonical is
a single bit. This led us to printing +nofp+nosimd even though
GCC "knows" (and GAS agrees) that +nofp disables simd.
(2) The .arch output code converts +aes+sha2 to +crypto. I think
the main reason for doing this is to support assemblers that
predate the individual per-feature crypto flags. It therefore
seems more natural to treat it as a special case, rather than
as an instance of a general pattern. Hopefully we won't do
something similar in future!
(There is already special handling of CRC, for different reasons.)
(3) Previously, if the /proc/cpuinfo code saw a feature like sve,
it would assume the presence of all the features that sve
depends on. It would be possible to keep that behaviour
if necessary, but it was simpler to assume the presence of
fp16 (say) only when fphp is present. There's an argument
that that's more conservatively correct too.
gcc/
* common/config/aarch64/aarch64-common.cc
(TARGET_OPTION_INIT_STRUCT): Delete.
(aarch64_option_extension): Remove is_synthetic_flag.
(all_extensions): Update accordingly.
(all_extensions_by_on, opt_ext, opt_ext_cmp): Delete.
(aarch64_option_init_struct, aarch64_contains_opt): Delete.
(aarch64_get_extension_string_for_isa_flags): Rewrite to use
all_extensions instead of all_extensions_on.
gcc/testsuite/
* gcc.target/aarch64/cpunative/info_8: Add all dependencies of sve.
* gcc.target/aarch64/cpunative/info_9: Likewise svesm4.
* gcc.target/aarch64/cpunative/info_15: Likewise.
* gcc.target/aarch64/cpunative/info_16: Likewise sve2.
* gcc.target/aarch64/cpunative/info_17: Likewise.
* gcc.target/aarch64/cpunative/native_cpu_2.c: Expect just +nofp
rather than +nofp+nosimd.
* gcc.target/aarch64/cpunative/native_cpu_10.c: Likewise.
* gcc.target/aarch64/target_attr_15.c: Likewise.
---
gcc/common/config/aarch64/aarch64-common.cc | 244 ++++--------------
.../gcc.target/aarch64/cpunative/info_15 | 2 +-
.../gcc.target/aarch64/cpunative/info_16 | 2 +-
.../gcc.target/aarch64/cpunative/info_17 | 2 +-
.../gcc.target/aarch64/cpunative/info_8 | 2 +-
.../gcc.target/aarch64/cpunative/info_9 | 2 +-
.../aarch64/cpunative/native_cpu_10.c | 2 +-
.../aarch64/cpunative/native_cpu_2.c | 2 +-
.../gcc.target/aarch64/target_attr_15.c | 2 +-
9 files changed, 55 insertions(+), 205 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 74729bb30..057dc094d 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -42,8 +42,6 @@
#undef TARGET_OPTION_OPTIMIZATION_TABLE
#define TARGET_OPTION_OPTIMIZATION_TABLE aarch_option_optimization_table
-#undef TARGET_OPTION_INIT_STRUCT
-#define TARGET_OPTION_INIT_STRUCT aarch64_option_init_struct
#define INVALID_IMP ((unsigned) -1)
@@ -209,7 +207,6 @@ struct aarch64_option_extension
const uint64_t flag_canonical;
const uint64_t flags_on;
const uint64_t flags_off;
- const bool is_synthetic;
};
/* ISA extensions in AArch64. */
@@ -219,24 +216,9 @@ static const struct aarch64_option_extension all_extensions[] =
{NAME, AARCH64_FL_##IDENT, \
feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
- & ~AARCH64_FL_##IDENT, \
- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
+ & ~AARCH64_FL_##IDENT},
#include "config/aarch64/aarch64-option-extensions.def"
- {NULL, 0, 0, 0, false}
-};
-
-/* A copy of the ISA extensions list for AArch64 sorted by the popcount of
- bits and extension turned on. Cached for efficiency. */
-static struct aarch64_option_extension all_extensions_by_on[] =
-{
-#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
- {NAME, AARCH64_FL_##IDENT, \
- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
- & ~AARCH64_FL_##IDENT, \
- AARCH64_FL_##IDENT == AARCH64_FL_CRYPTO},
-#include "config/aarch64/aarch64-option-extensions.def"
- {NULL, 0, 0, 0, false}
+ {NULL, 0, 0, 0}
};
struct processor_name_to_arch
@@ -353,79 +335,6 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
candidates->safe_push (opt->name);
}
-/* Comparer to sort aarch64's feature extensions by population count. Largest
- first. */
-
-typedef const struct aarch64_option_extension opt_ext;
-
-int opt_ext_cmp (const void* a, const void* b)
-{
- opt_ext *opt_a = (opt_ext *)a;
- opt_ext *opt_b = (opt_ext *)b;
-
- /* We consider the total set of bits an options turns on to be the union of
- the singleton set containing the option itself and the set of options it
- turns on as a dependency. As an example +dotprod turns on FL_DOTPROD and
- FL_SIMD. As such the set of bits represented by this option is
- {FL_DOTPROD, FL_SIMD}. */
- uint64_t total_flags_a = opt_a->flag_canonical & opt_a->flags_on;
- uint64_t total_flags_b = opt_b->flag_canonical & opt_b->flags_on;
- int popcnt_a = popcount_hwi ((HOST_WIDE_INT)total_flags_a);
- int popcnt_b = popcount_hwi ((HOST_WIDE_INT)total_flags_b);
- int order = popcnt_b - popcnt_a;
-
- /* If they have the same amount of bits set, give it a more
- deterministic ordering by using the value of the bits themselves. */
- if (order != 0)
- return order;
-
- if (total_flags_a != total_flags_b)
- return total_flags_a < total_flags_b ? 1 : -1;
-
- return 0;
-}
-
-/* Implement TARGET_OPTION_INIT_STRUCT. */
-
-static void
-aarch64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED)
-{
- /* Sort the extensions based on how many bits they set, order the larger
- counts first. We sort the list because this makes processing the
- feature bits O(n) instead of O(n^2). While n is small, the function
- to calculate the feature strings is called on every options push,
- pop and attribute change (arm_neon headers, lto etc all cause this to
- happen quite frequently). It is a trade-off between time and space and
- so time won. */
- int n_extensions
- = sizeof (all_extensions) / sizeof (struct aarch64_option_extension);
- qsort (&all_extensions_by_on, n_extensions,
- sizeof (struct aarch64_option_extension), opt_ext_cmp);
-}
-
-/* Checks to see if enough bits from the option OPT are enabled in
- ISA_FLAG_BITS to be able to replace the individual options with the
- canonicalized version of the option. This is done based on two rules:
-
- 1) Synthetic groups, such as +crypto we only care about the bits that are
- turned on. e.g. +aes+sha2 can be replaced with +crypto.
-
- 2) Options that themselves have a bit, such as +rdma, in this case, all the
- feature bits they turn on must be available and the bit for the option
- itself must be. In this case it's effectively a reduction rather than a
- grouping. e.g. +fp+simd is not enough to turn on +rdma, for that you would
- need +rdma+fp+simd which is reduced down to +rdma.
-*/
-
-static bool
-aarch64_contains_opt (uint64_t isa_flag_bits, opt_ext *opt)
-{
- uint64_t flags_check
- = opt->is_synthetic ? opt->flags_on : opt->flag_canonical;
-
- return (isa_flag_bits & flags_check) == flags_check;
-}
-
/* Return a string representation of ISA_FLAGS. DEFAULT_ARCH_FLAGS
gives the default set of flags which are implied by whatever -march
we'd put out. Our job is to figure out the minimal set of "+" and
@@ -436,118 +345,59 @@ std::string
aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
uint64_t default_arch_flags)
{
- const struct aarch64_option_extension *opt = NULL;
std::string outstr = "";
- uint64_t isa_flag_bits = isa_flags;
-
- /* Pass one: Minimize the search space by reducing the set of options
- to the smallest set that still turns on the same features as before in
- conjunction with the bits that are turned on by default for the selected
- architecture. */
- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
+ aarch64_feature_flags current_flags = default_arch_flags;
+
+ /* As a special case, do not assume that the assembler will enable CRC
+ even if it is the default for the architecture. This is required
+ because some CPUs had an incorrect specification in older assemblers:
+ even though CRC should be the default for these cases the -mcpu
+ values would not turn it on.
+
+ However, assemblers with Armv8-R AArch64 support should not have this
+ issue, so we don't need this fix when targeting Armv8-R. */
+ auto explicit_flags = (!(current_flags & AARCH64_FL_V8R)
+ ? AARCH64_FL_CRC : 0);
+
+ /* Add the features in isa_flags & ~current_flags using the smallest
+ possible number of extensions. We can do this by iterating over the
+ array in reverse order, since the array is sorted topologically.
+ But in order to make the output more readable, it seems better
+ to add the strings in definition order. */
+ aarch64_feature_flags added = 0;
+ for (unsigned int i = ARRAY_SIZE (all_extensions); i-- > 0; )
{
- /* If the bit is on by default, then all the options it turns on are also
- on by default due to the transitive dependencies.
-
- If the option is enabled explicitly in the set then we need to emit
- an option for it. Since this list is sorted by extensions setting the
- largest number of featers first, we can be sure that nothing else will
- ever need to set the bits we already set. Consider the following
- situation:
-
- Feat1 = A + B + C
- Feat2 = A + B
- Feat3 = A + D
- Feat4 = B + C
- Feat5 = C
-
- The following results are expected:
-
- A + C = A + Feat5
- B + C = Feat4
- Feat4 + A = Feat1
- Feat2 + Feat5 = Feat1
- Feat1 + C = Feat1
- Feat3 + Feat4 = Feat1 + D
-
- This search assumes that all invidual feature bits are use visible,
- in other words the user must be able to do +A, +B, +C and +D. */
- if (aarch64_contains_opt (isa_flag_bits | default_arch_flags, opt))
- {
- /* We remove all the dependent bits, to prevent them from being turned
- on twice. This only works because we assume that all there are
- individual options to set all bits standalone. */
-
- /* PR target/94396.
-
- For flags which would already imply a bit that's on by default (e.g
- fp16fml which implies +fp,+fp16) we must emit the flags that are not
- on by default. i.e. in Armv8.4-a +fp16fml is default if +fp16. So
- if a user passes armv8.4-a+fp16 (or +fp16fml) then we need to emit
- +fp16. But if +fp16fml is used in an architecture where it is
- completely optional we only have to emit the canonical flag. */
- uint64_t toggle_bits = opt->flags_on & default_arch_flags;
- /* Now check to see if the canonical flag is on by default. If it
- is not then enabling it will enable all bits in flags_on. */
- if ((opt->flag_canonical & default_arch_flags) == 0)
- toggle_bits = opt->flags_on;
-
- isa_flag_bits &= ~toggle_bits;
- isa_flag_bits |= opt->flag_canonical;
- }
- }
+ auto &opt = all_extensions[i];
- /* By toggling bits on and off, we may have set bits on that are already
- enabled by default. So we mask the default set out so we don't emit an
- option for them. Instead of checking for this each time during Pass One
- we just mask all default bits away at the end. */
- isa_flag_bits &= ~default_arch_flags;
-
- /* We now have the smallest set of features we need to process. A subsequent
- linear scan of the bits in isa_flag_bits will allow us to print the ext
- names. However as a special case if CRC was enabled before, always print
- it. This is required because some CPUs have an incorrect specification
- in older assemblers. Even though CRC should be the default for these
- cases the -mcpu values won't turn it on.
-
- Note that assemblers with Armv8-R AArch64 support should not have this
- issue, so we don't need this fix when targeting Armv8-R. */
- if ((isa_flags & AARCH64_ISA_CRC) && !AARCH64_ISA_V8R)
- isa_flag_bits |= AARCH64_ISA_CRC;
-
- /* Pass Two:
- Print the option names that we're sure we must turn on. These are only
- optional extension names. Mandatory ones have already been removed and
- ones we explicitly want off have been too. */
- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
- {
- if (isa_flag_bits & opt->flag_canonical)
- {
- outstr += "+";
- outstr += opt->name;
- }
- }
+ /* As a special case, emit +crypto rather than +aes+sha2,
+ in order to support assemblers that predate the separate
+ per-feature crypto flags. */
+ auto flags = opt.flag_canonical;
+ if (flags == AARCH64_FL_CRYPTO)
+ flags = AARCH64_FL_AES | AARCH64_FL_SHA2;
- /* Pass Three:
- Print out a +no for any mandatory extension that we are
- turning off. By this point aarch64_parse_extension would have ensured
- that any optional extensions are turned off. The only things left are
- things that can't be turned off usually, e.g. something that is on by
- default because it's mandatory and we want it off. For turning off bits
- we don't guarantee the smallest set of flags, but instead just emit all
- options the user has specified.
-
- The assembler requires all +<opts> to be printed before +no<opts>. */
- for (opt = all_extensions_by_on; opt->name != NULL; opt++)
- {
- if ((~isa_flags) & opt->flag_canonical
- && !((~default_arch_flags) & opt->flag_canonical))
+ if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
{
- outstr += "+no";
- outstr += opt->name;
+ current_flags |= opt.flag_canonical | opt.flags_on;
+ added |= opt.flag_canonical;
}
}
+ for (auto &opt : all_extensions)
+ if (added & opt.flag_canonical)
+ {
+ outstr += "+";
+ outstr += opt.name;
+ }
+
+ /* Remove the features in current_flags & ~isa_flags. */
+ for (auto &opt : all_extensions)
+ if (opt.flag_canonical & current_flags & ~isa_flags)
+ {
+ current_flags &= ~(opt.flag_canonical | opt.flags_off);
+ outstr += "+no";
+ outstr += opt.name;
+ }
return outstr;
}
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
index bc6453945..6b425ea20 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_15
@@ -1,6 +1,6 @@
processor : 0
BogoMIPS : 100.00
-Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp
+Features : Lorem ipsum dolor sit ametd rebum expetendis per at Dolor lucilius referrentur ei mei virtute eruditi eum ne Iisque verter svesm4 asimd fp sve sve2 fphp asimdhp sm3 sm4
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
index 2c04ff19c..26f01c496 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_16
@@ -1,6 +1,6 @@
processor : 0
BogoMIPS : 100.00
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2
+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp
CPU implementer : 0xfe
CPU architecture: 8
CPU variant : 0x0
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
index 2c04ff19c..26f01c496 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_17
@@ -1,6 +1,6 @@
processor : 0
BogoMIPS : 100.00
-Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2
+Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 asimddp sve sve2 fphp asimdhp
CPU implementer : 0xfe
CPU architecture: 8
CPU variant : 0x0
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
index d6d9d03a2..76da16c57 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_8
@@ -1,6 +1,6 @@
processor : 0
BogoMIPS : 100.00
-Features : asimd sve fp
+Features : asimd sve fp fphp asimdhp
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9 b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
index c9aa4a9a0..14703dd1d 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/info_9
@@ -1,6 +1,6 @@
processor : 0
BogoMIPS : 100.00
-Features : asimd fp svesm4
+Features : asimd fp svesm4 sve sve2 fphp asimdhp sm3 sm4
CPU implementer : 0x41
CPU architecture: 8
CPU variant : 0x0
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
index 6a753965c..ddb06b822 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_10.c
@@ -7,6 +7,6 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */
/* Test one with no entry in feature list. */
diff --git a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
index aad71f434..edbdb5626 100644
--- a/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/cpunative/native_cpu_2.c
@@ -7,6 +7,6 @@ int main()
return 0;
}
-/* { dg-final { scan-assembler {\.arch armv8-a\+nofp\+nosimd} } } */
+/* { dg-final { scan-assembler {\.arch armv8-a\+nofp} } } */
/* Test one where asimd is provided byt no fp. */
diff --git a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
index 108b372e4..069a00108 100644
--- a/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
+++ b/gcc/testsuite/gcc.target/aarch64/target_attr_15.c
@@ -10,4 +10,4 @@ foo (int a)
return a + 1;
}
-/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\\+nosimd\n" 1 } } */
+/* { dg-final { scan-assembler-times "\\.arch armv8-a\\+nofp\n" 1 } } */
--
2.33.0
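
The rewrite above is easier to follow with a toy model. The sketch below is not the GCC code itself: it uses a hypothetical three-feature table (fp, simd, dotprod) in which, as a later patch in this series arranges, flags_on and flags_off include the feature's own bit. The table is assumed to be topologically sorted, so the reverse scan meets the most-derived wanted feature first and its dependencies are absorbed before they can be emitted separately.

  #include <cstdint>
  #include <cstdio>
  #include <string>

  struct ext { const char *name; uint64_t canonical, flags_on, flags_off; };

  constexpr uint64_t FP = 1, SIMD = 2, DOT = 4;
  constexpr ext table[] = {
    /* name       canonical  turns on          turns off with it */
    { "fp",       FP,        FP,               FP | SIMD | DOT },
    { "simd",     SIMD,      SIMD | FP,        SIMD | DOT },
    { "dotprod",  DOT,       DOT | SIMD | FP,  DOT },
  };

  static std::string ext_string (uint64_t isa, uint64_t defaults)
  {
    std::string out;
    uint64_t current = defaults, added = 0;

    /* Pass 1: reverse scan; a wanted feature absorbs its dependencies
       before they are considered on their own.  */
    for (unsigned i = 3; i-- > 0;)
      if ((table[i].canonical & isa & ~current) == table[i].canonical)
        {
          current |= table[i].flags_on;
          added |= table[i].canonical;
        }

    /* Pass 2: emit "+name" in definition order, for readability.  */
    for (const auto &e : table)
      if (added & e.canonical)
        { out += "+"; out += e.name; }

    /* Pass 3: emit "+no<name>" for defaults that ISA lacks; flags_off
       also clears everything that depended on the feature.  */
    for (const auto &e : table)
      if (e.canonical & current & ~isa)
        {
          current &= ~e.flags_off;
          out += "+no"; out += e.name;
        }
    return out;
  }

  int main ()
  {
    std::printf ("%s\n", ext_string (FP | SIMD | DOT, 0).c_str ()); /* +dotprod */
    std::printf ("%s\n", ext_string (0, FP | SIMD).c_str ());       /* +nofp */
  }

The second call prints "+nofp" alone, because turning fp off drags simd down with it. That is exactly why the native_cpu_10.c, native_cpu_2.c and target_attr_15.c expectations above change from "+nofp+nosimd" to plain "+nofp".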

From 7096be1673a10da5218a8620fb40b4b26e61c1d4 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:55 +0100
Subject: [PATCH 021/157] [Backport][SME] aarch64: Avoid std::string in static
data
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=13af9e9fda391f4f0566ad8f0b4d0448a7e984d0
Just a minor patch to avoid having to construct std::strings
in static data.
gcc/
* common/config/aarch64/aarch64-common.cc (processor_name_to_arch)
(arch_to_arch_name): Use const char * instead of std::string.
---
gcc/common/config/aarch64/aarch64-common.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 057dc094d..2bdf51b8b 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -223,7 +223,7 @@ static const struct aarch64_option_extension all_extensions[] =
struct processor_name_to_arch
{
- const std::string processor_name;
+ const char *const processor_name;
const enum aarch64_arch arch;
const uint64_t flags;
};
@@ -231,7 +231,7 @@ struct processor_name_to_arch
struct arch_to_arch_name
{
const enum aarch64_arch arch;
- const std::string arch_name;
+ const char *const arch_name;
const uint64_t flags;
};
--
2.33.0
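
The point of this change is initialization cost, not lookup cost: a std::string member forces a global constructor to run at program startup for each static table, while a const char * member lets the table be constant data. A minimal sketch of the distinction, assuming namespace-scope arrays like the ones in aarch64-common.cc:

  #include <string>

  struct with_string { std::string name; int arch; };
  struct with_cstr   { const char *name; int arch; };

  /* Needs a constructor run at startup (dynamic initialization).  */
  static const with_string heavy[] = { { "armv8-a", 8 } };

  /* Plain data; lives in read-only storage with no startup code.  */
  static constexpr with_cstr light[] = { { "armv8-a", 8 } };

  int main () { return heavy[0].arch - light[0].arch; }

In pre-C++20 dialects std::string cannot be built in a constant expression at all, so this change is also what unlocks the constexpr conversion in the next patch.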

From 99c5eb58e898417632b6d9a7b2b3d288b50e9b65 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:55 +0100
Subject: [PATCH 022/157] [Backport][SME] aarch64: Tweak constness of
option-related data
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=60dee638c8a7ae59c033868de7e7638c88b38ed2
Some of the option structures have all-const member variables.
That doesn't seem necessary: we can just use const on the objects
that are supposed to be read-only.
Also, with the new, more C++-heavy option handling, it seems
better to use constexpr for the static data, to make sure that
we're not adding unexpected overhead.
gcc/
* common/config/aarch64/aarch64-common.cc (aarch64_option_extension)
(processor_name_to_arch, arch_to_arch_name): Remove const from
member variables.
(all_extensions, all_cores, all_architectures): Make a constexpr.
* config/aarch64/aarch64.cc (processor): Remove const from
member variables.
(all_architectures): Make a constexpr.
* config/aarch64/driver-aarch64.cc (aarch64_core_data)
(aarch64_arch_driver_info): Remove const from member variables.
(aarch64_cpu_data, aarch64_arches): Make a constexpr.
(get_arch_from_id): Return a pointer to const.
(host_detect_local_cpu): Update accordingly.
---
gcc/common/config/aarch64/aarch64-common.cc | 26 ++++++++++-----------
gcc/config/aarch64/aarch64.cc | 14 +++++------
gcc/config/aarch64/driver-aarch64.cc | 15 ++++++------
3 files changed, 27 insertions(+), 28 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 2bdf51b8b..ac3486d71 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -203,14 +203,14 @@ aarch64_handle_option (struct gcc_options *opts,
/* An ISA extension in the co-processor and main instruction set space. */
struct aarch64_option_extension
{
- const char *const name;
- const uint64_t flag_canonical;
- const uint64_t flags_on;
- const uint64_t flags_off;
+ const char *name;
+ uint64_t flag_canonical;
+ uint64_t flags_on;
+ uint64_t flags_off;
};
/* ISA extensions in AArch64. */
-static const struct aarch64_option_extension all_extensions[] =
+static constexpr aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
{NAME, AARCH64_FL_##IDENT, \
@@ -223,21 +223,21 @@ static const struct aarch64_option_extension all_extensions[] =
struct processor_name_to_arch
{
- const char *const processor_name;
- const enum aarch64_arch arch;
- const uint64_t flags;
+ const char *processor_name;
+ aarch64_arch arch;
+ uint64_t flags;
};
struct arch_to_arch_name
{
- const enum aarch64_arch arch;
- const char *const arch_name;
- const uint64_t flags;
+ aarch64_arch arch;
+ const char *arch_name;
+ uint64_t flags;
};
/* Map processor names to the architecture revision they implement and
the default set of architectural feature flags they support. */
-static const struct processor_name_to_arch all_cores[] =
+static constexpr processor_name_to_arch all_cores[] =
{
#define AARCH64_CORE(NAME, CORE_IDENT, C, ARCH_IDENT, E, F, G, H, I) \
{NAME, AARCH64_ARCH_##ARCH_IDENT, feature_deps::cpu_##CORE_IDENT},
@@ -247,7 +247,7 @@ static const struct processor_name_to_arch all_cores[] =
};
/* Map architecture revisions to their string representation. */
-static const struct arch_to_arch_name all_architectures[] =
+static constexpr arch_to_arch_name all_architectures[] =
{
#define AARCH64_ARCH(NAME, B, ARCH_IDENT, D, E) \
{AARCH64_ARCH_##ARCH_IDENT, NAME, feature_deps::ARCH_IDENT ().enable},
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 1363873b1..71db7ace1 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2925,16 +2925,16 @@ aarch64_tuning_override_functions[] =
/* A processor implementing AArch64. */
struct processor
{
- const char *const name;
- enum aarch64_processor ident;
- enum aarch64_processor sched_core;
- enum aarch64_arch arch;
- const uint64_t flags;
- const struct tune_params *const tune;
+ const char *name;
+ aarch64_processor ident;
+ aarch64_processor sched_core;
+ aarch64_arch arch;
+ uint64_t flags;
+ const tune_params *tune;
};
/* Architectures implementing AArch64. */
-static const struct processor all_architectures[] =
+static constexpr processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, D, E) \
{NAME, CORE, CORE, AARCH64_ARCH_##ARCH_IDENT, \
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index ddfc9451f..ee9cb65a5 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -50,7 +50,7 @@ struct aarch64_core_data
unsigned char implementer_id; /* Exactly 8 bits */
unsigned int part_no; /* 12 bits + 12 bits */
unsigned variant;
- const uint64_t flags;
+ uint64_t flags;
};
#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
@@ -64,7 +64,7 @@ struct aarch64_core_data
#define AARCH64_CORE(CORE_NAME, CORE_IDENT, SCHED, ARCH, FLAGS, COSTS, IMP, PART, VARIANT) \
{ CORE_NAME, #ARCH, IMP, PART, VARIANT, feature_deps::cpu_##CORE_IDENT },
-static struct aarch64_core_data aarch64_cpu_data[] =
+static constexpr aarch64_core_data aarch64_cpu_data[] =
{
#include "aarch64-cores.def"
{ NULL, NULL, INVALID_IMP, INVALID_CORE, ALL_VARIANTS, 0 }
@@ -75,14 +75,14 @@ struct aarch64_arch_driver_info
{
const char* id;
const char* name;
- const uint64_t flags;
+ uint64_t flags;
};
/* Skip the leading "V" in the architecture name. */
#define AARCH64_ARCH(NAME, CORE, ARCH_IDENT, ARCH_REV, FLAGS) \
{ #ARCH_IDENT + 1, NAME, feature_deps::ARCH_IDENT ().enable },
-static struct aarch64_arch_driver_info aarch64_arches[] =
+static constexpr aarch64_arch_driver_info aarch64_arches[] =
{
#include "aarch64-arches.def"
{NULL, NULL, 0}
@@ -92,7 +92,7 @@ static struct aarch64_arch_driver_info aarch64_arches[] =
/* Return an aarch64_arch_driver_info for the architecture described
by ID, or NULL if ID describes something we don't know about. */
-static struct aarch64_arch_driver_info*
+static const aarch64_arch_driver_info *
get_arch_from_id (const char* id)
{
unsigned int i = 0;
@@ -396,8 +396,7 @@ host_detect_local_cpu (int argc, const char **argv)
if (aarch64_cpu_data[i].name == NULL)
{
- aarch64_arch_driver_info* arch_info
- = get_arch_from_id (DEFAULT_ARCH);
+ auto arch_info = get_arch_from_id (DEFAULT_ARCH);
gcc_assert (arch_info);
@@ -407,7 +406,7 @@ host_detect_local_cpu (int argc, const char **argv)
else if (arch)
{
const char *arch_id = aarch64_cpu_data[i].arch;
- aarch64_arch_driver_info* arch_info = get_arch_from_id (arch_id);
+ auto arch_info = get_arch_from_id (arch_id);
/* We got some arch indentifier that's not in aarch64-arches.def? */
if (!arch_info)
--
2.33.0
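
Two separate things are going on in this patch: constexpr on the object guarantees compile-time evaluation of the initializer (and already implies const), while const on individual members adds nothing to a constant object and only gets in the way of things like defaulted assignment. A small sketch with a hypothetical entry type:

  struct entry { const char *name; unsigned flags; };  /* no const members */

  static constexpr entry entries[] = {
    { "crc", 1u << 0 },
    { "lse", 1u << 1 },
  };

  /* constexpr already gives read-only storage and guarantees the
     initializer is evaluated at compile time.  */
  static_assert (entries[1].flags == 2u, "evaluated at compile time");

  int main () { return 0; }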

From bdb91009cf250fb22c21ae7f5072263492f2b08c Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:56 +0100
Subject: [PATCH 023/157] [Backport][SME] aarch64: Make more use of
aarch64_feature_flags
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fed55a60e5b230bc159617f26e33611073c672fd
A previous patch added an aarch64_feature_flags typedef, to abstract
the representation of the feature flags. This patch makes existing
code use the typedef too. Hope I've caught them all!
gcc/
* common/config/aarch64/aarch64-common.cc: Use aarch64_feature_flags
for feature flags throughout.
* config/aarch64/aarch64-protos.h: Likewise.
* config/aarch64/aarch64-sve-builtins.h: Likewise.
* config/aarch64/aarch64-sve-builtins.cc: Likewise.
* config/aarch64/aarch64.cc: Likewise.
* config/aarch64/aarch64.opt: Likewise.
* config/aarch64/driver-aarch64.cc: Likewise.
---
gcc/common/config/aarch64/aarch64-common.cc | 19 +++++++-------
gcc/config/aarch64/aarch64-protos.h | 5 ++--
gcc/config/aarch64/aarch64-sve-builtins.cc | 29 ++++++++++++---------
gcc/config/aarch64/aarch64-sve-builtins.h | 9 ++++---
gcc/config/aarch64/aarch64.cc | 29 +++++++++++----------
gcc/config/aarch64/aarch64.opt | 2 +-
gcc/config/aarch64/driver-aarch64.cc | 10 +++----
7 files changed, 56 insertions(+), 47 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index ac3486d71..3efa57b26 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -204,9 +204,9 @@ aarch64_handle_option (struct gcc_options *opts,
struct aarch64_option_extension
{
const char *name;
- uint64_t flag_canonical;
- uint64_t flags_on;
- uint64_t flags_off;
+ aarch64_feature_flags flag_canonical;
+ aarch64_feature_flags flags_on;
+ aarch64_feature_flags flags_off;
};
/* ISA extensions in AArch64. */
@@ -225,14 +225,14 @@ struct processor_name_to_arch
{
const char *processor_name;
aarch64_arch arch;
- uint64_t flags;
+ aarch64_feature_flags flags;
};
struct arch_to_arch_name
{
aarch64_arch arch;
const char *arch_name;
- uint64_t flags;
+ aarch64_feature_flags flags;
};
/* Map processor names to the architecture revision they implement and
@@ -262,7 +262,7 @@ static constexpr arch_to_arch_name all_architectures[] =
a copy of the string is created and stored to INVALID_EXTENSION. */
enum aarch64_parse_opt_result
-aarch64_parse_extension (const char *str, uint64_t *isa_flags,
+aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
std::string *invalid_extension)
{
/* The extension string is parsed left to right. */
@@ -342,8 +342,9 @@ aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates)
that all the "+" flags come before the "+no" flags. */
std::string
-aarch64_get_extension_string_for_isa_flags (uint64_t isa_flags,
- uint64_t default_arch_flags)
+aarch64_get_extension_string_for_isa_flags
+ (aarch64_feature_flags isa_flags,
+ aarch64_feature_flags default_arch_flags)
{
std::string outstr = "";
@@ -451,7 +452,7 @@ aarch64_rewrite_selected_cpu (const char *name)
|| a_to_an->arch == aarch64_no_arch)
fatal_error (input_location, "unknown value %qs for %<-mcpu%>", name);
- uint64_t extensions = p_to_a->flags;
+ aarch64_feature_flags extensions = p_to_a->flags;
aarch64_parse_extension (extension_str.c_str (), &extensions, NULL);
std::string outstr = a_to_an->arch_name
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e60ce3c36..ef84df731 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1037,10 +1037,11 @@ bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
const struct cl_decoded_option *, location_t);
const char *aarch64_rewrite_selected_cpu (const char *name);
enum aarch64_parse_opt_result aarch64_parse_extension (const char *,
- uint64_t *,
+ aarch64_feature_flags *,
std::string *);
void aarch64_get_all_extension_candidates (auto_vec<const char *> *candidates);
-std::string aarch64_get_extension_string_for_isa_flags (uint64_t, uint64_t);
+std::string aarch64_get_extension_string_for_isa_flags (aarch64_feature_flags,
+ aarch64_feature_flags);
rtl_opt_pass *make_pass_fma_steering (gcc::context *);
rtl_opt_pass *make_pass_track_speculation (gcc::context *);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index c06e99339..b927a886e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -82,7 +82,7 @@ public:
/* The architecture extensions that the function requires, as a set of
AARCH64_FL_* flags. */
- uint64_t required_extensions;
+ aarch64_feature_flags required_extensions;
/* True if the decl represents an overloaded function that needs to be
resolved by function_resolver. */
@@ -694,13 +694,16 @@ check_required_registers (location_t location, tree fndecl)
Report an error against LOCATION if not. */
static bool
check_required_extensions (location_t location, tree fndecl,
- uint64_t required_extensions)
+ aarch64_feature_flags required_extensions)
{
- uint64_t missing_extensions = required_extensions & ~aarch64_isa_flags;
+ auto missing_extensions = required_extensions & ~aarch64_isa_flags;
if (missing_extensions == 0)
return check_required_registers (location, fndecl);
- static const struct { uint64_t flag; const char *name; } extensions[] = {
+ static const struct {
+ aarch64_feature_flags flag;
+ const char *name;
+ } extensions[] = {
#define AARCH64_OPT_EXTENSION(EXT_NAME, IDENT, C, D, E, F) \
{ AARCH64_FL_##IDENT, EXT_NAME },
#include "aarch64-option-extensions.def"
@@ -992,7 +995,7 @@ function_builder::get_attributes (const function_instance &instance)
registered_function &
function_builder::add_function (const function_instance &instance,
const char *name, tree fntype, tree attrs,
- uint64_t required_extensions,
+ aarch64_feature_flags required_extensions,
bool overloaded_p,
bool placeholder_p)
{
@@ -1034,11 +1037,12 @@ function_builder::add_function (const function_instance &instance,
one-to-one mapping between "short" and "full" names, and if standard
overload resolution therefore isn't necessary. */
void
-function_builder::add_unique_function (const function_instance &instance,
- tree return_type,
- vec<tree> &argument_types,
- uint64_t required_extensions,
- bool force_direct_overloads)
+function_builder::
+add_unique_function (const function_instance &instance,
+ tree return_type,
+ vec<tree> &argument_types,
+ aarch64_feature_flags required_extensions,
+ bool force_direct_overloads)
{
/* Add the function under its full (unique) name. */
char *name = get_name (instance, false);
@@ -1081,8 +1085,9 @@ function_builder::add_unique_function (const function_instance &instance,
features are available as part of resolving the function to the
relevant unique function. */
void
-function_builder::add_overloaded_function (const function_instance &instance,
- uint64_t required_extensions)
+function_builder::
+add_overloaded_function (const function_instance &instance,
+ aarch64_feature_flags required_extensions)
{
char *name = get_name (instance, true);
if (registered_function **map_value = m_overload_names.get (name))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 24594d584..63d1db776 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -263,7 +263,7 @@ struct function_group_info
/* The architecture extensions that the functions require, as a set of
AARCH64_FL_* flags. */
- uint64_t required_extensions;
+ aarch64_feature_flags required_extensions;
};
/* Describes a single fully-resolved function (i.e. one that has a
@@ -321,8 +321,9 @@ public:
~function_builder ();
void add_unique_function (const function_instance &, tree,
- vec<tree> &, uint64_t, bool);
- void add_overloaded_function (const function_instance &, uint64_t);
+ vec<tree> &, aarch64_feature_flags, bool);
+ void add_overloaded_function (const function_instance &,
+ aarch64_feature_flags);
void add_overloaded_functions (const function_group_info &,
mode_suffix_index);
@@ -338,7 +339,7 @@ private:
registered_function &add_function (const function_instance &,
const char *, tree, tree,
- uint64_t, bool, bool);
+ aarch64_feature_flags, bool, bool);
/* The function type to use for functions that are resolved by
function_resolver. */
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 71db7ace1..8cb820767 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -2929,7 +2929,7 @@ struct processor
aarch64_processor ident;
aarch64_processor sched_core;
aarch64_arch arch;
- uint64_t flags;
+ aarch64_feature_flags flags;
const tune_params *tune;
};
@@ -17428,7 +17428,8 @@ static void initialize_aarch64_code_model (struct gcc_options *);
static enum aarch64_parse_opt_result
aarch64_parse_arch (const char *to_parse, const struct processor **res,
- uint64_t *isa_flags, std::string *invalid_extension)
+ aarch64_feature_flags *isa_flags,
+ std::string *invalid_extension)
{
const char *ext;
const struct processor *arch;
@@ -17451,7 +17452,7 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res,
if (strlen (arch->name) == len
&& strncmp (arch->name, to_parse, len) == 0)
{
- uint64_t isa_temp = arch->flags;
+ auto isa_temp = arch->flags;
if (ext != NULL)
{
@@ -17483,7 +17484,8 @@ aarch64_parse_arch (const char *to_parse, const struct processor **res,
static enum aarch64_parse_opt_result
aarch64_parse_cpu (const char *to_parse, const struct processor **res,
- uint64_t *isa_flags, std::string *invalid_extension)
+ aarch64_feature_flags *isa_flags,
+ std::string *invalid_extension)
{
const char *ext;
const struct processor *cpu;
@@ -17505,8 +17507,7 @@ aarch64_parse_cpu (const char *to_parse, const struct processor **res,
{
if (strlen (cpu->name) == len && strncmp (cpu->name, to_parse, len) == 0)
{
- uint64_t isa_temp = cpu->flags;
-
+ auto isa_temp = cpu->flags;
if (ext != NULL)
{
@@ -18137,7 +18138,7 @@ aarch64_print_hint_for_extensions (const std::string &str)
static bool
aarch64_validate_mcpu (const char *str, const struct processor **res,
- uint64_t *isa_flags)
+ aarch64_feature_flags *isa_flags)
{
std::string invalid_extension;
enum aarch64_parse_opt_result parse_res
@@ -18351,7 +18352,7 @@ aarch64_validate_mbranch_protection (const char *const_str)
static bool
aarch64_validate_march (const char *str, const struct processor **res,
- uint64_t *isa_flags)
+ aarch64_feature_flags *isa_flags)
{
std::string invalid_extension;
enum aarch64_parse_opt_result parse_res
@@ -18441,8 +18442,8 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
static void
aarch64_override_options (void)
{
- uint64_t cpu_isa = 0;
- uint64_t arch_isa = 0;
+ aarch64_feature_flags cpu_isa = 0;
+ aarch64_feature_flags arch_isa = 0;
aarch64_isa_flags = 0;
const struct processor *cpu = NULL;
@@ -18890,7 +18891,7 @@ static bool
aarch64_handle_attr_isa_flags (char *str)
{
enum aarch64_parse_opt_result parse_res;
- uint64_t isa_flags = aarch64_isa_flags;
+ auto isa_flags = aarch64_isa_flags;
/* We allow "+nothing" in the beginning to clear out all architectural
features if the user wants to handpick specific features. */
@@ -19162,7 +19163,7 @@ aarch64_process_target_attr (tree args)
{
/* Check if token is possibly an arch extension without
leading '+'. */
- uint64_t isa_temp = 0;
+ aarch64_feature_flags isa_temp = 0;
auto with_plus = std::string ("+") + token;
enum aarch64_parse_opt_result ext_res
= aarch64_parse_extension (with_plus.c_str (), &isa_temp, nullptr);
@@ -22771,7 +22772,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
const struct processor *this_arch
= aarch64_get_arch (targ_options->x_selected_arch);
- uint64_t isa_flags = targ_options->x_aarch64_isa_flags;
+ auto isa_flags = targ_options->x_aarch64_isa_flags;
std::string extension
= aarch64_get_extension_string_for_isa_flags (isa_flags,
this_arch->flags);
@@ -22901,7 +22902,7 @@ aarch64_start_file (void)
const struct processor *default_arch
= aarch64_get_arch (default_options->x_selected_arch);
- uint64_t default_isa_flags = default_options->x_aarch64_isa_flags;
+ auto default_isa_flags = default_options->x_aarch64_isa_flags;
std::string extension
= aarch64_get_extension_string_for_isa_flags (default_isa_flags,
default_arch->flags);
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 836a3c784..47ec7824f 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -28,7 +28,7 @@ TargetVariable
enum aarch64_arch selected_arch = aarch64_no_arch
TargetVariable
-uint64_t aarch64_isa_flags = 0
+aarch64_feature_flags aarch64_isa_flags = 0
TargetVariable
unsigned aarch64_enable_bti = 2
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index ee9cb65a5..2ae47c020 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -31,7 +31,7 @@
struct aarch64_arch_extension
{
const char *ext;
- uint64_t flag;
+ aarch64_feature_flags flag;
const char *feat_string;
};
@@ -50,7 +50,7 @@ struct aarch64_core_data
unsigned char implementer_id; /* Exactly 8 bits */
unsigned int part_no; /* 12 bits + 12 bits */
unsigned variant;
- uint64_t flags;
+ aarch64_feature_flags flags;
};
#define AARCH64_BIG_LITTLE(BIG, LITTLE) \
@@ -75,7 +75,7 @@ struct aarch64_arch_driver_info
{
const char* id;
const char* name;
- uint64_t flags;
+ aarch64_feature_flags flags;
};
/* Skip the leading "V" in the architecture name. */
@@ -261,8 +261,8 @@ host_detect_local_cpu (int argc, const char **argv)
unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
unsigned int n_variants = 0;
bool processed_exts = false;
- uint64_t extension_flags = 0;
- uint64_t default_flags = 0;
+ aarch64_feature_flags extension_flags = 0;
+ aarch64_feature_flags default_flags = 0;
std::string buf;
size_t sep_pos = -1;
char *fcpu_info;
--
2.33.0
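
The value of funnelling everything through one alias is that the underlying representation stays a detail: code that declares values as aarch64_feature_flags (or takes auto from an existing flags value, as several hunks above do) keeps compiling if the mask later grows past 64 bits. A sketch of the pattern; the bitmap128 alternative in the comment is hypothetical:

  #include <cstdint>

  using aarch64_feature_flags = uint64_t;     /* today: one 64-bit mask */
  /* using aarch64_feature_flags = bitmap128;    later: a wider class */

  aarch64_feature_flags
  with_bit (aarch64_feature_flags base, unsigned bit)
  {
    auto flags = base;             /* tracks whatever the alias is now */
    flags |= aarch64_feature_flags (1) << bit;
    return flags;
  }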

From eb92c185c1c71edcbd83b1c66fe4f9e7d52a98b3 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:56 +0100
Subject: [PATCH 024/157] [Backport][SME] aarch64: Tweak contents of
flags_on/off fields
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bb7f43b62a58a0f0326fd3060f0bd43e6f3ef971
After previous changes, it's more convenient if the flags_on and
flags_off fields of all_extensions include the feature flag itself.
gcc/
* common/config/aarch64/aarch64-common.cc (all_extensions):
Include the feature flag in flags_on and flags_off.
(aarch64_parse_extension): Update accordingly.
(aarch64_get_extension_string_for_isa_flags): Likewise.
---
gcc/common/config/aarch64/aarch64-common.cc | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 3efa57b26..752ba5632 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -213,10 +213,8 @@ struct aarch64_option_extension
static constexpr aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, IDENT, C, D, E, F) \
- {NAME, AARCH64_FL_##IDENT, \
- feature_deps::IDENT ().explicit_on & ~AARCH64_FL_##IDENT, \
- feature_deps::get_flags_off (feature_deps::root_off_##IDENT) \
- & ~AARCH64_FL_##IDENT},
+ {NAME, AARCH64_FL_##IDENT, feature_deps::IDENT ().explicit_on, \
+ feature_deps::get_flags_off (feature_deps::root_off_##IDENT)},
#include "config/aarch64/aarch64-option-extensions.def"
{NULL, 0, 0, 0}
};
@@ -304,9 +302,9 @@ aarch64_parse_extension (const char *str, aarch64_feature_flags *isa_flags,
{
/* Add or remove the extension. */
if (adding_ext)
- *isa_flags |= (opt->flags_on | opt->flag_canonical);
+ *isa_flags |= opt->flags_on;
else
- *isa_flags &= ~(opt->flags_off | opt->flag_canonical);
+ *isa_flags &= ~opt->flags_off;
break;
}
}
@@ -380,7 +378,7 @@ aarch64_get_extension_string_for_isa_flags
if ((flags & isa_flags & (explicit_flags | ~current_flags)) == flags)
{
- current_flags |= opt.flag_canonical | opt.flags_on;
+ current_flags |= opt.flags_on;
added |= opt.flag_canonical;
}
}
@@ -395,7 +393,7 @@ aarch64_get_extension_string_for_isa_flags
for (auto &opt : all_extensions)
if (opt.flag_canonical & current_flags & ~isa_flags)
{
- current_flags &= ~(opt.flag_canonical | opt.flags_off);
+ current_flags &= ~opt.flags_off;
outstr += "+no";
outstr += opt.name;
}
--
2.33.0
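
In other words, once each entry's flags_on and flags_off contain the feature's own bit, adding or removing an extension collapses to a single OR or AND-NOT, with no special-casing of the canonical bit. A toy illustration with made-up FL_ values:

  #include <cstdint>

  constexpr uint64_t FL_FP = 1, FL_SIMD = 2;

  struct opt { uint64_t canonical, flags_on, flags_off; };

  /* "simd" depends on "fp"; both masks now include FL_SIMD itself.  */
  constexpr opt simd = { FL_SIMD, FL_SIMD | FL_FP, FL_SIMD };

  /* Before this patch: isa |= opt.flags_on | opt.flag_canonical;  */
  uint64_t enable  (uint64_t isa) { return isa |  simd.flags_on;  }
  /* Before this patch: isa &= ~(opt.flags_off | opt.flag_canonical);  */
  uint64_t disable (uint64_t isa) { return isa & ~simd.flags_off; }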

From 91f7471cbc7dec42673b58a1896330d64eb6be2a Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:57 +0100
Subject: [PATCH 025/157] [Backport][SME] aarch64: Tweak handling of
-mgeneral-regs-only
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2a269bda9e7b8f9353699d0c965e7e9246500aa0
-mgeneral-regs-only is effectively "+nofp for the compiler without
changing the assembler's ISA flags". Currently that's implemented
by making TARGET_FLOAT, TARGET_SIMD and TARGET_SVE depend on
!TARGET_GENERAL_REGS_ONLY and then making any feature that needs FP
registers depend (directly or indirectly) on one of those three TARGET
macros.  The problem is that it's easy to forget to do the last bit.
This patch instead represents the distinction between "assembler
ISA flags" and "compiler ISA flags" more directly, funnelling
all updates through a new function that sets both sets of flags
together.
gcc/
* config/aarch64/aarch64.opt (aarch64_asm_isa_flags): New variable.
* config/aarch64/aarch64.h (aarch64_asm_isa_flags)
(aarch64_isa_flags): Redefine as read-only macros.
(TARGET_SIMD, TARGET_FLOAT, TARGET_SVE): Don't depend on
!TARGET_GENERAL_REGS_ONLY.
* common/config/aarch64/aarch64-common.cc
(aarch64_set_asm_isa_flags): New function.
(aarch64_handle_option): Call it when updating -mgeneral-regs.
* config/aarch64/aarch64-protos.h (aarch64_simd_switcher): Replace
m_old_isa_flags with m_old_asm_isa_flags.
(aarch64_set_asm_isa_flags): Declare.
* config/aarch64/aarch64-builtins.cc
(aarch64_simd_switcher::aarch64_simd_switcher)
(aarch64_simd_switcher::~aarch64_simd_switcher): Save and restore
aarch64_asm_isa_flags instead of aarch64_isa_flags.
* config/aarch64/aarch64-sve-builtins.cc
(check_required_extensions): Use aarch64_asm_isa_flags instead
of aarch64_isa_flags.
* config/aarch64/aarch64.cc (aarch64_set_asm_isa_flags): New function.
(aarch64_override_options, aarch64_handle_attr_arch)
(aarch64_handle_attr_cpu, aarch64_handle_attr_isa_flags): Use
aarch64_set_asm_isa_flags to set the ISA flags.
(aarch64_option_print, aarch64_declare_function_name)
(aarch64_start_file): Use aarch64_asm_isa_flags instead
of aarch64_isa_flags.
(aarch64_can_inline_p): Check aarch64_asm_isa_flags as well as
aarch64_isa_flags.
---
gcc/common/config/aarch64/aarch64-common.cc | 12 ++++++
gcc/config/aarch64/aarch64-builtins.cc | 6 +--
gcc/config/aarch64/aarch64-protos.h | 5 ++-
gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
gcc/config/aarch64/aarch64.cc | 45 ++++++++++++++-------
gcc/config/aarch64/aarch64.h | 17 ++++++--
gcc/config/aarch64/aarch64.opt | 3 ++
7 files changed, 68 insertions(+), 22 deletions(-)
diff --git a/gcc/common/config/aarch64/aarch64-common.cc b/gcc/common/config/aarch64/aarch64-common.cc
index 752ba5632..c64b4987e 100644
--- a/gcc/common/config/aarch64/aarch64-common.cc
+++ b/gcc/common/config/aarch64/aarch64-common.cc
@@ -137,6 +137,17 @@ reset_tsv110_option ()
}
}
+/* Set OPTS->x_aarch64_asm_isa_flags to FLAGS and update
+ OPTS->x_aarch64_isa_flags accordingly. */
+void
+aarch64_set_asm_isa_flags (gcc_options *opts, aarch64_feature_flags flags)
+{
+ opts->x_aarch64_asm_isa_flags = flags;
+ opts->x_aarch64_isa_flags = flags;
+ if (opts->x_target_flags & MASK_GENERAL_REGS_ONLY)
+ opts->x_aarch64_isa_flags &= ~feature_deps::get_flags_off (AARCH64_FL_FP);
+}
+
/* Implement TARGET_HANDLE_OPTION.
This function handles the target specific options for CPU/target selection.
@@ -174,6 +185,7 @@ aarch64_handle_option (struct gcc_options *opts,
case OPT_mgeneral_regs_only:
opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
+ aarch64_set_asm_isa_flags (opts, opts->x_aarch64_asm_isa_flags);
return true;
case OPT_mfix_cortex_a53_835769:
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index 42276e7ca..015e9d975 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -1336,20 +1336,20 @@ aarch64_scalar_builtin_type_p (aarch64_simd_type t)
/* Enable AARCH64_FL_* flags EXTRA_FLAGS on top of the base Advanced SIMD
set. */
aarch64_simd_switcher::aarch64_simd_switcher (unsigned int extra_flags)
- : m_old_isa_flags (aarch64_isa_flags),
+ : m_old_asm_isa_flags (aarch64_asm_isa_flags),
m_old_general_regs_only (TARGET_GENERAL_REGS_ONLY)
{
/* Changing the ISA flags should be enough here. We shouldn't need to
pay the compile-time cost of a full target switch. */
- aarch64_isa_flags = AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags;
global_options.x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
+ aarch64_set_asm_isa_flags (AARCH64_FL_FP | AARCH64_FL_SIMD | extra_flags);
}
aarch64_simd_switcher::~aarch64_simd_switcher ()
{
if (m_old_general_regs_only)
global_options.x_target_flags |= MASK_GENERAL_REGS_ONLY;
- aarch64_isa_flags = m_old_isa_flags;
+ aarch64_set_asm_isa_flags (m_old_asm_isa_flags);
}
/* Implement #pragma GCC aarch64 "arm_neon.h". */
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ef84df731..86e444a60 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -747,7 +747,7 @@ public:
~aarch64_simd_switcher ();
private:
- unsigned long m_old_isa_flags;
+ unsigned long m_old_asm_isa_flags;
bool m_old_general_regs_only;
};
@@ -1032,7 +1032,10 @@ extern bool aarch64_classify_address (struct aarch64_address_info *, rtx,
machine_mode, bool,
aarch64_addr_query_type = ADDR_QUERY_M);
+void aarch64_set_asm_isa_flags (aarch64_feature_flags);
+
/* Defined in common/config/aarch64-common.cc. */
+void aarch64_set_asm_isa_flags (gcc_options *, aarch64_feature_flags);
bool aarch64_handle_option (struct gcc_options *, struct gcc_options *,
const struct cl_decoded_option *, location_t);
const char *aarch64_rewrite_selected_cpu (const char *name);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index b927a886e..a70e3a6b4 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -696,7 +696,7 @@ static bool
check_required_extensions (location_t location, tree fndecl,
aarch64_feature_flags required_extensions)
{
- auto missing_extensions = required_extensions & ~aarch64_isa_flags;
+ auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
if (missing_extensions == 0)
return check_required_registers (location, fndecl);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8cb820767..3e83e48ec 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -18432,10 +18432,19 @@ aarch64_convert_sve_vector_bits (aarch64_sve_vector_bits_enum value)
return (int) value / 64;
}
+/* Set the global aarch64_asm_isa_flags to FLAGS and update
+ aarch64_isa_flags accordingly. */
+
+void
+aarch64_set_asm_isa_flags (aarch64_feature_flags flags)
+{
+ aarch64_set_asm_isa_flags (&global_options, flags);
+}
+
/* Implement TARGET_OPTION_OVERRIDE. This is called once in the beginning
and is used to parse the -m{cpu,tune,arch} strings and setup the initial
tuning structs. In particular it must set selected_tune and
- aarch64_isa_flags that define the available ISA features and tuning
+ aarch64_asm_isa_flags that define the available ISA features and tuning
decisions. It must also set selected_arch as this will be used to
output the .arch asm tags for each function. */
@@ -18444,7 +18453,7 @@ aarch64_override_options (void)
{
aarch64_feature_flags cpu_isa = 0;
aarch64_feature_flags arch_isa = 0;
- aarch64_isa_flags = 0;
+ aarch64_set_asm_isa_flags (0);
const struct processor *cpu = NULL;
const struct processor *arch = NULL;
@@ -18484,25 +18493,25 @@ aarch64_override_options (void)
}
selected_arch = arch->arch;
- aarch64_isa_flags = arch_isa;
+ aarch64_set_asm_isa_flags (arch_isa);
}
else if (cpu)
{
selected_arch = cpu->arch;
- aarch64_isa_flags = cpu_isa;
+ aarch64_set_asm_isa_flags (cpu_isa);
}
else if (arch)
{
cpu = &all_cores[arch->ident];
selected_arch = arch->arch;
- aarch64_isa_flags = arch_isa;
+ aarch64_set_asm_isa_flags (arch_isa);
}
else
{
/* No -mcpu or -march specified, so use the default CPU. */
cpu = &all_cores[TARGET_CPU_DEFAULT];
selected_arch = cpu->arch;
- aarch64_isa_flags = cpu->flags;
+ aarch64_set_asm_isa_flags (cpu->flags);
}
selected_tune = tune ? tune->ident : cpu->ident;
@@ -18644,7 +18653,7 @@ aarch64_option_print (FILE *file, int indent, struct cl_target_option *ptr)
= aarch64_get_tune_cpu (ptr->x_selected_tune);
const struct processor *arch = aarch64_get_arch (ptr->x_selected_arch);
std::string extension
- = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_isa_flags,
+ = aarch64_get_extension_string_for_isa_flags (ptr->x_aarch64_asm_isa_flags,
arch->flags);
fprintf (file, "%*sselected tune = %s\n", indent, "", cpu->name);
@@ -18752,13 +18761,15 @@ aarch64_handle_attr_arch (const char *str)
{
const struct processor *tmp_arch = NULL;
std::string invalid_extension;
+ aarch64_feature_flags tmp_flags;
enum aarch64_parse_opt_result parse_res
- = aarch64_parse_arch (str, &tmp_arch, &aarch64_isa_flags, &invalid_extension);
+ = aarch64_parse_arch (str, &tmp_arch, &tmp_flags, &invalid_extension);
if (parse_res == AARCH64_PARSE_OK)
{
gcc_assert (tmp_arch);
selected_arch = tmp_arch->arch;
+ aarch64_set_asm_isa_flags (tmp_flags);
return true;
}
@@ -18790,14 +18801,16 @@ aarch64_handle_attr_cpu (const char *str)
{
const struct processor *tmp_cpu = NULL;
std::string invalid_extension;
+ aarch64_feature_flags tmp_flags;
enum aarch64_parse_opt_result parse_res
- = aarch64_parse_cpu (str, &tmp_cpu, &aarch64_isa_flags, &invalid_extension);
+ = aarch64_parse_cpu (str, &tmp_cpu, &tmp_flags, &invalid_extension);
if (parse_res == AARCH64_PARSE_OK)
{
gcc_assert (tmp_cpu);
selected_tune = tmp_cpu->ident;
selected_arch = tmp_cpu->arch;
+ aarch64_set_asm_isa_flags (tmp_flags);
return true;
}
@@ -18891,7 +18904,7 @@ static bool
aarch64_handle_attr_isa_flags (char *str)
{
enum aarch64_parse_opt_result parse_res;
- auto isa_flags = aarch64_isa_flags;
+ auto isa_flags = aarch64_asm_isa_flags;
/* We allow "+nothing" in the beginning to clear out all architectural
features if the user wants to handpick specific features. */
@@ -18906,7 +18919,7 @@ aarch64_handle_attr_isa_flags (char *str)
if (parse_res == AARCH64_PARSE_OK)
{
- aarch64_isa_flags = isa_flags;
+ aarch64_set_asm_isa_flags (isa_flags);
return true;
}
@@ -19328,8 +19341,12 @@ aarch64_can_inline_p (tree caller, tree callee)
: target_option_default_node);
/* Callee's ISA flags should be a subset of the caller's. */
+ if ((caller_opts->x_aarch64_asm_isa_flags
+ & callee_opts->x_aarch64_asm_isa_flags)
+ != callee_opts->x_aarch64_asm_isa_flags)
+ return false;
if ((caller_opts->x_aarch64_isa_flags & callee_opts->x_aarch64_isa_flags)
- != callee_opts->x_aarch64_isa_flags)
+ != callee_opts->x_aarch64_isa_flags)
return false;
/* Allow non-strict aligned functions inlining into strict
@@ -22772,7 +22789,7 @@ aarch64_declare_function_name (FILE *stream, const char* name,
const struct processor *this_arch
= aarch64_get_arch (targ_options->x_selected_arch);
- auto isa_flags = targ_options->x_aarch64_isa_flags;
+ auto isa_flags = targ_options->x_aarch64_asm_isa_flags;
std::string extension
= aarch64_get_extension_string_for_isa_flags (isa_flags,
this_arch->flags);
@@ -22902,7 +22919,7 @@ aarch64_start_file (void)
const struct processor *default_arch
= aarch64_get_arch (default_options->x_selected_arch);
- auto default_isa_flags = default_options->x_aarch64_isa_flags;
+ auto default_isa_flags = default_options->x_aarch64_asm_isa_flags;
std::string extension
= aarch64_get_extension_string_for_isa_flags (default_isa_flags,
default_arch->flags);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 50a2ef444..521031efe 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -22,6 +22,17 @@
#ifndef GCC_AARCH64_H
#define GCC_AARCH64_H
+/* Make these flags read-only so that all uses go via
+ aarch64_set_asm_isa_flags. */
+#ifndef GENERATOR_FILE
+#undef aarch64_asm_isa_flags
+#define aarch64_asm_isa_flags \
+ ((aarch64_feature_flags) global_options.x_aarch64_asm_isa_flags)
+#undef aarch64_isa_flags
+#define aarch64_isa_flags \
+ ((aarch64_feature_flags) global_options.x_aarch64_isa_flags)
+#endif
+
/* Target CPU builtins. */
#define TARGET_CPU_CPP_BUILTINS() \
aarch64_cpu_cpp_builtins (pfile)
@@ -51,8 +62,8 @@
/* AdvSIMD is supported in the default configuration, unless disabled by
-mgeneral-regs-only or by the +nosimd extension. */
-#define TARGET_SIMD (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SIMD)
-#define TARGET_FLOAT (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_FP)
+#define TARGET_SIMD (AARCH64_ISA_SIMD)
+#define TARGET_FLOAT (AARCH64_ISA_FP)
#define UNITS_PER_WORD 8
@@ -242,7 +253,7 @@ enum class aarch64_feature : unsigned char {
#define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD)
/* SVE instructions, enabled through +sve. */
-#define TARGET_SVE (!TARGET_GENERAL_REGS_ONLY && AARCH64_ISA_SVE)
+#define TARGET_SVE (AARCH64_ISA_SVE)
/* SVE2 instructions, enabled through +sve2. */
#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2)
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 47ec7824f..5f507abd4 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -27,6 +27,9 @@ enum aarch64_processor selected_tune = aarch64_none
TargetVariable
enum aarch64_arch selected_arch = aarch64_no_arch
+TargetVariable
+aarch64_feature_flags aarch64_asm_isa_flags = 0
+
TargetVariable
aarch64_feature_flags aarch64_isa_flags = 0
--
2.33.0
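
The shape of this fix is worth seeing outside the diff: a single setter owns the relationship between the assembler's view and the compiler's view of the ISA, so -mgeneral-regs-only can no longer be forgotten at an individual TARGET_* macro. A minimal sketch, assuming a toy options struct and a flags_off_fp constant standing in for feature_deps::get_flags_off (AARCH64_FL_FP):

  #include <cstdint>

  constexpr uint64_t FL_FP = 1, FL_SIMD = 2, FL_SVE = 4;

  /* Everything that FP being off must drag down with it.  */
  constexpr uint64_t flags_off_fp = FL_FP | FL_SIMD | FL_SVE;

  struct options
  {
    bool general_regs_only;
    uint64_t asm_isa_flags;   /* what the .arch directive advertises */
    uint64_t isa_flags;       /* what the compiler may actually use  */
  };

  void
  set_asm_isa_flags (options &opts, uint64_t flags)
  {
    opts.asm_isa_flags = flags;
    opts.isa_flags = flags;
    if (opts.general_regs_only)
      opts.isa_flags &= ~flags_off_fp; /* compiler loses FP transitively */
  }

With that invariant in place, the TARGET_SIMD/TARGET_FLOAT/TARGET_SVE macros can drop their !TARGET_GENERAL_REGS_ONLY checks, which is what the next patch in the series does.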

From 77a86d955dd1c9cd8c7fc35e6caf0cb707799129 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 29 Sep 2022 11:32:57 +0100
Subject: [PATCH 026/157] [Backport][SME] aarch64: Remove redundant TARGET_*
checks
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a31641840af2c40cf36036fa472df34d4a4402c3
After previous patches, it's possible to remove TARGET_*
options that are redundant due to (IMO) obvious dependencies.
gcc/
* config/aarch64/aarch64.h (TARGET_CRYPTO, TARGET_SHA3, TARGET_SM4)
(TARGET_DOTPROD): Don't depend on TARGET_SIMD.
(TARGET_AES, TARGET_SHA2): Likewise. Remove TARGET_CRYPTO test.
(TARGET_FP_F16INST): Don't depend on TARGET_FLOAT.
(TARGET_SVE2, TARGET_SVE_F32MM, TARGET_SVE_F64MM): Don't depend
on TARGET_SVE.
(TARGET_SVE2_AES, TARGET_SVE2_BITPERM, TARGET_SVE2_SHA3)
(TARGET_SVE2_SM4): Don't depend on TARGET_SVE2.
(TARGET_F32MM, TARGET_F64MM): Delete.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Guard
float macros with just TARGET_FLOAT rather than TARGET_FLOAT
|| TARGET_SIMD.
* config/aarch64/aarch64-simd.md (copysign<mode>3): Depend
only on TARGET_SIMD, rather than TARGET_FLOAT && TARGET_SIMD.
(aarch64_crypto_aes<aes_op>v16qi): Depend only on TARGET_AES,
rather than TARGET_SIMD && TARGET_AES.
(aarch64_crypto_aes<aesmc_op>v16qi): Likewise.
(*aarch64_crypto_aese_fused): Likewise.
(*aarch64_crypto_aesd_fused): Likewise.
(aarch64_crypto_pmulldi): Likewise.
(aarch64_crypto_pmullv2di): Likewise.
(aarch64_crypto_sha1hsi): Likewise TARGET_SHA2.
(aarch64_crypto_sha1hv4si): Likewise.
(aarch64_be_crypto_sha1hv4si): Likewise.
(aarch64_crypto_sha1su1v4si): Likewise.
(aarch64_crypto_sha1<sha1_op>v4si): Likewise.
(aarch64_crypto_sha1su0v4si): Likewise.
(aarch64_crypto_sha256h<sha256_op>v4si): Likewise.
(aarch64_crypto_sha256su0v4si): Likewise.
(aarch64_crypto_sha256su1v4si): Likewise.
(aarch64_crypto_sha512h<sha512_op>qv2di): Likewise TARGET_SHA3.
(aarch64_crypto_sha512su0qv2di): Likewise.
(aarch64_crypto_sha512su1qv2di, eor3q<mode>4): Likewise.
(aarch64_rax1qv2di, aarch64_xarqv2di, bcaxq<mode>4): Likewise.
(aarch64_sm3ss1qv4si): Likewise TARGET_SM4.
(aarch64_sm3tt<sm3tt_op>qv4si): Likewise.
(aarch64_sm3partw<sm3part_op>qv4si): Likewise.
(aarch64_sm4eqv4si, aarch64_sm4ekeyqv4si): Likewise.
* config/aarch64/aarch64.md (<FLOATUORS:optab>dihf2)
(copysign<GPF:mode>3, copysign<GPF:mode>3_insn)
(xorsign<mode>3): Remove redundant TARGET_FLOAT condition.
---
gcc/config/aarch64/aarch64-c.cc | 2 +-
gcc/config/aarch64/aarch64-simd.md | 56 +++++++++++++++---------------
gcc/config/aarch64/aarch64.h | 30 ++++++++--------
gcc/config/aarch64/aarch64.md | 8 ++---
4 files changed, 47 insertions(+), 49 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 18c9b975b..2dfe2b8f8 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -92,7 +92,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
aarch64_def_or_undef (TARGET_FLOAT, "__ARM_FEATURE_FMA", pfile);
- if (TARGET_FLOAT || TARGET_SIMD)
+ if (TARGET_FLOAT)
{
builtin_define_with_int_value ("__ARM_FP", 0x0E);
builtin_define ("__ARM_FP16_FORMAT_IEEE");
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index de92802f5..a47b39281 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -693,7 +693,7 @@
[(match_operand:VHSDF 0 "register_operand")
(match_operand:VHSDF 1 "register_operand")
(match_operand:VHSDF 2 "register_operand")]
- "TARGET_FLOAT && TARGET_SIMD"
+ "TARGET_SIMD"
{
rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
@@ -8352,7 +8352,7 @@
(match_operand:V16QI 1 "register_operand" "%0")
(match_operand:V16QI 2 "register_operand" "w"))]
CRYPTO_AES))]
- "TARGET_SIMD && TARGET_AES"
+ "TARGET_AES"
"aes<aes_op>\\t%0.16b, %2.16b"
[(set_attr "type" "crypto_aese")]
)
@@ -8361,7 +8361,7 @@
[(set (match_operand:V16QI 0 "register_operand" "=w")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
CRYPTO_AESMC))]
- "TARGET_SIMD && TARGET_AES"
+ "TARGET_AES"
"aes<aesmc_op>\\t%0.16b, %1.16b"
[(set_attr "type" "crypto_aesmc")]
)
@@ -8380,7 +8380,7 @@
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESE)]
UNSPEC_AESMC))]
- "TARGET_SIMD && TARGET_AES
+ "TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
[(set_attr "type" "crypto_aese")
@@ -8401,7 +8401,7 @@
(match_operand:V16QI 2 "register_operand" "w"))]
UNSPEC_AESD)]
UNSPEC_AESIMC))]
- "TARGET_SIMD && TARGET_AES
+ "TARGET_AES
&& aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
"aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
[(set_attr "type" "crypto_aese")
@@ -8415,7 +8415,7 @@
(unspec:SI [(match_operand:SI 1
"register_operand" "w")]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -8425,7 +8425,7 @@
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 0)]))]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
+ "TARGET_SHA2 && !BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -8435,7 +8435,7 @@
(unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
(parallel [(const_int 3)]))]
UNSPEC_SHA1H))]
- "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
+ "TARGET_SHA2 && BYTES_BIG_ENDIAN"
"sha1h\\t%s0, %s1"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -8445,7 +8445,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA1SU1))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha1su1\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha1_fast")]
)
@@ -8456,7 +8456,7 @@
(match_operand:SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA1))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha1<sha1_op>\\t%q0, %s2, %3.4s"
[(set_attr "type" "crypto_sha1_slow")]
)
@@ -8467,7 +8467,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA1SU0))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha1su0\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha1_xor")]
)
@@ -8480,7 +8480,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SHA256))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha256h<sha256_op>\\t%q0, %q2, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
@@ -8490,7 +8490,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SHA256SU0))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha256su0\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sha256_fast")]
)
@@ -8501,7 +8501,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SHA256SU1))]
- "TARGET_SIMD && TARGET_SHA2"
+ "TARGET_SHA2"
"sha256su1\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sha256_slow")]
)
@@ -8514,7 +8514,7 @@
(match_operand:V2DI 2 "register_operand" "w")
(match_operand:V2DI 3 "register_operand" "w")]
CRYPTO_SHA512))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"sha512h<sha512_op>\\t%q0, %q2, %3.2d"
[(set_attr "type" "crypto_sha512")]
)
@@ -8524,7 +8524,7 @@
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(match_operand:V2DI 2 "register_operand" "w")]
UNSPEC_SHA512SU0))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"sha512su0\\t%0.2d, %2.2d"
[(set_attr "type" "crypto_sha512")]
)
@@ -8535,7 +8535,7 @@
(match_operand:V2DI 2 "register_operand" "w")
(match_operand:V2DI 3 "register_operand" "w")]
UNSPEC_SHA512SU1))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"sha512su1\\t%0.2d, %2.2d, %3.2d"
[(set_attr "type" "crypto_sha512")]
)
@@ -8549,7 +8549,7 @@
(match_operand:VQ_I 2 "register_operand" "w")
(match_operand:VQ_I 3 "register_operand" "w"))
(match_operand:VQ_I 1 "register_operand" "w")))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
)
@@ -8561,7 +8561,7 @@
(match_operand:V2DI 2 "register_operand" "w")
(const_int 1))
(match_operand:V2DI 1 "register_operand" "w")))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"rax1\\t%0.2d, %1.2d, %2.2d"
[(set_attr "type" "crypto_sha3")]
)
@@ -8573,7 +8573,7 @@
(match_operand:V2DI 1 "register_operand" "%w")
(match_operand:V2DI 2 "register_operand" "w"))
(match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"xar\\t%0.2d, %1.2d, %2.2d, %3"
[(set_attr "type" "crypto_sha3")]
)
@@ -8585,7 +8585,7 @@
(not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
(match_operand:VQ_I 2 "register_operand" "w"))
(match_operand:VQ_I 1 "register_operand" "w")))]
- "TARGET_SIMD && TARGET_SHA3"
+ "TARGET_SHA3"
"bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
[(set_attr "type" "crypto_sha3")]
)
@@ -8598,7 +8598,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
UNSPEC_SM3SS1))]
- "TARGET_SIMD && TARGET_SM4"
+ "TARGET_SM4"
"sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sm3")]
)
@@ -8611,7 +8611,7 @@
(match_operand:V4SI 3 "register_operand" "w")
(match_operand:SI 4 "aarch64_imm2" "Ui2")]
CRYPTO_SM3TT))]
- "TARGET_SIMD && TARGET_SM4"
+ "TARGET_SM4"
"sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
[(set_attr "type" "crypto_sm3")]
)
@@ -8622,7 +8622,7 @@
(match_operand:V4SI 2 "register_operand" "w")
(match_operand:V4SI 3 "register_operand" "w")]
CRYPTO_SM3PART))]
- "TARGET_SIMD && TARGET_SM4"
+ "TARGET_SM4"
"sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
[(set_attr "type" "crypto_sm3")]
)
@@ -8634,7 +8634,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SM4E))]
- "TARGET_SIMD && TARGET_SM4"
+ "TARGET_SM4"
"sm4e\\t%0.4s, %2.4s"
[(set_attr "type" "crypto_sm4")]
)
@@ -8644,7 +8644,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
(match_operand:V4SI 2 "register_operand" "w")]
UNSPEC_SM4EKEY))]
- "TARGET_SIMD && TARGET_SM4"
+ "TARGET_SM4"
"sm4ekey\\t%0.4s, %1.4s, %2.4s"
[(set_attr "type" "crypto_sm4")]
)
@@ -9230,7 +9230,7 @@
(unspec:TI [(match_operand:DI 1 "register_operand" "w")
(match_operand:DI 2 "register_operand" "w")]
UNSPEC_PMULL))]
- "TARGET_SIMD && TARGET_AES"
+ "TARGET_AES"
"pmull\\t%0.1q, %1.1d, %2.1d"
[(set_attr "type" "crypto_pmull")]
)
@@ -9240,7 +9240,7 @@
(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
(match_operand:V2DI 2 "register_operand" "w")]
UNSPEC_PMULL2))]
- "TARGET_SIMD && TARGET_AES"
+ "TARGET_AES"
"pmull2\\t%0.1q, %1.2d, %2.2d"
[(set_attr "type" "crypto_pmull")]
)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 521031efe..2a9d2d031 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -222,19 +222,19 @@ enum class aarch64_feature : unsigned char {
#define AARCH64_ISA_LS64 (aarch64_isa_flags & AARCH64_FL_LS64)
/* Crypto is an optional extension to AdvSIMD. */
-#define TARGET_CRYPTO (TARGET_SIMD && AARCH64_ISA_CRYPTO)
+#define TARGET_CRYPTO (AARCH64_ISA_CRYPTO)
/* SHA2 is an optional extension to AdvSIMD. */
-#define TARGET_SHA2 ((TARGET_SIMD && AARCH64_ISA_SHA2) || TARGET_CRYPTO)
+#define TARGET_SHA2 (AARCH64_ISA_SHA2)
/* SHA3 is an optional extension to AdvSIMD. */
-#define TARGET_SHA3 (TARGET_SIMD && AARCH64_ISA_SHA3)
+#define TARGET_SHA3 (AARCH64_ISA_SHA3)
/* AES is an optional extension to AdvSIMD. */
-#define TARGET_AES ((TARGET_SIMD && AARCH64_ISA_AES) || TARGET_CRYPTO)
+#define TARGET_AES (AARCH64_ISA_AES)
/* SM is an optional extension to AdvSIMD. */
-#define TARGET_SM4 (TARGET_SIMD && AARCH64_ISA_SM4)
+#define TARGET_SM4 (AARCH64_ISA_SM4)
/* FP16FML is an optional extension to AdvSIMD. */
#define TARGET_F16FML (TARGET_SIMD && AARCH64_ISA_F16FML && TARGET_FP_F16INST)
@@ -246,29 +246,29 @@ enum class aarch64_feature : unsigned char {
#define TARGET_LSE (AARCH64_ISA_LSE)
/* ARMv8.2-A FP16 support that can be enabled through the +fp16 extension. */
-#define TARGET_FP_F16INST (TARGET_FLOAT && AARCH64_ISA_F16)
+#define TARGET_FP_F16INST (AARCH64_ISA_F16)
#define TARGET_SIMD_F16INST (TARGET_SIMD && AARCH64_ISA_F16)
/* Dot Product is an optional extension to AdvSIMD enabled through +dotprod. */
-#define TARGET_DOTPROD (TARGET_SIMD && AARCH64_ISA_DOTPROD)
+#define TARGET_DOTPROD (AARCH64_ISA_DOTPROD)
/* SVE instructions, enabled through +sve. */
#define TARGET_SVE (AARCH64_ISA_SVE)
/* SVE2 instructions, enabled through +sve2. */
-#define TARGET_SVE2 (TARGET_SVE && AARCH64_ISA_SVE2)
+#define TARGET_SVE2 (AARCH64_ISA_SVE2)
/* SVE2 AES instructions, enabled through +sve2-aes. */
-#define TARGET_SVE2_AES (TARGET_SVE2 && AARCH64_ISA_SVE2_AES)
+#define TARGET_SVE2_AES (AARCH64_ISA_SVE2_AES)
/* SVE2 BITPERM instructions, enabled through +sve2-bitperm. */
-#define TARGET_SVE2_BITPERM (TARGET_SVE2 && AARCH64_ISA_SVE2_BITPERM)
+#define TARGET_SVE2_BITPERM (AARCH64_ISA_SVE2_BITPERM)
/* SVE2 SHA3 instructions, enabled through +sve2-sha3. */
-#define TARGET_SVE2_SHA3 (TARGET_SVE2 && AARCH64_ISA_SVE2_SHA3)
+#define TARGET_SVE2_SHA3 (AARCH64_ISA_SVE2_SHA3)
/* SVE2 SM4 instructions, enabled through +sve2-sm4. */
-#define TARGET_SVE2_SM4 (TARGET_SVE2 && AARCH64_ISA_SVE2_SM4)
+#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4)
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
@@ -296,12 +296,10 @@ enum class aarch64_feature : unsigned char {
#define TARGET_SVE_I8MM (TARGET_SVE && AARCH64_ISA_I8MM)
/* F32MM instructions are enabled through +f32mm. */
-#define TARGET_F32MM (AARCH64_ISA_F32MM)
-#define TARGET_SVE_F32MM (TARGET_SVE && AARCH64_ISA_F32MM)
+#define TARGET_SVE_F32MM (AARCH64_ISA_F32MM)
/* F64MM instructions are enabled through +f64mm. */
-#define TARGET_F64MM (AARCH64_ISA_F64MM)
-#define TARGET_SVE_F64MM (TARGET_SVE && AARCH64_ISA_F64MM)
+#define TARGET_SVE_F64MM (AARCH64_ISA_F64MM)
/* BF16 instructions are enabled through +bf16. */
#define TARGET_BF16_FP (AARCH64_ISA_BF16)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c0c64a798..7ee26284d 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6417,7 +6417,7 @@
(define_expand "<optab>dihf2"
[(set (match_operand:HF 0 "register_operand")
(FLOATUORS:HF (match_operand:DI 1 "register_operand")))]
- "TARGET_FLOAT && (TARGET_FP_F16INST || TARGET_SIMD)"
+ "TARGET_FP_F16INST || TARGET_SIMD"
{
if (TARGET_FP_F16INST)
emit_insn (gen_aarch64_fp16_<optab>dihf2 (operands[0], operands[1]));
@@ -6676,7 +6676,7 @@
[(match_operand:GPF 0 "register_operand")
(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")]
- "TARGET_FLOAT && TARGET_SIMD"
+ "TARGET_SIMD"
{
rtx bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
emit_move_insn (bitmask, GEN_INT (HOST_WIDE_INT_M1U
@@ -6693,7 +6693,7 @@
(match_operand:GPF 2 "register_operand" "w,w,0,0")
(match_operand:<V_INT_EQUIV> 3 "register_operand" "0,w,w,X")]
UNSPEC_COPYSIGN))]
- "TARGET_FLOAT && TARGET_SIMD"
+ "TARGET_SIMD"
"@
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
bit\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
@@ -6714,7 +6714,7 @@
[(match_operand:GPF 0 "register_operand")
(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")]
- "TARGET_FLOAT && TARGET_SIMD"
+ "TARGET_SIMD"
{
machine_mode imode = <V_INT_EQUIV>mode;
--
2.33.0
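The aese/aesmc and aesd/aesimc fusion patterns retained above pair an AES
round with its mix-columns step. A minimal sketch of source code that
exercises the fusion (the function name is illustrative, not from the
patch; requires the +aes extension):

#include <arm_neon.h>

#pragma GCC target "+aes"

/* One AES encryption round: AESE followed by AESMC, the sequence that
   aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC) allows to fuse.  */
uint8x16_t
aes_round (uint8x16_t state, uint8x16_t key)
{
  return vaesmcq_u8 (vaeseq_u8 (state, key));
}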

From 53a858c0c371cbea27ed4170a94fb3918b9fcdcf Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 4 Oct 2022 16:39:18 +0100
Subject: [PATCH 027/157] [Backport][SME] aarch64: Define __ARM_FEATURE_RCPC
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c1b0a767f04a8ccbaff2a7b71d5c817cdb469630
https://github.com/ARM-software/acle/pull/199 adds a new feature
macro for RCPC, for use in things like inline assembly. This patch
adds the associated support to GCC.
Also, RCPC is required for Armv8.3-A and later, but the armv8.3-a
entry didn't include it. This was probably harmless in practice
since GCC simply ignored the extension until now. (The GAS
definition is OK.)
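A sketch of the intended inline-assembly use (the helper below is
illustrative, not part of the patch):

#include <stdint.h>

/* Use RCPC's LDAPR for a load-acquire when available; fall back to a
   plain acquire load otherwise.  */
static inline uint64_t
load_acquire (const uint64_t *p)
{
#ifdef __ARM_FEATURE_RCPC
  uint64_t v;
  asm volatile ("ldapr %0, %1" : "=r" (v) : "Q" (*p) : "memory");
  return v;
#else
  return __atomic_load_n (p, __ATOMIC_ACQUIRE);
#endif
}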
gcc/
* config/aarch64/aarch64.h (AARCH64_ISA_RCPC): New macro.
* config/aarch64/aarch64-arches.def (armv8.3-a): Include RCPC.
* config/aarch64/aarch64-cores.def (thunderx3t110, zeus, neoverse-v1)
(neoverse-512tvb, saphira): Remove RCPC from these Armv8.3-A+ cores.
* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins): Define
__ARM_FEATURE_RCPC when appropriate.
gcc/testsuite/
* gcc.target/aarch64/pragma_cpp_predefs_1.c: Add RCPC tests.
---
gcc/config/aarch64/aarch64-arches.def | 2 +-
gcc/config/aarch64/aarch64-c.cc | 1 +
gcc/config/aarch64/aarch64-cores.def | 10 +++++-----
gcc/config/aarch64/aarch64.h | 1 +
.../gcc.target/aarch64/pragma_cpp_predefs_1.c | 20 +++++++++++++++++++
5 files changed, 28 insertions(+), 6 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-arches.def b/gcc/config/aarch64/aarch64-arches.def
index 9f8246618..5a9eff336 100644
--- a/gcc/config/aarch64/aarch64-arches.def
+++ b/gcc/config/aarch64/aarch64-arches.def
@@ -33,7 +33,7 @@
AARCH64_ARCH("armv8-a", generic, V8A, 8, (SIMD))
AARCH64_ARCH("armv8.1-a", generic, V8_1A, 8, (V8A, LSE, CRC, RDMA))
AARCH64_ARCH("armv8.2-a", generic, V8_2A, 8, (V8_1A))
-AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH))
+AARCH64_ARCH("armv8.3-a", generic, V8_3A, 8, (V8_2A, PAUTH, RCPC))
AARCH64_ARCH("armv8.4-a", generic, V8_4A, 8, (V8_3A, F16FML, DOTPROD, FLAGM))
AARCH64_ARCH("armv8.5-a", generic, V8_5A, 8, (V8_4A, SB, SSBS, PREDRES))
AARCH64_ARCH("armv8.6-a", generic, V8_6A, 8, (V8_5A, I8MM, BF16))
diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 2dfe2b8f8..4085ad840 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -202,6 +202,7 @@ aarch64_update_cpp_builtins (cpp_reader *pfile)
"__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", pfile);
aarch64_def_or_undef (TARGET_LS64,
"__ARM_FEATURE_LS64", pfile);
+ aarch64_def_or_undef (AARCH64_ISA_RCPC, "__ARM_FEATURE_RCPC", pfile);
/* Not for ACLE, but required to keep "float.h" correct if we switch
target between implementations that do or do not support ARMv8.2-A
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 60299160b..b50628d6b 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -133,17 +133,17 @@ AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0
/* ARMv8.3-A Architecture Processors. */
/* Marvell cores (TX3). */
-AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, RCPC, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a)
+AARCH64_CORE("thunderx3t110", thunderx3t110, thunderx3t110, V8_3A, (CRYPTO, SM4, SHA3, F16FML), thunderx3t110, 0x43, 0x0b8, 0x0a)
/* ARMv8.4-A Architecture Processors. */
/* Arm ('A') cores. */
-AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
-AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, RCPC, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
+AARCH64_CORE("zeus", zeus, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-v1", neoversev1, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoversev1, 0x41, 0xd40, -1)
+AARCH64_CORE("neoverse-512tvb", neoverse512tvb, cortexa57, V8_4A, (SVE, I8MM, BF16, PROFILE, SSBS, RNG), neoverse512tvb, INVALID_IMP, INVALID_CORE, -1)
/* Qualcomm ('Q') cores. */
-AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO, RCPC), saphira, 0x51, 0xC01, -1)
+AARCH64_CORE("saphira", saphira, saphira, V8_4A, (CRYPTO), saphira, 0x51, 0xC01, -1)
/* ARMv8-A big.LITTLE implementations. */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2a9d2d031..19b82b4f3 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -201,6 +201,7 @@ enum class aarch64_feature : unsigned char {
#define AARCH64_ISA_SM4 (aarch64_isa_flags & AARCH64_FL_SM4)
#define AARCH64_ISA_SHA3 (aarch64_isa_flags & AARCH64_FL_SHA3)
#define AARCH64_ISA_F16FML (aarch64_isa_flags & AARCH64_FL_F16FML)
+#define AARCH64_ISA_RCPC (aarch64_isa_flags & AARCH64_FL_RCPC)
#define AARCH64_ISA_RCPC8_4 (aarch64_isa_flags & AARCH64_FL_V8_4A)
#define AARCH64_ISA_RNG (aarch64_isa_flags & AARCH64_FL_RNG)
#define AARCH64_ISA_V8_5A (aarch64_isa_flags & AARCH64_FL_V8_5A)
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
index bfb044f5d..307fa3d67 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_1.c
@@ -248,6 +248,26 @@
#error "__ARM_FEATURE_CRC32 is not defined but should be!"
#endif
+#pragma GCC target ("arch=armv8.2-a")
+#ifdef __ARM_FEATURE_RCPC
+#error "__ARM_FEATURE_RCPC is defined but should not be!"
+#endif
+
+#pragma GCC target ("arch=armv8.2-a+rcpc")
+#ifndef __ARM_FEATURE_RCPC
+#error "__ARM_FEATURE_RCPC is not defined but should be!"
+#endif
+
+#pragma GCC target ("+norcpc")
+#ifdef __ARM_FEATURE_RCPC
+#error "__ARM_FEATURE_RCPC is defined but should not be!"
+#endif
+
+#pragma GCC target ("arch=armv8.3-a")
+#ifndef __ARM_FEATURE_RCPC
+#error "__ARM_FEATURE_RCPC is not defined but should be!"
+#endif
+
int
foo (int a)
{
--
2.33.0

From f6b2917888292c694bae1debe8abb0d6c2c6f59e Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Tue, 20 Feb 2024 11:03:47 +0800
Subject: [PATCH 028/157] [Backport][SME] Add Ampere-1 and Ampere-1A core
definition in aarch64-cores.def
From commits db2f5d661239737157cf131de7d4df1c17d8d88d and
590a06afbf0e96813b5879742f38f3665512c854.
---
gcc/config/aarch64/aarch64-cores.def | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index b50628d6b..f069c81cf 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -69,7 +69,8 @@ AARCH64_CORE("thunderxt81", thunderxt81, thunderx, V8A, (CRC, CRYPTO), thu
AARCH64_CORE("thunderxt83", thunderxt83, thunderx, V8A, (CRC, CRYPTO), thunderx, 0x43, 0x0a3, -1)
/* Ampere Computing ('\xC0') cores. */
-AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (), ampere1, 0xC0, 0xac3, -1)
+AARCH64_CORE("ampere1", ampere1, cortexa57, V8_6A, (F16, RNG, AES, SHA3), ampere1, 0xC0, 0xac3, -1)
+AARCH64_CORE("ampere1a", ampere1a, cortexa57, V8_6A, (F16, RNG, AES, SHA3, MEMTAG), ampere1a, 0xC0, 0xac4, -1)
/* Do not swap around "emag" and "xgene1",
this order is required to handle variant correctly. */
AARCH64_CORE("emag", emag, xgene1, V8A, (CRC, CRYPTO), emag, 0x50, 0x000, 3)
--
2.33.0

From 81a4b464d01cf00f8b355115588e67bf2c021acd Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Wed, 7 Sep 2022 10:52:04 +0100
Subject: [PATCH 029/157] [Backport][SME] aarch64: Fix +nosimd handling of FPR
moves
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d6106132907f6bd01109f2616d20a87edecc6fc6
8-bit and 16-bit FPR moves would ICE for +nosimd+fp, and some other
moves would handle FPR<-zero inefficiently. This is very much a
niche case at the moment, but something like it becomes more
important with SME streaming mode.
The si, di and vector tests already passed; they're included just for
completeness.
We're a bit inconsistent about whether alternatives involving FPRs
are marked with arch==fp or arch=* (i.e. default). E.g. FPR loads
and stores are sometimes * and sometimes fp.
IMO * makes more sense. FPRs should not be used at all without
TARGET_FLOAT, so TARGET_FLOAT represents the base architecture
when FPRs are enabled. I think it's more useful if non-default
arches represent a genuine restriction.
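As a schematic of that convention (a made-up pattern, not from the patch):

(define_insn "*example_mov"
  [(set (match_operand:QI 0 "register_operand" "=r,w")
        (match_operand:QI 1 "register_operand" "r,w"))]
  ""
  "@
   mov\\t%w0, %w1
   fmov\\t%s0, %s1"
;; The FPR alternative needs nothing beyond the base FP support that
;; TARGET_FLOAT guarantees, so it is marked "*" rather than "fp" or "simd".
  [(set_attr "arch" "*,*")]
)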
gcc/
* config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Extend
w<-w, r<-w and w<-r alternatives to !simd, using 32-bit moves
in that case. Extend w<-r to w<-Z.
(*mov<HFBF:mode>_aarch64): Likewise, but with Y instead of Z.
(*movti_aarch64): Use an FMOV from XZR for w<-Z if MOVI is not
available.
(define_split): Do not apply the floating-point immediate-to-register
split to zeros, even if MOVI is not available.
gcc/testsuite/
* gcc.target/aarch64/movqi_1.c: New test.
* gcc.target/aarch64/movhi_1.c: Likewise.
* gcc.target/aarch64/movsi_1.c: Likewise.
* gcc.target/aarch64/movdi_2.c: Likewise.
* gcc.target/aarch64/movti_2.c: Likewise.
* gcc.target/aarch64/movhf_1.c: Likewise.
* gcc.target/aarch64/movsf_1.c: Likewise.
* gcc.target/aarch64/movdf_1.c: Likewise.
* gcc.target/aarch64/movtf_2.c: Likewise.
* gcc.target/aarch64/movv8qi_1.c: Likewise.
* gcc.target/aarch64/movv16qi_1.c: Likewise.
---
gcc/config/aarch64/aarch64.md | 38 ++++----
gcc/testsuite/gcc.target/aarch64/movdf_1.c | 53 ++++++++++++
gcc/testsuite/gcc.target/aarch64/movdi_2.c | 61 +++++++++++++
gcc/testsuite/gcc.target/aarch64/movhf_1.c | 53 ++++++++++++
gcc/testsuite/gcc.target/aarch64/movhi_1.c | 61 +++++++++++++
gcc/testsuite/gcc.target/aarch64/movqi_1.c | 61 +++++++++++++
gcc/testsuite/gcc.target/aarch64/movsf_1.c | 53 ++++++++++++
gcc/testsuite/gcc.target/aarch64/movsi_1.c | 61 +++++++++++++
gcc/testsuite/gcc.target/aarch64/movtf_2.c | 81 +++++++++++++++++
gcc/testsuite/gcc.target/aarch64/movti_2.c | 86 +++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/movv16qi_1.c | 82 ++++++++++++++++++
gcc/testsuite/gcc.target/aarch64/movv8qi_1.c | 55 ++++++++++++
12 files changed, 729 insertions(+), 16 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/movdf_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movdi_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movhf_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movhi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movqi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movsf_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movsi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_2.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7ee26284d..7267a74d6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1201,7 +1201,7 @@
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w")
- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,r,w"))]
+ (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
"(register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
{
@@ -1225,11 +1225,11 @@
case 7:
return "str\t%<size>1, %0";
case 8:
- return "umov\t%w0, %1.<v>[0]";
+ return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
case 9:
- return "dup\t%0.<Vallxd>, %w1";
+ return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
case 10:
- return "dup\t%<Vetype>0, %1.<v>[0]";
+ return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
default:
gcc_unreachable ();
}
@@ -1237,7 +1237,7 @@
;; The "mov_imm" type for CNT is just a placeholder.
[(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
- (set_attr "arch" "*,*,simd,sve,*,*,*,*,simd,simd,simd")]
+ (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
)
(define_expand "mov<mode>"
@@ -1399,14 +1399,15 @@
(define_insn "*movti_aarch64"
[(set (match_operand:TI 0
- "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m")
+ "nonimmediate_operand" "= r,w,w,w, r,w,r,m,m,w,m")
(match_operand:TI 1
- "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))]
+ "aarch64_movti_operand" " rUti,Z,Z,r, w,w,m,r,Z,m,w"))]
"(register_operand (operands[0], TImode)
|| aarch64_reg_or_zero (operands[1], TImode))"
"@
#
movi\\t%0.2d, #0
+ fmov\t%d0, xzr
#
#
mov\\t%0.16b, %1.16b
@@ -1415,11 +1416,11 @@
stp\\txzr, xzr, %0
ldr\\t%q0, %1
str\\t%q1, %0"
- [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \
+ [(set_attr "type" "multiple,neon_move,f_mcr,f_mcr,f_mrc,neon_logic_q, \
load_16,store_16,store_16,\
load_16,store_16")
- (set_attr "length" "8,4,8,8,4,4,4,4,4,4")
- (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")]
+ (set_attr "length" "8,4,4,8,8,4,4,4,4,4,4")
+ (set_attr "arch" "*,simd,*,*,*,simd,*,*,*,fp,fp")]
)
;; Split a TImode register-register or register-immediate move into
@@ -1458,16 +1459,19 @@
)
(define_insn "*mov<mode>_aarch64"
- [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w , w,?r,w,w ,w ,w,m,r,m ,r")
- (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r, w,w,Ufc,Uvi,m,w,m,rY,r"))]
+ [(set (match_operand:HFBF 0 "nonimmediate_operand" "=w,w ,w ,w ,?r,?r,w,w,w ,w ,w,m,r,m ,r")
+ (match_operand:HFBF 1 "general_operand" "Y ,?rY,?r,?rY, w, w,w,w,Ufc,Uvi,m,w,m,rY,r"))]
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
movi\\t%0.4h, #0
fmov\\t%h0, %w1
dup\\t%w0.4h, %w1
+ fmov\\t%s0, %w1
umov\\t%w0, %1.h[0]
+ fmov\\t%w0, %s1
mov\\t%0.h[0], %1.h[0]
+ fmov\\t%s0, %s1
fmov\\t%h0, %1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], HImode);
ldr\\t%h0, %1
@@ -1475,9 +1479,10 @@
ldrh\\t%w0, %1
strh\\t%w1, %0
mov\\t%w0, %w1"
- [(set_attr "type" "neon_move,f_mcr,neon_move,neon_to_gp, neon_move,fconsts, \
- neon_move,f_loads,f_stores,load_4,store_4,mov_reg")
- (set_attr "arch" "simd,fp16,simd,simd,simd,fp16,simd,*,*,*,*,*")]
+ [(set_attr "type" "neon_move,f_mcr,neon_move,f_mcr,neon_to_gp,f_mrc,
+ neon_move,fmov,fconsts,neon_move,f_loads,f_stores,
+ load_4,store_4,mov_reg")
+ (set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
)
(define_insn "*movsf_aarch64"
@@ -1530,10 +1535,11 @@
(define_split
[(set (match_operand:GPF_HF 0 "nonimmediate_operand")
- (match_operand:GPF_HF 1 "general_operand"))]
+ (match_operand:GPF_HF 1 "const_double_operand"))]
"can_create_pseudo_p ()
&& !aarch64_can_const_movi_rtx_p (operands[1], <MODE>mode)
&& !aarch64_float_const_representable_p (operands[1])
+ && !aarch64_float_const_zero_rtx_p (operands[1])
&& aarch64_float_const_rtx_p (operands[1])"
[(const_int 0)]
{
diff --git a/gcc/testsuite/gcc.target/aarch64/movdf_1.c b/gcc/testsuite/gcc.target/aarch64/movdf_1.c
new file mode 100644
index 000000000..a51ded1d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movdf_1.c
@@ -0,0 +1,53 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+/*
+** fpr_to_fpr:
+** fmov d0, d1
+** ret
+*/
+double
+fpr_to_fpr (double q0, double q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr:
+** fmov d0, x0
+** ret
+*/
+double
+gpr_to_fpr ()
+{
+ register double x0 asm ("x0");
+ asm volatile ("" : "=r" (x0));
+ return x0;
+}
+
+/*
+** zero_to_fpr:
+** fmov d0, xzr
+** ret
+*/
+double
+zero_to_fpr ()
+{
+ return 0;
+}
+
+/*
+** fpr_to_gpr:
+** fmov x0, d0
+** ret
+*/
+void
+fpr_to_gpr (double q0)
+{
+ register double x0 asm ("x0");
+ x0 = q0;
+ asm volatile ("" :: "r" (x0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movdi_2.c b/gcc/testsuite/gcc.target/aarch64/movdi_2.c
new file mode 100644
index 000000000..dd3fc3e8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movdi_2.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+#include <stdint.h>
+
+/*
+** fpr_to_fpr:
+** fmov d0, d1
+** ret
+*/
+void
+fpr_to_fpr (void)
+{
+ register uint64_t q0 asm ("q0");
+ register uint64_t q1 asm ("q1");
+ asm volatile ("" : "=w" (q1));
+ q0 = q1;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** gpr_to_fpr:
+** fmov d0, x0
+** ret
+*/
+void
+gpr_to_fpr (uint64_t x0)
+{
+ register uint64_t q0 asm ("q0");
+ q0 = x0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** zero_to_fpr:
+** fmov d0, xzr
+** ret
+*/
+void
+zero_to_fpr ()
+{
+ register uint64_t q0 asm ("q0");
+ q0 = 0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** fpr_to_gpr:
+** fmov x0, d0
+** ret
+*/
+uint64_t
+fpr_to_gpr ()
+{
+ register uint64_t q0 asm ("q0");
+ asm volatile ("" : "=w" (q0));
+ return q0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movhf_1.c b/gcc/testsuite/gcc.target/aarch64/movhf_1.c
new file mode 100644
index 000000000..cae25d4e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movhf_1.c
@@ -0,0 +1,53 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+/*
+** fpr_to_fpr:
+** fmov s0, s1
+** ret
+*/
+_Float16
+fpr_to_fpr (_Float16 q0, _Float16 q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr:
+** fmov s0, w0
+** ret
+*/
+_Float16
+gpr_to_fpr ()
+{
+ register _Float16 w0 asm ("w0");
+ asm volatile ("" : "=r" (w0));
+ return w0;
+}
+
+/*
+** zero_to_fpr:
+** fmov s0, wzr
+** ret
+*/
+_Float16
+zero_to_fpr ()
+{
+ return 0;
+}
+
+/*
+** fpr_to_gpr:
+** fmov w0, s0
+** ret
+*/
+void
+fpr_to_gpr (_Float16 q0)
+{
+ register _Float16 w0 asm ("w0");
+ w0 = q0;
+ asm volatile ("" :: "r" (w0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movhi_1.c b/gcc/testsuite/gcc.target/aarch64/movhi_1.c
new file mode 100644
index 000000000..8017abc5f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movhi_1.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+#include <stdint.h>
+
+/*
+** fpr_to_fpr:
+** fmov s0, s1
+** ret
+*/
+void
+fpr_to_fpr (void)
+{
+ register uint16_t q0 asm ("q0");
+ register uint16_t q1 asm ("q1");
+ asm volatile ("" : "=w" (q1));
+ q0 = q1;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** gpr_to_fpr:
+** fmov s0, w0
+** ret
+*/
+void
+gpr_to_fpr (uint16_t w0)
+{
+ register uint16_t q0 asm ("q0");
+ q0 = w0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** zero_to_fpr:
+** fmov s0, wzr
+** ret
+*/
+void
+zero_to_fpr ()
+{
+ register uint16_t q0 asm ("q0");
+ q0 = 0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** fpr_to_gpr:
+** fmov w0, s0
+** ret
+*/
+uint16_t
+fpr_to_gpr ()
+{
+ register uint16_t q0 asm ("q0");
+ asm volatile ("" : "=w" (q0));
+ return q0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movqi_1.c b/gcc/testsuite/gcc.target/aarch64/movqi_1.c
new file mode 100644
index 000000000..401a79630
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movqi_1.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+#include <stdint.h>
+
+/*
+** fpr_to_fpr:
+** fmov s0, s1
+** ret
+*/
+void
+fpr_to_fpr (void)
+{
+ register uint8_t q0 asm ("q0");
+ register uint8_t q1 asm ("q1");
+ asm volatile ("" : "=w" (q1));
+ q0 = q1;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** gpr_to_fpr:
+** fmov s0, w0
+** ret
+*/
+void
+gpr_to_fpr (uint8_t w0)
+{
+ register uint8_t q0 asm ("q0");
+ q0 = w0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** zero_to_fpr:
+** fmov s0, wzr
+** ret
+*/
+void
+zero_to_fpr ()
+{
+ register uint8_t q0 asm ("q0");
+ q0 = 0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** fpr_to_gpr:
+** fmov w0, s0
+** ret
+*/
+uint8_t
+fpr_to_gpr ()
+{
+ register uint8_t q0 asm ("q0");
+ asm volatile ("" : "=w" (q0));
+ return q0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movsf_1.c b/gcc/testsuite/gcc.target/aarch64/movsf_1.c
new file mode 100644
index 000000000..09715aa4f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movsf_1.c
@@ -0,0 +1,53 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+/*
+** fpr_to_fpr:
+** fmov s0, s1
+** ret
+*/
+float
+fpr_to_fpr (float q0, float q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr:
+** fmov s0, w0
+** ret
+*/
+float
+gpr_to_fpr ()
+{
+ register float w0 asm ("w0");
+ asm volatile ("" : "=r" (w0));
+ return w0;
+}
+
+/*
+** zero_to_fpr:
+** fmov s0, wzr
+** ret
+*/
+float
+zero_to_fpr ()
+{
+ return 0;
+}
+
+/*
+** fpr_to_gpr:
+** fmov w0, s0
+** ret
+*/
+void
+fpr_to_gpr (float q0)
+{
+ register float w0 asm ("w0");
+ w0 = q0;
+ asm volatile ("" :: "r" (w0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movsi_1.c b/gcc/testsuite/gcc.target/aarch64/movsi_1.c
new file mode 100644
index 000000000..5314139aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movsi_1.c
@@ -0,0 +1,61 @@
+/* { dg-do assemble } */
+/* { dg-options "-O --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+#include <stdint.h>
+
+/*
+** fpr_to_fpr:
+** fmov s0, s1
+** ret
+*/
+void
+fpr_to_fpr (void)
+{
+ register uint32_t q0 asm ("q0");
+ register uint32_t q1 asm ("q1");
+ asm volatile ("" : "=w" (q1));
+ q0 = q1;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** gpr_to_fpr:
+** fmov s0, w0
+** ret
+*/
+void
+gpr_to_fpr (uint32_t w0)
+{
+ register uint32_t q0 asm ("q0");
+ q0 = w0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** zero_to_fpr:
+** fmov s0, wzr
+** ret
+*/
+void
+zero_to_fpr ()
+{
+ register uint32_t q0 asm ("q0");
+ q0 = 0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** fpr_to_gpr:
+** fmov w0, s0
+** ret
+*/
+uint32_t
+fpr_to_gpr ()
+{
+ register uint32_t q0 asm ("q0");
+ asm volatile ("" : "=w" (q0));
+ return q0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_2.c b/gcc/testsuite/gcc.target/aarch64/movtf_2.c
new file mode 100644
index 000000000..38b16358d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movtf_2.c
@@ -0,0 +1,81 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target large_long_double } */
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+/*
+** fpr_to_fpr:
+** sub sp, sp, #16
+** str q1, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, #?16
+** ret
+*/
+long double
+fpr_to_fpr (long double q0, long double q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr: { target aarch64_little_endian }
+** fmov d0, x0
+** fmov v0.d\[1\], x1
+** ret
+*/
+/*
+** gpr_to_fpr: { target aarch64_big_endian }
+** fmov d0, x1
+** fmov v0.d\[1\], x0
+** ret
+*/
+long double
+gpr_to_fpr ()
+{
+ register long double x0 asm ("x0");
+ asm volatile ("" : "=r" (x0));
+ return x0;
+}
+
+/*
+** zero_to_fpr:
+** fmov s0, wzr
+** ret
+*/
+long double
+zero_to_fpr ()
+{
+ return 0;
+}
+
+/*
+** fpr_to_gpr: { target aarch64_little_endian }
+** (
+** fmov x0, d0
+** fmov x1, v0.d\[1\]
+** |
+** fmov x1, v0.d\[1\]
+** fmov x0, d0
+** )
+** ret
+*/
+/*
+** fpr_to_gpr: { target aarch64_big_endian }
+** (
+** fmov x1, d0
+** fmov x0, v0.d\[1\]
+** |
+** fmov x0, v0.d\[1\]
+** fmov x1, d0
+** )
+** ret
+*/
+void
+fpr_to_gpr (long double q0)
+{
+ register long double x0 asm ("x0");
+ x0 = q0;
+ asm volatile ("" :: "r" (x0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movti_2.c b/gcc/testsuite/gcc.target/aarch64/movti_2.c
new file mode 100644
index 000000000..c393b1220
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movti_2.c
@@ -0,0 +1,86 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+/*
+** fpr_to_fpr:
+** sub sp, sp, #16
+** str q1, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, #?16
+** ret
+*/
+void
+fpr_to_fpr (void)
+{
+ register __int128_t q0 asm ("q0");
+ register __int128_t q1 asm ("q1");
+ asm volatile ("" : "=w" (q1));
+ q0 = q1;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** gpr_to_fpr: { target aarch64_little_endian }
+** fmov d0, x0
+** fmov v0.d\[1\], x1
+** ret
+*/
+/*
+** gpr_to_fpr: { target aarch64_big_endian }
+** fmov d0, x1
+** fmov v0.d\[1\], x0
+** ret
+*/
+void
+gpr_to_fpr (__int128_t x0)
+{
+ register __int128_t q0 asm ("q0");
+ q0 = x0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** zero_to_fpr:
+** fmov d0, xzr
+** ret
+*/
+void
+zero_to_fpr ()
+{
+ register __int128_t q0 asm ("q0");
+ q0 = 0;
+ asm volatile ("" :: "w" (q0));
+}
+
+/*
+** fpr_to_gpr: { target aarch64_little_endian }
+** (
+** fmov x0, d0
+** fmov x1, v0.d\[1\]
+** |
+** fmov x1, v0.d\[1\]
+** fmov x0, d0
+** )
+** ret
+*/
+/*
+** fpr_to_gpr: { target aarch64_big_endian }
+** (
+** fmov x1, d0
+** fmov x0, v0.d\[1\]
+** |
+** fmov x0, v0.d\[1\]
+** fmov x1, d0
+** )
+** ret
+*/
+__int128_t
+fpr_to_gpr ()
+{
+ register __int128_t q0 asm ("q0");
+ asm volatile ("" : "=w" (q0));
+ return q0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
new file mode 100644
index 000000000..8a6afb13b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv16qi_1.c
@@ -0,0 +1,82 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+typedef unsigned char v16qi __attribute__((vector_size(16)));
+
+/*
+** fpr_to_fpr:
+** sub sp, sp, #16
+** str q1, \[sp\]
+** ldr q0, \[sp\]
+** add sp, sp, #?16
+** ret
+*/
+v16qi
+fpr_to_fpr (v16qi q0, v16qi q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr: { target aarch64_little_endian }
+** fmov d0, x0
+** fmov v0.d\[1\], x1
+** ret
+*/
+/*
+** gpr_to_fpr: { target aarch64_big_endian }
+** fmov d0, x1
+** fmov v0.d\[1\], x0
+** ret
+*/
+v16qi
+gpr_to_fpr ()
+{
+ register v16qi x0 asm ("x0");
+ asm volatile ("" : "=r" (x0));
+ return x0;
+}
+
+/*
+** zero_to_fpr:
+** fmov d0, xzr
+** ret
+*/
+v16qi
+zero_to_fpr ()
+{
+ return (v16qi) {};
+}
+
+/*
+** fpr_to_gpr: { target aarch64_little_endian }
+** (
+** fmov x0, d0
+** fmov x1, v0.d\[1\]
+** |
+** fmov x1, v0.d\[1\]
+** fmov x0, d0
+** )
+** ret
+*/
+/*
+** fpr_to_gpr: { target aarch64_big_endian }
+** (
+** fmov x1, d0
+** fmov x0, v0.d\[1\]
+** |
+** fmov x0, v0.d\[1\]
+** fmov x1, d0
+** )
+** ret
+*/
+void
+fpr_to_gpr (v16qi q0)
+{
+ register v16qi x0 asm ("x0");
+ x0 = q0;
+ asm volatile ("" :: "r" (x0));
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
new file mode 100644
index 000000000..4c97e6fbc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/movv8qi_1.c
@@ -0,0 +1,55 @@
+/* { dg-do assemble } */
+/* { dg-options "-O -mtune=neoverse-v1 --save-temps" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target "+nothing+nosimd+fp"
+
+typedef unsigned char v8qi __attribute__((vector_size(8)));
+
+/*
+** fpr_to_fpr:
+** fmov d0, d1
+** ret
+*/
+v8qi
+fpr_to_fpr (v8qi q0, v8qi q1)
+{
+ return q1;
+}
+
+/*
+** gpr_to_fpr:
+** fmov d0, x0
+** ret
+*/
+v8qi
+gpr_to_fpr ()
+{
+ register v8qi x0 asm ("x0");
+ asm volatile ("" : "=r" (x0));
+ return x0;
+}
+
+/*
+** zero_to_fpr:
+** fmov d0, xzr
+** ret
+*/
+v8qi
+zero_to_fpr ()
+{
+ return (v8qi) {};
+}
+
+/*
+** fpr_to_gpr:
+** fmov x0, d0
+** ret
+*/
+void
+fpr_to_gpr (v8qi q0)
+{
+ register v8qi x0 asm ("x0");
+ x0 = q0;
+ asm volatile ("" :: "r" (x0));
+}
--
2.33.0

From 805a7aec3ddab49b92bf2d5c1a3e288860cc14bf Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Thu, 20 Oct 2022 10:37:35 +0100
Subject: [PATCH 030/157] [Backport][SME] aarch64: Commonise some folding code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=df99e9e42094dee0833ac38f53e7fae09b4d133c
Add an aarch64_sve::gimple_folder helper for folding calls
to integer constants. SME will make more use of this.
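At the source level this enables folds such as the following sketch
(the compiler flags are illustrative):

#include <arm_sve.h>

/* With -march=armv8.2-a+sve -msve-vector-bits=256, svcntb () folds to
   the integer constant 32 during gimple folding.  */
uint64_t
bytes_per_vector (void)
{
  return svcntb ();
}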
gcc/
* config/aarch64/aarch64-sve-builtins.h
(gimple_folder::fold_to_cstu): New member function.
* config/aarch64/aarch64-sve-builtins.cc
(gimple_folder::fold_to_cstu): Define.
* config/aarch64/aarch64-sve-builtins-base.cc
(svcnt_bhwd_impl::fold): Use it.
---
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 9 ++-------
gcc/config/aarch64/aarch64-sve-builtins.cc | 7 +++++++
gcc/config/aarch64/aarch64-sve-builtins.h | 1 +
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index c24c05487..56c9d75e7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -516,9 +516,7 @@ public:
gimple *
fold (gimple_folder &f) const OVERRIDE
{
- tree count = build_int_cstu (TREE_TYPE (f.lhs),
- GET_MODE_NUNITS (m_ref_mode));
- return gimple_build_assign (f.lhs, count);
+ return f.fold_to_cstu (GET_MODE_NUNITS (m_ref_mode));
}
rtx
@@ -553,10 +551,7 @@ public:
unsigned int elements_per_vq = 128 / GET_MODE_UNIT_BITSIZE (m_ref_mode);
HOST_WIDE_INT value = aarch64_fold_sve_cnt_pat (pattern, elements_per_vq);
if (value >= 0)
- {
- tree count = build_int_cstu (TREE_TYPE (f.lhs), value);
- return gimple_build_assign (f.lhs, count);
- }
+ return f.fold_to_cstu (value);
return NULL;
}
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index a70e3a6b4..e168c8334 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2615,6 +2615,13 @@ gimple_folder::redirect_call (const function_instance &instance)
return call;
}
+/* Fold the call to constant VAL. */
+gimple *
+gimple_folder::fold_to_cstu (poly_uint64 val)
+{
+ return gimple_build_assign (lhs, build_int_cstu (TREE_TYPE (lhs), val));
+}
+
/* Fold the call to a PTRUE, taking the element size from type suffix 0. */
gimple *
gimple_folder::fold_to_ptrue ()
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 63d1db776..0d130b871 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -500,6 +500,7 @@ public:
tree load_store_cookie (tree);
gimple *redirect_call (const function_instance &);
+ gimple *fold_to_cstu (poly_uint64);
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
gimple *fold_to_vl_pred (unsigned int);
--
2.33.0

From 8dc1eee26c61bea8aab62080bd961825142685f9 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 7 Nov 2023 15:22:57 +0000
Subject: [PATCH 031/157] [Backport][SME] aarch64: Add a %Z operand modifier
for SVE registers
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f40eac535bd55192cf93daca16235efbcd91157a
This patch adds a %Z operand modifier that prints registers as SVE z
registers. The SME patches need this, but so do Tamar's patches.
I'm separating this out to unblock those.
We should probably document the [wxbhsdqZ] modifiers as
user-facing, but doing that for all of them is a separate patch.
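A sketch of the modifier in use (illustrative; assumes SVE is enabled,
e.g. -march=armv8.2-a+sve):

#include <arm_neon.h>

/* V lives in a Neon q register; %Z0 prints that register under its SVE
   z name, so this emits "dup z0.s, z0.s[0]" even though V does not have
   an SVE mode.  */
float32x4_t
dup_lane0 (float32x4_t v)
{
  asm ("dup %Z0.s, %Z0.s[0]" : "+w" (v));
  return v;
}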
gcc/
* config/aarch64/aarch64.cc (aarch64_print_operand): Add a %Z
modifier for SVE registers.
---
gcc/config/aarch64/aarch64.cc | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3e83e48ec..fd1114b52 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11901,6 +11901,10 @@ sizetochar (int size)
'N': Take the duplicated element in a vector constant
and print the negative of it in decimal.
'b/h/s/d/q': Print a scalar FP/SIMD register name.
+ 'Z': Same for SVE registers. ('z' was already taken.)
+ Note that it is not necessary to use %Z for operands
+ that have SVE modes. The convention is to use %Z
+ only for non-SVE (or potentially non-SVE) modes.
'S/T/U/V': Print a FP/SIMD register name for a register list.
The register printed is the FP/SIMD register name
of X + 0/1/2/3 for S/T/U/V.
@@ -12073,6 +12077,8 @@ aarch64_print_operand (FILE *f, rtx x, int code)
case 's':
case 'd':
case 'q':
+ case 'Z':
+ code = TOLOWER (code);
if (!REG_P (x) || !FP_REGNUM_P (REGNO (x)))
{
output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code);
--
2.33.0

From 8a43bd7885ce479cadb0643fbb0fc22d2b0ffced Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sun, 5 Nov 2023 18:28:46 +0000
Subject: [PATCH 032/157] [Backport][SME] mode-switching: Remove unused bbnum
field
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2d55ed2b8a754d7279cd002941f7cb481f0fd133
seginfo had an unused bbnum field, presumably dating from before
BB information was attached directly to insns.
gcc/
* mode-switching.cc: Remove unused forward references.
(seginfo): Remove bbnum.
(new_seginfo): Remove associated argument.
(optimize_mode_switching): Update calls accordingly.
---
gcc/mode-switching.cc | 18 +++++-------------
1 file changed, 5 insertions(+), 13 deletions(-)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 6e3f1dc65..4cf8f03a0 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -65,13 +65,11 @@ along with GCC; see the file COPYING3. If not see
MODE is the mode this insn must be executed in.
INSN_PTR is the insn to be executed (may be the note that marks the
beginning of a basic block).
- BBNUM is the flow graph basic block this insn occurs in.
NEXT is the next insn in the same basic block. */
struct seginfo
{
int mode;
rtx_insn *insn_ptr;
- int bbnum;
struct seginfo *next;
HARD_REG_SET regs_live;
};
@@ -84,11 +82,6 @@ struct bb_info
int mode_in;
};
-static struct seginfo * new_seginfo (int, rtx_insn *, int, HARD_REG_SET);
-static void add_seginfo (struct bb_info *, struct seginfo *);
-static void reg_dies (rtx, HARD_REG_SET *);
-static void reg_becomes_live (rtx, const_rtx, void *);
-
/* Clear mode I from entity J in bitmap B. */
#define clear_mode_bit(b, j, i) \
bitmap_clear_bit (b, (j * max_num_modes) + i)
@@ -148,13 +141,13 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
}
/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
- and basic block BB parameters.
+ and REGS_LIVE parameters.
INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
basic block; that allows us later to insert instructions in a FIFO-like
manner. */
static struct seginfo *
-new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live)
+new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
{
struct seginfo *ptr;
@@ -163,7 +156,6 @@ new_seginfo (int mode, rtx_insn *insn, int bb, HARD_REG_SET regs_live)
ptr = XNEW (struct seginfo);
ptr->mode = mode;
ptr->insn_ptr = insn;
- ptr->bbnum = bb;
ptr->next = NULL;
ptr->regs_live = regs_live;
return ptr;
@@ -604,7 +596,7 @@ optimize_mode_switching (void)
gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
if (ins_pos != BB_END (bb))
ins_pos = NEXT_INSN (ins_pos);
- ptr = new_seginfo (no_mode, ins_pos, bb->index, live_now);
+ ptr = new_seginfo (no_mode, ins_pos, live_now);
add_seginfo (info + bb->index, ptr);
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
@@ -622,7 +614,7 @@ optimize_mode_switching (void)
{
any_set_required = true;
last_mode = mode;
- ptr = new_seginfo (mode, insn, bb->index, live_now);
+ ptr = new_seginfo (mode, insn, live_now);
add_seginfo (info + bb->index, ptr);
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
@@ -651,7 +643,7 @@ optimize_mode_switching (void)
mark the block as nontransparent. */
if (!any_set_required)
{
- ptr = new_seginfo (no_mode, BB_END (bb), bb->index, live_now);
+ ptr = new_seginfo (no_mode, BB_END (bb), live_now);
add_seginfo (info + bb->index, ptr);
if (last_mode != no_mode)
for (i = 0; i < no_mode; i++)
--
2.33.0

From c980e40d2c27ac3ee33c9b6aea6d2b0d4080852e Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:54 +0000
Subject: [PATCH 033/157] [Backport][SME] mode-switching: Tweak the macro/hook
documentation
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8479a3759025961f80cf0cd6bb3f127e09d0510d
I found the documentation for the mode-switching macros/hooks
a bit hard to follow at first. This patch tries to add the
information that I think would have made it easier to understand.
Of course, documentation preferences are personal, and so I could
be changing something that others understood to something that
seems impenetrable.
Some notes on specific changes:
- "in an optimizing compilation" didn't seem accurate; the pass
is run even at -O0, and often needs to be for correctness.
- "at run time" meant when the compiler was run, rather than when
the compiled code was run.
- Removing the list of optional macros isn't a clarification,
but it means that upcoming patches don't create an absurdly
long list.
- I don't really understand the purpose of TARGET_MODE_PRIORITY,
so I mostly left that alone.
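As a schematic of the documented contract (the entity numbering, names
and predicate below are illustrative, not from any real port):

/* One entity with two real modes; the value 2 serves as "no mode" or
   "unknown mode" in mode arguments and return values.  */
#define OPTIMIZE_MODE_SWITCHING(ENTITY) ((ENTITY) == 0)
#define NUM_MODES_FOR_MODE_SWITCHING { 2 }

static int
example_mode_needed (int entity, rtx_insn *insn)
{
  gcc_assert (entity == 0);
  /* Return the mode INSN must execute in, or the number of modes
     (here 2) if INSN imposes no requirement.  */
  return example_insn_uses_fp_p (insn) ? 1 : 2;
}
#define TARGET_MODE_NEEDED example_mode_needed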
gcc/
* target.def: Tweak documentation of mode-switching hooks.
* doc/tm.texi.in (OPTIMIZE_MODE_SWITCHING): Tweak documentation.
(NUM_MODES_FOR_MODE_SWITCHING): Likewise.
* doc/tm.texi: Regenerate.
---
gcc/doc/tm.texi | 69 ++++++++++++++++++++++++++++------------------
gcc/doc/tm.texi.in | 26 +++++++++--------
gcc/target.def | 43 ++++++++++++++++++-----------
3 files changed, 84 insertions(+), 54 deletions(-)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 851d31c18..553aa4cf2 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10234,7 +10234,7 @@ The following macros control mode switching optimizations:
@defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
Define this macro if the port needs extra instructions inserted for mode
-switching in an optimizing compilation.
+switching.
For an example, the SH4 can perform both single and double precision
floating point operations, but to perform a single precision operation,
@@ -10244,73 +10244,88 @@ purpose register as a scratch register, hence these FPSCR sets have to
be inserted before reload, i.e.@: you cannot put this into instruction emitting
or @code{TARGET_MACHINE_DEPENDENT_REORG}.
-You can have multiple entities that are mode-switched, and select at run time
-which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should
-return nonzero for any @var{entity} that needs mode-switching.
+You can have multiple entities that are mode-switched, some of which might
+only be needed conditionally. The entities are identified by their index
+into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
+of the initializer determining the number of entities.
+
+@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
+that needs mode-switching.
+
If you define this macro, you also have to define
@code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED},
@code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}.
-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT}
-are optional.
+The other macros in this section are optional.
@end defmac
@defmac NUM_MODES_FOR_MODE_SWITCHING
If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as
initializer for an array of integers. Each initializer element
N refers to an entity that needs mode switching, and specifies the number
-of different modes that might need to be set for this entity.
-The position of the initializer in the initializer---starting counting at
+of different modes that are defined for that entity.
+The position of the element in the initializer---starting counting at
zero---determines the integer that is used to refer to the mode-switched
entity in question.
-In macros that take mode arguments / yield a mode result, modes are
-represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode
-switch is needed / supplied.
+Modes are represented as numbers 0 @dots{} N @minus{} 1.
+In mode arguments and return values, N either represents an unknown
+mode or ``no mode'', depending on context.
@end defmac
@deftypefn {Target Hook} void TARGET_MODE_EMIT (int @var{entity}, int @var{mode}, int @var{prev_mode}, HARD_REG_SET @var{regs_live})
Generate one or more insns to set @var{entity} to @var{mode}.
@var{hard_reg_live} is the set of hard registers live at the point where
the insn(s) are to be inserted. @var{prev_moxde} indicates the mode
-to switch from. Sets of a lower numbered entity will be emitted before
+to switch from, or is the number of modes if the previous mode is not
+known. Sets of a lower numbered entity will be emitted before
sets of a higher numbered entity to a mode of the same or lower priority.
@end deftypefn
@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn})
@var{entity} is an integer specifying a mode-switched entity.
-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro
-to return an integer value not larger than the corresponding element
-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}
-must be switched into prior to the execution of @var{insn}.
+If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook
+to return the mode that @var{entity} must be switched into prior to the
+execution of @var{insn}, or the number of modes if @var{insn} has no
+such requirement.
@end deftypefn
@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
@var{entity} is an integer specifying a mode-switched entity.
-If this macro is defined, it is evaluated for every @var{insn} during mode
-switching. It determines the mode that an insn results
-in (if different from the incoming mode).
+If this hook is defined, it is evaluated for every @var{insn} during mode
+switching. It returns the mode that @var{entity} is in after @var{insn}
+has been executed. @var{mode} is the mode that @var{entity} was in
+before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.
+
+@var{mode} is equal to the number of modes defined for @var{entity}
+if the mode before @var{insn} is unknown. The hook should likewise return
+the number of modes if it does not know what mode @var{entity} has after
+@var{insn}.
+
+Not defining the hook is equivalent to returning @var{mode}.
@end deftypefn
@deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
-If this macro is defined, it is evaluated for every @var{entity} that
-needs mode switching. It should evaluate to an integer, which is a mode
-that @var{entity} is assumed to be switched to at function entry.
+If this hook is defined, it is evaluated for every @var{entity} that
+needs mode switching. It should return the mode that @var{entity} is
+guaranteed to be in on entry to the function, or the number of modes
+if there is no such guarantee.
If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}
must be defined.
@end deftypefn
@deftypefn {Target Hook} int TARGET_MODE_EXIT (int @var{entity})
-If this macro is defined, it is evaluated for every @var{entity} that
-needs mode switching. It should evaluate to an integer, which is a mode
-that @var{entity} is assumed to be switched to at function exit.
+If this hook is defined, it is evaluated for every @var{entity} that
+needs mode switching. It should return the mode that @var{entity} must
+be in on return from the function, or the number of modes if there is no
+such requirement.
If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
must be defined.
@end deftypefn
@deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
-This macro specifies the order in which modes for @var{entity}
+This hook specifies the order in which modes for @var{entity}
are processed. 0 is the highest priority,
@code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.
-The value of the macro should be an integer designating a mode
+The hook returns an integer designating a mode
for @var{entity}. For any fixed @var{entity}, @code{mode_priority}
(@var{entity}, @var{n}) shall be a bijection in 0 @dots{}
@code{num_modes_for_mode_switching[@var{entity}] - 1}.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index ac95cdf7a..9ec11b15c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6879,7 +6879,7 @@ The following macros control mode switching optimizations:
@defmac OPTIMIZE_MODE_SWITCHING (@var{entity})
Define this macro if the port needs extra instructions inserted for mode
-switching in an optimizing compilation.
+switching.
For an example, the SH4 can perform both single and double precision
floating point operations, but to perform a single precision operation,
@@ -6889,27 +6889,31 @@ purpose register as a scratch register, hence these FPSCR sets have to
be inserted before reload, i.e.@: you cannot put this into instruction emitting
or @code{TARGET_MACHINE_DEPENDENT_REORG}.
-You can have multiple entities that are mode-switched, and select at run time
-which entities actually need it. @code{OPTIMIZE_MODE_SWITCHING} should
-return nonzero for any @var{entity} that needs mode-switching.
+You can have multiple entities that are mode-switched, some of which might
+only be needed conditionally. The entities are identified by their index
+into the @code{NUM_MODES_FOR_MODE_SWITCHING} initializer, with the length
+of the initializer determining the number of entities.
+
+@code{OPTIMIZE_MODE_SWITCHING} should return nonzero for any @var{entity}
+that needs mode-switching.
+
If you define this macro, you also have to define
@code{NUM_MODES_FOR_MODE_SWITCHING}, @code{TARGET_MODE_NEEDED},
@code{TARGET_MODE_PRIORITY} and @code{TARGET_MODE_EMIT}.
-@code{TARGET_MODE_AFTER}, @code{TARGET_MODE_ENTRY}, and @code{TARGET_MODE_EXIT}
-are optional.
+The other macros in this section are optional.
@end defmac
@defmac NUM_MODES_FOR_MODE_SWITCHING
If you define @code{OPTIMIZE_MODE_SWITCHING}, you have to define this as
initializer for an array of integers. Each initializer element
N refers to an entity that needs mode switching, and specifies the number
-of different modes that might need to be set for this entity.
-The position of the initializer in the initializer---starting counting at
+of different modes that are defined for that entity.
+The position of the element in the initializer---starting counting at
zero---determines the integer that is used to refer to the mode-switched
entity in question.
-In macros that take mode arguments / yield a mode result, modes are
-represented as numbers 0 @dots{} N @minus{} 1. N is used to specify that no mode
-switch is needed / supplied.
+Modes are represented as numbers 0 @dots{} N @minus{} 1.
+In mode arguments and return values, N either represents an unknown
+mode or ``no mode'', depending on context.
@end defmac
@hook TARGET_MODE_EMIT
diff --git a/gcc/target.def b/gcc/target.def
index c9bb2b4c2..b87b0f927 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -6992,51 +6992,62 @@ DEFHOOK
"Generate one or more insns to set @var{entity} to @var{mode}.\n\
@var{hard_reg_live} is the set of hard registers live at the point where\n\
the insn(s) are to be inserted. @var{prev_moxde} indicates the mode\n\
-to switch from. Sets of a lower numbered entity will be emitted before\n\
+to switch from, or is the number of modes if the previous mode is not\n\
+known. Sets of a lower numbered entity will be emitted before\n\
sets of a higher numbered entity to a mode of the same or lower priority.",
void, (int entity, int mode, int prev_mode, HARD_REG_SET regs_live), NULL)
DEFHOOK
(needed,
"@var{entity} is an integer specifying a mode-switched entity.\n\
-If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this macro\n\
-to return an integer value not larger than the corresponding element\n\
-in @code{NUM_MODES_FOR_MODE_SWITCHING}, to denote the mode that @var{entity}\n\
-must be switched into prior to the execution of @var{insn}.",
+If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\
+to return the mode that @var{entity} must be switched into prior to the\n\
+execution of @var{insn}, or the number of modes if @var{insn} has no\n\
+such requirement.",
int, (int entity, rtx_insn *insn), NULL)
DEFHOOK
(after,
"@var{entity} is an integer specifying a mode-switched entity.\n\
-If this macro is defined, it is evaluated for every @var{insn} during mode\n\
-switching. It determines the mode that an insn results\n\
-in (if different from the incoming mode).",
+If this hook is defined, it is evaluated for every @var{insn} during mode\n\
+switching. It returns the mode that @var{entity} is in after @var{insn}\n\
+has been executed. @var{mode} is the mode that @var{entity} was in\n\
+before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\
+\n\
+@var{mode} is equal to the number of modes defined for @var{entity}\n\
+if the mode before @var{insn} is unknown. The hook should likewise return\n\
+the number of modes if it does not know what mode @var{entity} has after\n\
+@var{insn}.\n\
+\n\
+Not defining the hook is equivalent to returning @var{mode}.",
int, (int entity, int mode, rtx_insn *insn), NULL)
DEFHOOK
(entry,
- "If this macro is defined, it is evaluated for every @var{entity} that\n\
-needs mode switching. It should evaluate to an integer, which is a mode\n\
-that @var{entity} is assumed to be switched to at function entry.\n\
+ "If this hook is defined, it is evaluated for every @var{entity} that\n\
+needs mode switching. It should return the mode that @var{entity} is\n\
+guaranteed to be in on entry to the function, or the number of modes\n\
+if there is no such guarantee.\n\
If @code{TARGET_MODE_ENTRY} is defined then @code{TARGET_MODE_EXIT}\n\
must be defined.",
int, (int entity), NULL)
DEFHOOK
(exit,
- "If this macro is defined, it is evaluated for every @var{entity} that\n\
-needs mode switching. It should evaluate to an integer, which is a mode\n\
-that @var{entity} is assumed to be switched to at function exit.\n\
+ "If this hook is defined, it is evaluated for every @var{entity} that\n\
+needs mode switching. It should return the mode that @var{entity} must\n\
+be in on return from the function, or the number of modes if there is no\n\
+such requirement.\n\
If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\
must be defined.",
int, (int entity), NULL)
DEFHOOK
(priority,
- "This macro specifies the order in which modes for @var{entity}\n\
+ "This hook specifies the order in which modes for @var{entity}\n\
are processed. 0 is the highest priority,\n\
@code{NUM_MODES_FOR_MODE_SWITCHING[@var{entity}] - 1} the lowest.\n\
-The value of the macro should be an integer designating a mode\n\
+The hook returns an integer designating a mode\n\
for @var{entity}. For any fixed @var{entity}, @code{mode_priority}\n\
(@var{entity}, @var{n}) shall be a bijection in 0 @dots{}\n\
@code{num_modes_for_mode_switching[@var{entity}] - 1}.",
--
2.33.0
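To make the macro and hook descriptions documented above concrete, here is a minimal, hypothetical port fragment. All names and the mode policy are invented for illustration, and it uses the hook signatures as they stand at this point in the series; a real port must also define TARGET_MODE_EMIT and TARGET_MODE_PRIORITY, which are omitted here.

/* In the port's .h file: one mode-switched entity (index 0) with two
   modes.  The value 2 -- the number of modes -- stands for "no mode"
   or "mode unknown" in hook arguments and return values.  */
#define OPTIMIZE_MODE_SWITCHING(ENTITY) ((ENTITY) == 0)
#define NUM_MODES_FOR_MODE_SWITCHING { 2 }

/* In the port's .cc file: a placeholder policy that requires mode 1
   before every call and imposes no requirement elsewhere.  */
static int
example_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
{
  return CALL_P (insn) ? 1 : 2;
}

#undef TARGET_MODE_NEEDED
#define TARGET_MODE_NEEDED example_mode_needed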

View File

@ -0,0 +1,35 @@
From 7ab54a765239bdd2ce548cffdd5b83f9c20f69da Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:55 +0000
Subject: [PATCH 034/157] [Backport][SME] mode-switching: Add note problem
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3cd3a09b3f91a1d023cb180763d40598d6bb274b
optimize_mode_switching uses REG_DEAD notes to track register
liveness, but it failed to tell DF to calculate up-to-date notes.
Noticed by inspection. I don't have a testcase that fails
because of this.
gcc/
* mode-switching.cc (optimize_mode_switching): Call
df_note_add_problem.
---
gcc/mode-switching.cc | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 4cf8f03a0..2a9f98793 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -540,6 +540,7 @@ optimize_mode_switching (void)
pre_exit = create_pre_exit (n_entities, entity_map, num_modes);
}
+ df_note_add_problem ();
df_analyze ();
/* Create the bitmap vectors. */
--
2.33.0
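For context, the notes this patch requests are consumed later in the pass when maintaining the live-register set. A condensed sketch of the idiom, following the shape of optimize_mode_switching with the surrounding loops elided:

/* Ask DF to compute up-to-date REG_DEAD/REG_UNUSED notes, then solve.  */
df_note_add_problem ();
df_analyze ();

/* Later, per insn, retire registers that die here from the live set.  */
for (rtx link = REG_NOTES (insn); link; link = XEXP (link, 1))
  if (REG_NOTE_KIND (link) == REG_DEAD)
    reg_dies (XEXP (link, 0), &live_now);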

View File

@ -0,0 +1,90 @@
From a2a8b560c1749293d3b6d027e20753a7ea042c80 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:55 +0000
Subject: [PATCH 035/157] [Backport][SME] mode-switching: Avoid quadratic list
operation
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=174ee5115a3004d3664165e9d619535b579111d4
add_seginfo chained insn information to the end of a list
by walking from the head of the list on every call. This patch avoids
the quadratic behaviour by keeping track of the tail pointer.
gcc/
* mode-switching.cc (add_seginfo): Replace head pointer with
a pointer to the tail pointer.
(optimize_mode_switching): Update calls accordingly.
---
gcc/mode-switching.cc | 24 ++++++++----------------
1 file changed, 8 insertions(+), 16 deletions(-)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 2a9f98793..6a13951c9 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -162,23 +162,14 @@ new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
}
/* Add a seginfo element to the end of a list.
- HEAD is a pointer to the list beginning.
+ TAIL is a pointer to the list's null terminator.
INFO is the structure to be linked in. */
static void
-add_seginfo (struct bb_info *head, struct seginfo *info)
+add_seginfo (struct seginfo ***tail_ptr, struct seginfo *info)
{
- struct seginfo *ptr;
-
- if (head->seginfo == NULL)
- head->seginfo = info;
- else
- {
- ptr = head->seginfo;
- while (ptr->next != NULL)
- ptr = ptr->next;
- ptr->next = info;
- }
+ **tail_ptr = info;
+ *tail_ptr = &info->next;
}
/* Record in LIVE that register REG died. */
@@ -573,6 +564,7 @@ optimize_mode_switching (void)
Also compute the initial transparency settings. */
FOR_EACH_BB_FN (bb, cfun)
{
+ struct seginfo **tail_ptr = &info[bb->index].seginfo;
struct seginfo *ptr;
int last_mode = no_mode;
bool any_set_required = false;
@@ -598,7 +590,7 @@ optimize_mode_switching (void)
if (ins_pos != BB_END (bb))
ins_pos = NEXT_INSN (ins_pos);
ptr = new_seginfo (no_mode, ins_pos, live_now);
- add_seginfo (info + bb->index, ptr);
+ add_seginfo (&tail_ptr, ptr);
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
}
@@ -616,7 +608,7 @@ optimize_mode_switching (void)
any_set_required = true;
last_mode = mode;
ptr = new_seginfo (mode, insn, live_now);
- add_seginfo (info + bb->index, ptr);
+ add_seginfo (&tail_ptr, ptr);
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
}
@@ -645,7 +637,7 @@ optimize_mode_switching (void)
if (!any_set_required)
{
ptr = new_seginfo (no_mode, BB_END (bb), live_now);
- add_seginfo (info + bb->index, ptr);
+ add_seginfo (&tail_ptr, ptr);
if (last_mode != no_mode)
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
--
2.33.0
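In isolation, the tail-pointer idiom that the patch adopts looks like this (a standalone sketch, not the GCC code itself):

struct node { int value; struct node *next; };

/* Append INFO (whose next field must already be NULL) in O(1) time:
   *TAIL_PTR always points at the list's terminating NULL pointer.  */
static void
append (struct node ***tail_ptr, struct node *info)
{
  **tail_ptr = info;        /* Link INFO in at the current tail.  */
  *tail_ptr = &info->next;  /* INFO's next field is the new tail.  */
}

Typical usage starts with "struct node *head = NULL; struct node **tail = &head;" and then calls append (&tail, n) once per element, so building a list of N nodes costs O(N) rather than O(N^2).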

View File

@ -0,0 +1,136 @@
From 194700063ed04b56d84912f7ace1b8370af6c696 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:56 +0000
Subject: [PATCH 036/157] [Backport][SME] mode-switching: Fix the mode passed
to the emit hook
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5afd208beaef50bcc43b556d4c41d41656b06436
optimize_mode_switching passes an entity's current mode (if known)
to the emit hook. However, the mode that it passed ignored the
effect of the after hook. Instead, the mode for the first emit
call in a block was taken from the incoming mode, whereas the
mode for each subsequent emit call was taken from the result
of the previous call.
The previous pass through the insns already calculated the
correct mode, so this patch records it in the seginfo structure.
(There was a 32-bit hole on 64-bit hosts, so this doesn't increase
the size of the structure for them.)
gcc/
* mode-switching.cc (seginfo): Add a prev_mode field.
(new_seginfo): Take and initialize the prev_mode.
(optimize_mode_switching): Update calls accordingly.
Use the recorded modes during the emit phase, rather than
computing one on the fly.
---
gcc/mode-switching.cc | 30 +++++++++++++++++-------------
1 file changed, 17 insertions(+), 13 deletions(-)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 6a13951c9..584cd4f67 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see
NEXT is the next insn in the same basic block. */
struct seginfo
{
+ int prev_mode;
int mode;
rtx_insn *insn_ptr;
struct seginfo *next;
@@ -140,20 +141,22 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
return need_commit;
}
-/* Allocate a new BBINFO structure, initialized with the MODE, INSN,
- and REGS_LIVE parameters.
+/* Allocate a new BBINFO structure, initialized with the PREV_MODE, MODE,
+ INSN, and REGS_LIVE parameters.
INSN may not be a NOTE_INSN_BASIC_BLOCK, unless it is an empty
basic block; that allows us later to insert instructions in a FIFO-like
manner. */
static struct seginfo *
-new_seginfo (int mode, rtx_insn *insn, const HARD_REG_SET &regs_live)
+new_seginfo (int prev_mode, int mode, rtx_insn *insn,
+ const HARD_REG_SET &regs_live)
{
struct seginfo *ptr;
gcc_assert (!NOTE_INSN_BASIC_BLOCK_P (insn)
|| insn == BB_END (NOTE_BASIC_BLOCK (insn)));
ptr = XNEW (struct seginfo);
+ ptr->prev_mode = prev_mode;
ptr->mode = mode;
ptr->insn_ptr = insn;
ptr->next = NULL;
@@ -589,7 +592,7 @@ optimize_mode_switching (void)
gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
if (ins_pos != BB_END (bb))
ins_pos = NEXT_INSN (ins_pos);
- ptr = new_seginfo (no_mode, ins_pos, live_now);
+ ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
add_seginfo (&tail_ptr, ptr);
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
@@ -605,12 +608,12 @@ optimize_mode_switching (void)
if (mode != no_mode && mode != last_mode)
{
- any_set_required = true;
- last_mode = mode;
- ptr = new_seginfo (mode, insn, live_now);
+ ptr = new_seginfo (last_mode, mode, insn, live_now);
add_seginfo (&tail_ptr, ptr);
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
+ any_set_required = true;
+ last_mode = mode;
}
if (targetm.mode_switching.after)
@@ -636,7 +639,7 @@ optimize_mode_switching (void)
mark the block as nontransparent. */
if (!any_set_required)
{
- ptr = new_seginfo (no_mode, BB_END (bb), live_now);
+ ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
add_seginfo (&tail_ptr, ptr);
if (last_mode != no_mode)
for (i = 0; i < no_mode; i++)
@@ -777,9 +780,9 @@ optimize_mode_switching (void)
FOR_EACH_BB_FN (bb, cfun)
{
struct seginfo *ptr, *next;
- int cur_mode = bb_info[j][bb->index].mode_in;
+ struct seginfo *first = bb_info[j][bb->index].seginfo;
- for (ptr = bb_info[j][bb->index].seginfo; ptr; ptr = next)
+ for (ptr = first; ptr; ptr = next)
{
next = ptr->next;
if (ptr->mode != no_mode)
@@ -789,14 +792,15 @@ optimize_mode_switching (void)
rtl_profile_for_bb (bb);
start_sequence ();
+ int cur_mode = (ptr == first && ptr->prev_mode == no_mode
+ ? bb_info[j][bb->index].mode_in
+ : ptr->prev_mode);
+
targetm.mode_switching.emit (entity_map[j], ptr->mode,
cur_mode, ptr->regs_live);
mode_set = get_insns ();
end_sequence ();
- /* modes kill each other inside a basic block. */
- cur_mode = ptr->mode;
-
/* Insert MODE_SET only if it is nonempty. */
if (mode_set != NULL_RTX)
{
--
2.33.0
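The effect of recording prev_mode can be seen in the emit phase above: only the first segment of a block falls back to the LCM-computed mode_in, while every later segment reuses the mode that the earlier scan already established. A minimal restatement of that selection, with the pass's surrounding declarations assumed:

/* PTR is the current segment, FIRST the block's first segment.  */
int cur_mode = (ptr == first && ptr->prev_mode == no_mode
                ? bb_info[j][bb->index].mode_in   /* LCM result      */
                : ptr->prev_mode);                /* recorded earlier */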

View File

@ -0,0 +1,103 @@
From ac51d446ee605e942b0831d3ff617980d94bf502 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:56 +0000
Subject: [PATCH 037/157] [Backport][SME] mode-switching: Simplify recording of
transparency
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=335b55f4146c5ef9e3bf4bcb7e58e887c3150b02
For a given block, an entity is either transparent for
all modes or for none. Each update to the transparency set
therefore used a loop like:
for (i = 0; i < no_mode; i++)
clear_mode_bit (transp[bb->index], j, i);
This patch instead starts out with a bit-per-block bitmap
and updates the main bitmap at the end.
This isn't much of a simplification on its own. The main
purpose is to simplify later patches.
gcc/
* mode-switching.cc (optimize_mode_switching): Initially
compute transparency in a bit-per-block bitmap.
---
gcc/mode-switching.cc | 19 +++++++++++--------
1 file changed, 11 insertions(+), 8 deletions(-)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 584cd4f67..4d2b9e284 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -555,6 +555,8 @@ optimize_mode_switching (void)
bitmap_vector_clear (antic, last_basic_block_for_fn (cfun));
bitmap_vector_clear (comp, last_basic_block_for_fn (cfun));
+ auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
+
for (j = n_entities - 1; j >= 0; j--)
{
int e = entity_map[j];
@@ -562,6 +564,8 @@ optimize_mode_switching (void)
struct bb_info *info = bb_info[j];
rtx_insn *insn;
+ bitmap_ones (transp_all);
+
/* Determine what the first use (if any) need for a mode of entity E is.
This will be the mode that is anticipatable for this block.
Also compute the initial transparency settings. */
@@ -594,8 +598,7 @@ optimize_mode_switching (void)
ins_pos = NEXT_INSN (ins_pos);
ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
add_seginfo (&tail_ptr, ptr);
- for (i = 0; i < no_mode; i++)
- clear_mode_bit (transp[bb->index], j, i);
+ bitmap_clear_bit (transp_all, bb->index);
}
}
@@ -610,8 +613,7 @@ optimize_mode_switching (void)
{
ptr = new_seginfo (last_mode, mode, insn, live_now);
add_seginfo (&tail_ptr, ptr);
- for (i = 0; i < no_mode; i++)
- clear_mode_bit (transp[bb->index], j, i);
+ bitmap_clear_bit (transp_all, bb->index);
any_set_required = true;
last_mode = mode;
}
@@ -642,8 +644,7 @@ optimize_mode_switching (void)
ptr = new_seginfo (last_mode, no_mode, BB_END (bb), live_now);
add_seginfo (&tail_ptr, ptr);
if (last_mode != no_mode)
- for (i = 0; i < no_mode; i++)
- clear_mode_bit (transp[bb->index], j, i);
+ bitmap_clear_bit (transp_all, bb->index);
}
}
if (targetm.mode_switching.entry && targetm.mode_switching.exit)
@@ -666,8 +667,7 @@ optimize_mode_switching (void)
an extra check in make_preds_opaque. We also
need this to avoid confusing pre_edge_lcm when
antic is cleared but transp and comp are set. */
- for (i = 0; i < no_mode; i++)
- clear_mode_bit (transp[bb->index], j, i);
+ bitmap_clear_bit (transp_all, bb->index);
/* Insert a fake computing definition of MODE into entry
blocks which compute no mode. This represents the mode on
@@ -687,6 +687,9 @@ optimize_mode_switching (void)
FOR_EACH_BB_FN (bb, cfun)
{
+ if (!bitmap_bit_p (transp_all, bb->index))
+ clear_mode_bit (transp[bb->index], j, m);
+
if (info[bb->index].seginfo->mode == m)
set_mode_bit (antic[bb->index], j, m);
--
2.33.0
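A condensed sketch of the new scheme, using the GCC bitmap API; block_needs_mode_p is a hypothetical stand-in for the pass's real per-insn scan, and the pass's surrounding declarations are assumed:

/* One bit per block, instead of one bit per block and mode.  */
auto_sbitmap transp_all (last_basic_block_for_fn (cfun));

for (j = n_entities - 1; j >= 0; j--)
  {
    bitmap_ones (transp_all);              /* Assume transparency.  */
    FOR_EACH_BB_FN (bb, cfun)
      if (block_needs_mode_p (bb, j))      /* hypothetical           */
        bitmap_clear_bit (transp_all, bb->index);

    /* Expand into the per-mode LCM input once, at the end.  */
    FOR_EACH_BB_FN (bb, cfun)
      if (!bitmap_bit_p (transp_all, bb->index))
        for (i = 0; i < no_mode; i++)
          clear_mode_bit (transp[bb->index], j, i);
  }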

View File

@ -0,0 +1,92 @@
From c0aaf329d9c547b249ac120a8d1995d8546a1edb Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:57 +0000
Subject: [PATCH 038/157] [Backport][SME] mode-switching: Tweak entry/exit
handling
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e59ec35276599805cdc6c3979d8a167b027d286e
An entity isn't transparent in a block that requires a specific mode.
optimize_mode_switching took that into account for normal insns,
but didn't for the exit block. Later patches misbehaved because
of this.
In contrast, an entity was correctly marked as non-transparent
in the entry block, but the reasoning seemed a bit convoluted.
It also referred to a function that no longer exists.
Since KILL = ~TRANSP, the entity is by definition not transparent
in a block that defines the entity, so I think we can make it so
without comment.
Finally, the exit handling was nested in the entry handling,
but that doesn't seem necessary. A target could say that an
entity is undefined on entry but must be defined on return,
on a "be liberal in what you accept, be conservative in what
you do" principle.
gcc/
* mode-switching.cc (optimize_mode_switching): Mark the exit
block as nontransparent if it requires a specific mode.
Handle the entry and exit mode as sibling rather than nested
concepts. Remove outdated comment.
---
gcc/mode-switching.cc | 34 +++++++++++++++-------------------
1 file changed, 15 insertions(+), 19 deletions(-)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 4d2b9e284..4761c2ff0 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -649,34 +649,30 @@ optimize_mode_switching (void)
}
if (targetm.mode_switching.entry && targetm.mode_switching.exit)
{
- int mode = targetm.mode_switching.entry (e);
-
info[post_entry->index].mode_out =
info[post_entry->index].mode_in = no_mode;
- if (pre_exit)
- {
- info[pre_exit->index].mode_out =
- info[pre_exit->index].mode_in = no_mode;
- }
+ int mode = targetm.mode_switching.entry (e);
if (mode != no_mode)
{
- bb = post_entry;
-
- /* By always making this nontransparent, we save
- an extra check in make_preds_opaque. We also
- need this to avoid confusing pre_edge_lcm when
- antic is cleared but transp and comp are set. */
- bitmap_clear_bit (transp_all, bb->index);
-
/* Insert a fake computing definition of MODE into entry
blocks which compute no mode. This represents the mode on
entry. */
- info[bb->index].computing = mode;
+ info[post_entry->index].computing = mode;
+ bitmap_clear_bit (transp_all, post_entry->index);
+ }
- if (pre_exit)
- info[pre_exit->index].seginfo->mode =
- targetm.mode_switching.exit (e);
+ if (pre_exit)
+ {
+ info[pre_exit->index].mode_out =
+ info[pre_exit->index].mode_in = no_mode;
+
+ int mode = targetm.mode_switching.exit (e);
+ if (mode != no_mode)
+ {
+ info[pre_exit->index].seginfo->mode = mode;
+ bitmap_clear_bit (transp_all, pre_exit->index);
+ }
}
}
--
2.33.0

View File

@ -0,0 +1,93 @@
From 9505464aec8f95125293c64e2eea9577e9be4700 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:57 +0000
Subject: [PATCH 039/157] [Backport][SME] mode-switching: Allow targets to set
the mode for EH handlers
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=4b803fbf839439b1deca660e32d5ced211111dfa
The mode-switching pass already had hooks to say what mode
an entity is in on entry to a function and what mode it must
be in on return. For SME, we also want to say what mode an
entity is guaranteed to be in on entry to an exception handler.
gcc/
* target.def (mode_switching.eh_handler): New hook.
* doc/tm.texi.in (TARGET_MODE_EH_HANDLER): New @hook.
* doc/tm.texi: Regenerate.
* mode-switching.cc (optimize_mode_switching): Use eh_handler
to get the mode on entry to an exception handler.
---
gcc/doc/tm.texi | 6 ++++++
gcc/doc/tm.texi.in | 2 ++
gcc/mode-switching.cc | 5 ++++-
gcc/target.def | 7 +++++++
4 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 553aa4cf2..4788b3f7a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10321,6 +10321,12 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}
must be defined.
@end deftypefn
+@deftypefn {Target Hook} int TARGET_MODE_EH_HANDLER (int @var{entity})
+If this hook is defined, it should return the mode that @var{entity} is
+guaranteed to be in on entry to an exception handler, or the number of modes
+if there is no such guarantee.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_MODE_PRIORITY (int @var{entity}, int @var{n})
This hook specifies the order in which modes for @var{entity}
are processed. 0 is the highest priority,
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 9ec11b15c..ad343504f 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6926,6 +6926,8 @@ mode or ``no mode'', depending on context.
@hook TARGET_MODE_EXIT
+@hook TARGET_MODE_EH_HANDLER
+
@hook TARGET_MODE_PRIORITY
@node Target Attributes
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 4761c2ff0..9a6ba6cca 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -596,7 +596,10 @@ optimize_mode_switching (void)
gcc_assert (NOTE_INSN_BASIC_BLOCK_P (ins_pos));
if (ins_pos != BB_END (bb))
ins_pos = NEXT_INSN (ins_pos);
- ptr = new_seginfo (no_mode, no_mode, ins_pos, live_now);
+ if (bb_has_eh_pred (bb)
+ && targetm.mode_switching.eh_handler)
+ last_mode = targetm.mode_switching.eh_handler (e);
+ ptr = new_seginfo (no_mode, last_mode, ins_pos, live_now);
add_seginfo (&tail_ptr, ptr);
bitmap_clear_bit (transp_all, bb->index);
}
diff --git a/gcc/target.def b/gcc/target.def
index b87b0f927..bbb482de6 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -7042,6 +7042,13 @@ If @code{TARGET_MODE_EXIT} is defined then @code{TARGET_MODE_ENTRY}\n\
must be defined.",
int, (int entity), NULL)
+DEFHOOK
+(eh_handler,
+ "If this hook is defined, it should return the mode that @var{entity} is\n\
+guaranteed to be in on entry to an exception handler, or the number of modes\n\
+if there is no such guarantee.",
+ int, (int entity), NULL)
+
DEFHOOK
(priority,
"This hook specifies the order in which modes for @var{entity}\n\
--
2.33.0
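A hypothetical target implementation of the new hook might look as follows; the entity and mode numbering are invented and this is not the SME code:

/* The port has one entity (index 0) with two modes; the unwinder is
   assumed to leave it in mode 0, so handlers may rely on that.
   Returning 2 -- the number of modes -- would mean "no guarantee".  */
static int
example_mode_eh_handler (int entity)
{
  gcc_assert (entity == 0);
  return 0;
}

#undef TARGET_MODE_EH_HANDLER
#define TARGET_MODE_EH_HANDLER example_mode_eh_handler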

View File

@ -0,0 +1,211 @@
From a6964e11c7f624cdaed2c9608565a5968292b70f Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:58 +0000
Subject: [PATCH 040/157] [Backport][SME] mode-switching: Pass set of live
registers to the needed hook
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=29d3e1892ebec8abce784077d1f1a3e21d763218
The emit hook already takes the set of live hard registers as input.
This patch passes it to the needed hook too. SME uses this to
optimise the mode choice based on whether state is live or dead.
The main caller already had access to the required info, but the
special handling of return values did not.
gcc/
* target.def (mode_switching.needed): Add a regs_live parameter.
* doc/tm.texi: Regenerate.
* config/epiphany/epiphany-protos.h (epiphany_mode_needed): Update
accordingly.
* config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise.
* config/epiphany/mode-switch-use.cc (insert_uses): Likewise.
* config/i386/i386.cc (ix86_mode_needed): Likewise.
* config/riscv/riscv.cc (riscv_mode_needed): Likewise.
* config/sh/sh.cc (sh_mode_needed): Likewise.
* mode-switching.cc (optimize_mode_switching): Likewise.
(create_pre_exit): Likewise, using the DF simulate functions
to calculate the required information.
---
gcc/config/epiphany/epiphany-protos.h | 4 +++-
gcc/config/epiphany/epiphany.cc | 2 +-
gcc/config/epiphany/mode-switch-use.cc | 2 +-
gcc/config/i386/i386.cc | 2 +-
gcc/config/sh/sh.cc | 4 ++--
gcc/doc/tm.texi | 5 +++--
gcc/mode-switching.cc | 14 ++++++++++++--
gcc/target.def | 5 +++--
8 files changed, 26 insertions(+), 12 deletions(-)
diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h
index 61b63234e..d463e5483 100644
--- a/gcc/config/epiphany/epiphany-protos.h
+++ b/gcc/config/epiphany/epiphany-protos.h
@@ -44,7 +44,9 @@ extern void emit_set_fp_mode (int entity, int mode, int prev_mode,
#endif
extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int);
extern void epiphany_expand_set_fp_mode (rtx *operands);
-extern int epiphany_mode_needed (int entity, rtx_insn *insn);
+#ifdef HARD_CONST
+extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET);
+#endif
extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn);
extern bool epiphany_epilogue_uses (int regno);
extern bool epiphany_optimize_mode_switching (int entity);
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
index f8c049340..be0fbc68c 100644
--- a/gcc/config/epiphany/epiphany.cc
+++ b/gcc/config/epiphany/epiphany.cc
@@ -2400,7 +2400,7 @@ epiphany_mode_priority (int entity, int priority)
}
int
-epiphany_mode_needed (int entity, rtx_insn *insn)
+epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
{
enum attr_fp_mode mode;
diff --git a/gcc/config/epiphany/mode-switch-use.cc b/gcc/config/epiphany/mode-switch-use.cc
index 887550a33..cacb1ce5a 100644
--- a/gcc/config/epiphany/mode-switch-use.cc
+++ b/gcc/config/epiphany/mode-switch-use.cc
@@ -58,7 +58,7 @@ insert_uses (void)
{
if (!INSN_P (insn))
continue;
- mode = epiphany_mode_needed (e, insn);
+ mode = epiphany_mode_needed (e, insn, {});
if (mode == no_mode)
continue;
if (target_insert_mode_switch_use)
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 60f3296b0..4d591d217 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14522,7 +14522,7 @@ ix86_i387_mode_needed (int entity, rtx_insn *insn)
prior to the execution of insn. */
static int
-ix86_mode_needed (int entity, rtx_insn *insn)
+ix86_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
{
switch (entity)
{
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 03e1c04ec..85e83e12e 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -195,7 +195,7 @@ static int calc_live_regs (HARD_REG_SET *);
static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
-static int sh_mode_needed (int, rtx_insn *);
+static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET);
static int sh_mode_after (int, int, rtx_insn *);
static int sh_mode_entry (int);
static int sh_mode_exit (int);
@@ -12529,7 +12529,7 @@ sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
}
static int
-sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
+sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET)
{
return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
}
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 4788b3f7a..d8ac6c4d6 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10280,12 +10280,13 @@ known. Sets of a lower numbered entity will be emitted before
sets of a higher numbered entity to a mode of the same or lower priority.
@end deftypefn
-@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn})
+@deftypefn {Target Hook} int TARGET_MODE_NEEDED (int @var{entity}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live})
@var{entity} is an integer specifying a mode-switched entity.
If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook
to return the mode that @var{entity} must be switched into prior to the
execution of @var{insn}, or the number of modes if @var{insn} has no
-such requirement.
+such requirement. @var{regs_live} contains the set of hard registers
+that are live before @var{insn}.
@end deftypefn
@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 9a6ba6cca..6bbda5058 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -254,6 +254,9 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
&& GET_CODE (PATTERN (last_insn)) == USE
&& GET_CODE ((ret_reg = XEXP (PATTERN (last_insn), 0))) == REG)
{
+ auto_bitmap live;
+ df_simulate_initialize_backwards (src_bb, live);
+
int ret_start = REGNO (ret_reg);
int nregs = REG_NREGS (ret_reg);
int ret_end = ret_start + nregs;
@@ -262,6 +265,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
bool forced_late_switch = false;
rtx_insn *before_return_copy;
+ df_simulate_one_insn_backwards (src_bb, last_insn, live);
+
do
{
rtx_insn *return_copy = PREV_INSN (last_insn);
@@ -269,6 +274,8 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
int copy_start, copy_num;
int j;
+ df_simulate_one_insn_backwards (src_bb, return_copy, live);
+
if (NONDEBUG_INSN_P (return_copy))
{
/* When using SJLJ exceptions, the call to the
@@ -368,11 +375,14 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
the case for floating point on SH4 - then it might
be set by an arithmetic operation that needs a
different mode than the exit block. */
+ HARD_REG_SET hard_regs_live;
+ REG_SET_TO_HARD_REG_SET (hard_regs_live, live);
for (j = n_entities - 1; j >= 0; j--)
{
int e = entity_map[j];
int mode =
- targetm.mode_switching.needed (e, return_copy);
+ targetm.mode_switching.needed (e, return_copy,
+ hard_regs_live);
if (mode != num_modes[e]
&& mode != targetm.mode_switching.exit (e))
@@ -609,7 +619,7 @@ optimize_mode_switching (void)
{
if (INSN_P (insn))
{
- int mode = targetm.mode_switching.needed (e, insn);
+ int mode = targetm.mode_switching.needed (e, insn, live_now);
rtx link;
if (mode != no_mode && mode != last_mode)
diff --git a/gcc/target.def b/gcc/target.def
index bbb482de6..06a52bdaf 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -7003,8 +7003,9 @@ DEFHOOK
If @code{OPTIMIZE_MODE_SWITCHING} is defined, you must define this hook\n\
to return the mode that @var{entity} must be switched into prior to the\n\
execution of @var{insn}, or the number of modes if @var{insn} has no\n\
-such requirement.",
- int, (int entity, rtx_insn *insn), NULL)
+such requirement. @var{regs_live} contains the set of hard registers\n\
+that are live before @var{insn}.",
+ int, (int entity, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
DEFHOOK
(after,
--
2.33.0
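A hypothetical use of the new parameter (STATE_REGNUM and the whole mode policy are invented): if the register holding the switched state is dead at a call, the call may run in the cheap mode; if it is live, the call must preserve it.

#define STATE_REGNUM 68   /* invented hard register number */

static int
example_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn,
                     HARD_REG_SET regs_live)
{
  if (!CALL_P (insn))
    return 2;   /* No requirement; 2 is the number of modes here.  */
  return TEST_HARD_REG_BIT (regs_live, STATE_REGNUM) ? 1 : 0;
}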

View File

@ -0,0 +1,177 @@
From 4457604c11c0a32f3736d73429d1e5fb7baae3a5 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:58 +0000
Subject: [PATCH 041/157] [Backport][SME] mode-switching: Pass the set of live
registers to the after hook
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=93d65f39bc5c3dc318deb6da0e3633f3a4c6c34d
This patch passes the set of live hard registers to the after hook,
like the previous one did for the needed hook.
gcc/
* target.def (mode_switching.after): Add a regs_live parameter.
* doc/tm.texi: Regenerate.
* config/epiphany/epiphany-protos.h (epiphany_mode_after): Update
accordingly.
* config/epiphany/epiphany.cc (epiphany_mode_needed): Likewise.
(epiphany_mode_after): Likewise.
* config/i386/i386.cc (ix86_mode_after): Likewise.
* config/riscv/riscv.cc (riscv_mode_after): Likewise.
* config/sh/sh.cc (sh_mode_after): Likewise.
* mode-switching.cc (optimize_mode_switching): Likewise.
---
gcc/config/epiphany/epiphany-protos.h | 3 ++-
gcc/config/epiphany/epiphany.cc | 5 +++--
gcc/config/i386/i386.cc | 2 +-
gcc/config/sh/sh.cc | 5 +++--
gcc/doc/tm.texi | 4 +++-
gcc/mode-switching.cc | 8 ++++----
gcc/target.def | 4 +++-
7 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/gcc/config/epiphany/epiphany-protos.h b/gcc/config/epiphany/epiphany-protos.h
index d463e5483..6326b7e80 100644
--- a/gcc/config/epiphany/epiphany-protos.h
+++ b/gcc/config/epiphany/epiphany-protos.h
@@ -46,8 +46,9 @@ extern void epiphany_insert_mode_switch_use (rtx_insn *insn, int, int);
extern void epiphany_expand_set_fp_mode (rtx *operands);
#ifdef HARD_CONST
extern int epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET);
+extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn,
+ HARD_REG_SET);
#endif
-extern int epiphany_mode_after (int entity, int last_mode, rtx_insn *insn);
extern bool epiphany_epilogue_uses (int regno);
extern bool epiphany_optimize_mode_switching (int entity);
extern bool epiphany_is_interrupt_p (tree);
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
index be0fbc68c..62636b1ec 100644
--- a/gcc/config/epiphany/epiphany.cc
+++ b/gcc/config/epiphany/epiphany.cc
@@ -2437,7 +2437,7 @@ epiphany_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
return 2;
case EPIPHANY_MSW_ENTITY_ROUND_KNOWN:
if (recog_memoized (insn) == CODE_FOR_set_fp_mode)
- mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn);
+ mode = (enum attr_fp_mode) epiphany_mode_after (entity, mode, insn, {});
/* Fall through. */
case EPIPHANY_MSW_ENTITY_NEAREST:
case EPIPHANY_MSW_ENTITY_TRUNC:
@@ -2498,7 +2498,8 @@ epiphany_mode_entry_exit (int entity, bool exit)
}
int
-epiphany_mode_after (int entity, int last_mode, rtx_insn *insn)
+epiphany_mode_after (int entity, int last_mode, rtx_insn *insn,
+ HARD_REG_SET)
{
/* We have too few call-saved registers to hope to keep the masks across
calls. */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4d591d217..593185fa6 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -14583,7 +14583,7 @@ ix86_avx_u128_mode_after (int mode, rtx_insn *insn)
/* Return the mode that an insn results in. */
static int
-ix86_mode_after (int entity, int mode, rtx_insn *insn)
+ix86_mode_after (int entity, int mode, rtx_insn *insn, HARD_REG_SET)
{
switch (entity)
{
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 85e83e12e..74d61c43b 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -196,7 +196,7 @@ static HOST_WIDE_INT rounded_frame_size (int);
static bool sh_frame_pointer_required (void);
static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
static int sh_mode_needed (int, rtx_insn *, HARD_REG_SET);
-static int sh_mode_after (int, int, rtx_insn *);
+static int sh_mode_after (int, int, rtx_insn *, HARD_REG_SET);
static int sh_mode_entry (int);
static int sh_mode_exit (int);
static int sh_mode_priority (int entity, int n);
@@ -12535,7 +12535,8 @@ sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn, HARD_REG_SET)
}
static int
-sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
+sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn,
+ HARD_REG_SET)
{
if (TARGET_HITACHI && recog_memoized (insn) >= 0 &&
get_attr_fp_set (insn) != FP_SET_NONE)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index d8ac6c4d6..7fce485b2 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10289,12 +10289,14 @@ such requirement. @var{regs_live} contains the set of hard registers
that are live before @var{insn}.
@end deftypefn
-@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn})
+@deftypefn {Target Hook} int TARGET_MODE_AFTER (int @var{entity}, int @var{mode}, rtx_insn *@var{insn}, HARD_REG_SET @var{regs_live})
@var{entity} is an integer specifying a mode-switched entity.
If this hook is defined, it is evaluated for every @var{insn} during mode
switching. It returns the mode that @var{entity} is in after @var{insn}
has been executed. @var{mode} is the mode that @var{entity} was in
before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.
+@var{regs_live} is the set of hard registers that are live after @var{insn}
+has been executed.
@var{mode} is equal to the number of modes defined for @var{entity}
if the mode before @var{insn} is unknown. The hook should likewise return
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 6bbda5058..4f0445894 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -631,10 +631,6 @@ optimize_mode_switching (void)
last_mode = mode;
}
- if (targetm.mode_switching.after)
- last_mode = targetm.mode_switching.after (e, last_mode,
- insn);
-
/* Update LIVE_NOW. */
for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
if (REG_NOTE_KIND (link) == REG_DEAD)
@@ -644,6 +640,10 @@ optimize_mode_switching (void)
for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
if (REG_NOTE_KIND (link) == REG_UNUSED)
reg_dies (XEXP (link, 0), &live_now);
+
+ if (targetm.mode_switching.after)
+ last_mode = targetm.mode_switching.after (e, last_mode,
+ insn, live_now);
}
}
diff --git a/gcc/target.def b/gcc/target.def
index 06a52bdaf..67c20bbb0 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -7014,6 +7014,8 @@ If this hook is defined, it is evaluated for every @var{insn} during mode\n\
switching. It returns the mode that @var{entity} is in after @var{insn}\n\
has been executed. @var{mode} is the mode that @var{entity} was in\n\
before @var{insn} was executed, taking account of @var{TARGET_MODE_NEEDED}.\n\
+@var{regs_live} is the set of hard registers that are live after @var{insn}\n\
+has been executed.\n\
\n\
@var{mode} is equal to the number of modes defined for @var{entity}\n\
if the mode before @var{insn} is unknown. The hook should likewise return\n\
@@ -7021,7 +7023,7 @@ the number of modes if it does not know what mode @var{entity} has after\n\
@var{insn}.\n\
\n\
Not defining the hook is equivalent to returning @var{mode}.",
- int, (int entity, int mode, rtx_insn *insn), NULL)
+ int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
DEFHOOK
(entry,
--
2.33.0

View File

@ -0,0 +1,56 @@
From b0d3536b2a28d3a7084e3bbb9532e719aaf2016b Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:59 +0000
Subject: [PATCH 042/157] [Backport][SME] mode-switching: Use 1-based edge aux
fields
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=718228a6f479fe252e0e6f71933c2465b7b480a9
The pass used the edge aux field to record which mode change
should happen on the edge, with -1 meaning "none". It's more
convenient for later patches to leave aux zero for "none",
and use numbers based at 1 to record a change.
gcc/
* mode-switching.cc (commit_mode_sets): Use 1-based edge aux values.
---
gcc/mode-switching.cc | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 4f0445894..89a8494c6 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -106,10 +106,10 @@ commit_mode_sets (struct edge_list *edge_list, int e, struct bb_info *info)
for (int ed = NUM_EDGES (edge_list) - 1; ed >= 0; ed--)
{
edge eg = INDEX_EDGE (edge_list, ed);
- int mode;
- if ((mode = (int)(intptr_t)(eg->aux)) != -1)
+ if (eg->aux)
{
+ int mode = (int) (intptr_t) eg->aux - 1;
HARD_REG_SET live_at_edge;
basic_block src_bb = eg->src;
int cur_mode = info[src_bb->index].mode_out;
@@ -727,14 +727,14 @@ optimize_mode_switching (void)
{
edge eg = INDEX_EDGE (edge_list, ed);
- eg->aux = (void *)(intptr_t)-1;
+ eg->aux = (void *) (intptr_t) 0;
for (i = 0; i < no_mode; i++)
{
int m = targetm.mode_switching.priority (entity_map[j], i);
if (mode_bit_p (insert[ed], j, m))
{
- eg->aux = (void *)(intptr_t)m;
+ eg->aux = (void *) (intptr_t) (m + 1);
break;
}
}
--
2.33.0
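The encoding itself is the usual pointer-as-integer trick, offset by one so that a null aux field naturally reads as "no change". A standalone demonstration:

#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  void *aux = 0;                          /* No mode change scheduled.  */
  int mode = 3;
  aux = (void *) (intptr_t) (mode + 1);   /* Schedule a change to mode 3.  */

  if (aux)                                /* Nonzero <=> change needed.  */
    printf ("change to mode %d\n", (int) (intptr_t) aux - 1);
  return 0;
}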

View File

@ -0,0 +1,337 @@
From 88d76baa38bb29d5cc732b3c0188b74ef9783713 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:28:59 +0000
Subject: [PATCH 043/157] [Backport][SME] mode-switching: Add a
target-configurable confluence operator
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=493b0038d7d04986c7de977074d095e4eb7d9a27
The mode-switching pass assumed that all of an entity's modes
were mutually exclusive. However, the upcoming SME changes
have an entity with some overlapping modes, so that there is
sometimes a "superunion" mode that contains two given modes.
We can use this relationship to pass something more helpful than
"don't know" to the emit hook.
This patch adds a new hook that targets can use to specify
a mode confluence operator.
With mutually exclusive modes, it's possible to compute a block's
incoming and outgoing modes by looking at its availability sets.
With the confluence operator, we instead need to solve a full
dataflow problem.
However, when emitting a mode transition, the upcoming SME use of
mode-switching benefits from having as much information as possible
about the starting mode. Calculating this information is definitely
worth the compile time.
The dataflow problem is written to work before and after the LCM
problem has been solved. A later patch makes use of this.
While there (since git blame would ping me for the reindented code),
I used a lambda to avoid the cut-&-pasted loops.
gcc/
* target.def (mode_switching.confluence): New hook.
* doc/tm.texi (TARGET_MODE_CONFLUENCE): New @hook.
* doc/tm.texi.in: Regenerate.
* mode-switching.cc (confluence_info): New variable.
(mode_confluence, forward_confluence_n, forward_transfer): New
functions.
(optimize_mode_switching): Use them to calculate mode_in when
TARGET_MODE_CONFLUENCE is defined.
---
gcc/doc/tm.texi | 16 ++++
gcc/doc/tm.texi.in | 2 +
gcc/mode-switching.cc | 179 +++++++++++++++++++++++++++++++++++-------
gcc/target.def | 17 ++++
4 files changed, 186 insertions(+), 28 deletions(-)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 7fce485b2..d7053ec9e 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10306,6 +10306,22 @@ the number of modes if it does not know what mode @var{entity} has after
Not defining the hook is equivalent to returning @var{mode}.
@end deftypefn
+@deftypefn {Target Hook} int TARGET_MODE_CONFLUENCE (int @var{entity}, int @var{mode1}, int @var{mode2})
+By default, the mode-switching pass assumes that a given entity's modes
+are mutually exclusive. This means that the pass can only tell
+@code{TARGET_MODE_EMIT} about an entity's previous mode if all
+incoming paths of execution leave the entity in the same state.
+
+However, some entities might have overlapping, non-exclusive modes,
+so that it is sometimes possible to represent ``mode @var{mode1} or mode
+@var{mode2}'' with something more specific than ``mode not known''.
+If this is true for at least one entity, you should define this hook
+and make it return a mode that includes @var{mode1} and @var{mode2}
+as possibilities. (The mode can include other possibilities too.)
+The hook should return the number of modes if no suitable mode exists
+for the given arguments.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
If this hook is defined, it is evaluated for every @var{entity} that
needs mode switching. It should return the mode that @var{entity} is
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index ad343504f..d420e62fd 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6922,6 +6922,8 @@ mode or ``no mode'', depending on context.
@hook TARGET_MODE_AFTER
+@hook TARGET_MODE_CONFLUENCE
+
@hook TARGET_MODE_ENTRY
@hook TARGET_MODE_EXIT
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 89a8494c6..065767902 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -484,6 +484,101 @@ create_pre_exit (int n_entities, int *entity_map, const int *num_modes)
return pre_exit;
}
+/* Return the confluence of modes MODE1 and MODE2 for entity ENTITY,
+ using NO_MODE to represent an unknown mode if nothing more precise
+ is available. */
+
+int
+mode_confluence (int entity, int mode1, int mode2, int no_mode)
+{
+ if (mode1 == mode2)
+ return mode1;
+
+ if (mode1 != no_mode
+ && mode2 != no_mode
+ && targetm.mode_switching.confluence)
+ return targetm.mode_switching.confluence (entity, mode1, mode2);
+
+ return no_mode;
+}
+
+/* Information for the dataflow problems below. */
+struct
+{
+ /* Information about each basic block, indexed by block id. */
+ struct bb_info *bb_info;
+
+ /* The entity that we're processing. */
+ int entity;
+
+ /* The number of modes defined for the entity, and thus the identifier
+ of the "don't know" mode. */
+ int no_mode;
+} confluence_info;
+
+/* Propagate information about any mode change on edge E to the
+ destination block's mode_in. Return true if something changed.
+
+ The mode_in and mode_out fields use no_mode + 1 to mean "not yet set". */
+
+static bool
+forward_confluence_n (edge e)
+{
+ /* The entry and exit blocks have no useful mode information. */
+ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK)
+ return false;
+
+ /* We don't control mode changes across abnormal edges. */
+ if (e->flags & EDGE_ABNORMAL)
+ return false;
+
+ /* E->aux is nonzero if we have computed the LCM problem and scheduled
+ E to change the mode to E->aux - 1. Otherwise model the change
+ from the source to the destination. */
+ struct bb_info *bb_info = confluence_info.bb_info;
+ int no_mode = confluence_info.no_mode;
+ int src_mode = bb_info[e->src->index].mode_out;
+ if (e->aux)
+ src_mode = (int) (intptr_t) e->aux - 1;
+ if (src_mode == no_mode + 1)
+ return false;
+
+ int dest_mode = bb_info[e->dest->index].mode_in;
+ if (dest_mode == no_mode + 1)
+ {
+ bb_info[e->dest->index].mode_in = src_mode;
+ return true;
+ }
+
+ int entity = confluence_info.entity;
+ int new_mode = mode_confluence (entity, src_mode, dest_mode, no_mode);
+ if (dest_mode == new_mode)
+ return false;
+
+ bb_info[e->dest->index].mode_in = new_mode;
+ return true;
+}
+
+/* Update block BB_INDEX's mode_out based on its mode_in. Return true if
+ something changed. */
+
+static bool
+forward_transfer (int bb_index)
+{
+ /* The entry and exit blocks have no useful mode information. */
+ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
+ return false;
+
+ /* Only propagate through a block if the entity is transparent. */
+ struct bb_info *bb_info = confluence_info.bb_info;
+ if (bb_info[bb_index].computing != confluence_info.no_mode
+ || bb_info[bb_index].mode_out == bb_info[bb_index].mode_in)
+ return false;
+
+ bb_info[bb_index].mode_out = bb_info[bb_index].mode_in;
+ return true;
+}
+
/* Find all insns that need a particular mode setting, and insert the
necessary mode switches. Return true if we did work. */
@@ -567,6 +662,39 @@ optimize_mode_switching (void)
auto_sbitmap transp_all (last_basic_block_for_fn (cfun));
+ auto_bitmap blocks;
+
+ /* Forward-propagate mode information through blocks where the entity
+ is transparent, so that mode_in describes the mode on entry to each
+ block and mode_out describes the mode on exit from each block. */
+ auto forwprop_mode_info = [&](struct bb_info *info,
+ int entity, int no_mode)
+ {
+ /* Use no_mode + 1 to mean "not yet set". */
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (bb_has_abnormal_pred (bb))
+ info[bb->index].mode_in = info[bb->index].seginfo->mode;
+ else
+ info[bb->index].mode_in = no_mode + 1;
+ if (info[bb->index].computing != no_mode)
+ info[bb->index].mode_out = info[bb->index].computing;
+ else
+ info[bb->index].mode_out = no_mode + 1;
+ }
+
+ confluence_info.bb_info = info;
+ confluence_info.entity = entity;
+ confluence_info.no_mode = no_mode;
+
+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
+ df_simple_dataflow (DF_FORWARD, NULL, NULL, forward_confluence_n,
+ forward_transfer, blocks,
+ df_get_postorder (DF_FORWARD),
+ df_get_n_blocks (DF_FORWARD));
+
+ };
+
for (j = n_entities - 1; j >= 0; j--)
{
int e = entity_map[j];
@@ -720,6 +848,7 @@ optimize_mode_switching (void)
for (j = n_entities - 1; j >= 0; j--)
{
int no_mode = num_modes[entity_map[j]];
+ struct bb_info *info = bb_info[j];
/* Insert all mode sets that have been inserted by lcm. */
@@ -740,39 +869,33 @@ optimize_mode_switching (void)
}
}
+ /* mode_in and mode_out can be calculated directly from avin and
+ avout if all the modes are mutually exclusive. Use the target-
+ provided confluence function otherwise. */
+ if (targetm.mode_switching.confluence)
+ forwprop_mode_info (info, entity_map[j], no_mode);
+
FOR_EACH_BB_FN (bb, cfun)
{
- struct bb_info *info = bb_info[j];
- int last_mode = no_mode;
-
- /* intialize mode in availability for bb. */
- for (i = 0; i < no_mode; i++)
- if (mode_bit_p (avout[bb->index], j, i))
- {
- if (last_mode == no_mode)
- last_mode = i;
- if (last_mode != i)
+ auto modes_confluence = [&](sbitmap *av)
+ {
+ for (int i = 0; i < no_mode; ++i)
+ if (mode_bit_p (av[bb->index], j, i))
{
- last_mode = no_mode;
- break;
+ for (int i2 = i + 1; i2 < no_mode; ++i2)
+ if (mode_bit_p (av[bb->index], j, i2))
+ return no_mode;
+ return i;
}
- }
- info[bb->index].mode_out = last_mode;
+ return no_mode;
+ };
- /* intialize mode out availability for bb. */
- last_mode = no_mode;
- for (i = 0; i < no_mode; i++)
- if (mode_bit_p (avin[bb->index], j, i))
- {
- if (last_mode == no_mode)
- last_mode = i;
- if (last_mode != i)
- {
- last_mode = no_mode;
- break;
- }
- }
- info[bb->index].mode_in = last_mode;
+ /* intialize mode in/out availability for bb. */
+ if (!targetm.mode_switching.confluence)
+ {
+ info[bb->index].mode_out = modes_confluence (avout);
+ info[bb->index].mode_in = modes_confluence (avin);
+ }
for (i = 0; i < no_mode; i++)
if (mode_bit_p (del[bb->index], j, i))
diff --git a/gcc/target.def b/gcc/target.def
index 67c20bbb0..1e2091ed3 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -7025,6 +7025,23 @@ the number of modes if it does not know what mode @var{entity} has after\n\
Not defining the hook is equivalent to returning @var{mode}.",
int, (int entity, int mode, rtx_insn *insn, HARD_REG_SET regs_live), NULL)
+DEFHOOK
+(confluence,
+ "By default, the mode-switching pass assumes that a given entity's modes\n\
+are mutually exclusive. This means that the pass can only tell\n\
+@code{TARGET_MODE_EMIT} about an entity's previous mode if all\n\
+incoming paths of execution leave the entity in the same state.\n\
+\n\
+However, some entities might have overlapping, non-exclusive modes,\n\
+so that it is sometimes possible to represent ``mode @var{mode1} or mode\n\
+@var{mode2}'' with something more specific than ``mode not known''.\n\
+If this is true for at least one entity, you should define this hook\n\
+and make it return a mode that includes @var{mode1} and @var{mode2}\n\
+as possibilities. (The mode can include other possibilities too.)\n\
+The hook should return the number of modes if no suitable mode exists\n\
+for the given arguments.",
+ int, (int entity, int mode1, int mode2), NULL)
+
DEFHOOK
(entry,
"If this hook is defined, it is evaluated for every @var{entity} that\n\
--
2.33.0
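A hypothetical confluence operator (invented modes; this is not the SME implementation): the entity has modes 0 and 1 plus an overlapping mode 2 meaning "0 or 1", and 3, the number of modes, reports failure. Since the pass only calls the hook for two distinct known modes, returning the superunion always succeeds here.

static int
example_mode_confluence (int entity ATTRIBUTE_UNUSED, int mode1, int mode2)
{
  /* Any two of {0, 1, 2} are covered by the "0 or 1" superunion.  */
  if (mode1 <= 2 && mode2 <= 2)
    return 2;
  return 3;
}

#undef TARGET_MODE_CONFLUENCE
#define TARGET_MODE_CONFLUENCE example_mode_confluence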

View File

@ -0,0 +1,483 @@
From cb4189b45a3a411958ab6aa85108f6dc7516acf5 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Sat, 11 Nov 2023 17:29:00 +0000
Subject: [PATCH 044/157] [Backport][SME] mode-switching: Add a backprop hook
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fc8458e20a524d053f576d64a606e21f8bd03b84
This patch adds a way for targets to ask that selected mode changes
be brought forward, through a combination of:
(1) requiring a mode in blocks where the entity was previously
transparent
(2) pushing the transition at the head of a block onto incoming edges
SME has two uses for this:
- A "one-shot" entity that, for any given path of execution,
either stays off or makes exactly one transition from off to on.
This relies only on (1) above; see the hook description for more info.
The main purpose of using mode-switching for this entity is to
shrink-wrap the code that requires it.
- A second entity for which all transitions must be from known
modes, which is enforced using a combination of (1) and (2).
More specifically, (1) looks for edges B1->B2 for which:
- B2 requires a specific mode and
- B1 does not guarantee a specific starting mode
In this system, such an edge is only possible if the entity is
transparent in B1. (1) then forces B1 to require some safe common
mode. Applying this inductively means that all incoming edges are
from known modes. If different edges give different starting modes,
(2) pushes the transitions onto the edges themselves; this only
happens if the entity is not transparent in some predecessor block.
The patch also uses the back-propagation as an excuse to do a simple
on-the-fly optimisation.
Hopefully the comments in the patch explain things a bit better.
gcc/
* target.def (mode_switching.backprop): New hook.
* doc/tm.texi.in (TARGET_MODE_BACKPROP): New @hook.
* doc/tm.texi: Regenerate.
* mode-switching.cc (struct bb_info): Add single_succ.
(confluence_info): Add transp field.
(single_succ_confluence_n, single_succ_transfer): New functions.
(backprop_confluence_n, backprop_transfer): Likewise.
(optimize_mode_switching): Use them. Push mode transitions onto
a block's incoming edges, if the backprop hook requires it.
---
gcc/doc/tm.texi | 28 +++++
gcc/doc/tm.texi.in | 2 +
gcc/mode-switching.cc | 275 ++++++++++++++++++++++++++++++++++++++++++
gcc/target.def | 29 +++++
4 files changed, 334 insertions(+)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index d7053ec9e..5f0972356 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -10322,6 +10322,34 @@ The hook should return the number of modes if no suitable mode exists
for the given arguments.
@end deftypefn
+@deftypefn {Target Hook} int TARGET_MODE_BACKPROP (int @var{entity}, int @var{mode1}, int @var{mode2})
+If defined, the mode-switching pass uses this hook to back-propagate mode
+requirements through blocks that have no mode requirements of their own.
+Specifically, @var{mode1} is the mode that @var{entity} has on exit
+from a block B1 (say) and @var{mode2} is the mode that the next block
+requires @var{entity} to have. B1 does not have any mode requirements
+of its own.
+
+The hook should return the mode that it prefers or requires @var{entity}
+to have in B1, or the number of modes if there is no such requirement.
+If the hook returns a required mode for more than one of B1's outgoing
+edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.
+
+For example, suppose there is a ``one-shot'' entity that,
+for a given execution of a function, either stays off or makes exactly
+one transition from off to on. It is safe to make the transition at any
+time, but it is better not to do so unnecessarily. This hook allows the
+function to manage such an entity without having to track its state at
+runtime. Specifically, the entity would have two modes, 0 for off and
+1 for on, with 2 representing ``don't know''. The system is forbidden from
+transitioning from 2 to 1, since 2 represents the possibility that the
+entity is already on (and the aim is to avoid having to emit code to
+check for that case). This hook would therefore return 1 when @var{mode1}
+is 2 and @var{mode2} is 1, which would force the entity to be on in the
+source block. Applying this inductively would remove all transitions
+in which the previous state is unknown.
+@end deftypefn
+
@deftypefn {Target Hook} int TARGET_MODE_ENTRY (int @var{entity})
If this hook is defined, it is evaluated for every @var{entity} that
needs mode switching. It should return the mode that @var{entity} is
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index d420e62fd..fcab21744 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -6924,6 +6924,8 @@ mode or ``no mode'', depending on context.
@hook TARGET_MODE_CONFLUENCE
+@hook TARGET_MODE_BACKPROP
+
@hook TARGET_MODE_ENTRY
@hook TARGET_MODE_EXIT
diff --git a/gcc/mode-switching.cc b/gcc/mode-switching.cc
index 065767902..c2a0f0294 100644
--- a/gcc/mode-switching.cc
+++ b/gcc/mode-switching.cc
@@ -81,6 +81,7 @@ struct bb_info
int computing;
int mode_out;
int mode_in;
+ int single_succ;
};
/* Clear ode I from entity J in bitmap B. */
@@ -508,6 +509,9 @@ struct
/* Information about each basic block, indexed by block id. */
struct bb_info *bb_info;
+ /* A bitmap of blocks for which the current entity is transparent. */
+ sbitmap transp;
+
/* The entity that we're processing. */
int entity;
@@ -579,6 +583,210 @@ forward_transfer (int bb_index)
return true;
}
+/* A backwards confluence function.  Update the bb_info single_succ
+ field for E's source block, based on changes to E's destination block.
+ At the end of the dataflow problem, single_succ is the single mode
+ that all successors require (directly or indirectly), or no_mode
+ if there are conflicting requirements.
+
+ Initially, a value of no_mode + 1 means "don't know". */
+
+static bool
+single_succ_confluence_n (edge e)
+{
+ /* The entry block has no associated mode information. */
+ if (e->src->index == ENTRY_BLOCK)
+ return false;
+
+ /* We don't control mode changes across abnormal edges. */
+ if (e->flags & EDGE_ABNORMAL)
+ return false;
+
+ /* Do nothing if we've already found a conflict. */
+ struct bb_info *bb_info = confluence_info.bb_info;
+ int no_mode = confluence_info.no_mode;
+ int src_mode = bb_info[e->src->index].single_succ;
+ if (src_mode == no_mode)
+ return false;
+
+ /* Work out what mode the destination block (or its successors) require. */
+ int dest_mode;
+ if (e->dest->index == EXIT_BLOCK)
+ dest_mode = no_mode;
+ else if (bitmap_bit_p (confluence_info.transp, e->dest->index))
+ dest_mode = bb_info[e->dest->index].single_succ;
+ else
+ dest_mode = bb_info[e->dest->index].seginfo->mode;
+
+ /* Do nothing if the destination block has no new information. */
+ if (dest_mode == no_mode + 1 || dest_mode == src_mode)
+ return false;
+
+ /* Detect conflicting modes. */
+ if (src_mode != no_mode + 1)
+ dest_mode = no_mode;
+
+ bb_info[e->src->index].single_succ = dest_mode;
+ return true;
+}
+
+/* A backward transfer function for computing the bb_info single_succ
+ fields, as described above single_succ_confluence. */
+
+static bool
+single_succ_transfer (int bb_index)
+{
+ /* We don't have any field to transfer to. Assume that, after the
+ first iteration, we are only called if single_succ has changed.
+ We should then process incoming edges if the entity is transparent. */
+ return bitmap_bit_p (confluence_info.transp, bb_index);
+}
+
+/* Check whether the target wants to back-propagate a mode change across
+ edge E, and update the source block's computed mode if so. Return true
+ if something changed. */
+
+static bool
+backprop_confluence_n (edge e)
+{
+ /* The entry and exit blocks have no useful mode information. */
+ if (e->src->index == ENTRY_BLOCK || e->dest->index == EXIT_BLOCK)
+ return false;
+
+ /* We don't control mode changes across abnormal edges. */
+ if (e->flags & EDGE_ABNORMAL)
+ return false;
+
+ /* We can only require a new mode in the source block if the entity
+ was originally transparent there. */
+ if (!bitmap_bit_p (confluence_info.transp, e->src->index))
+ return false;
+
+ /* Exit now if there is no required mode, or if all paths into the
+ source block leave the entity in the required mode. */
+ struct bb_info *bb_info = confluence_info.bb_info;
+ int no_mode = confluence_info.no_mode;
+ int src_mode = bb_info[e->src->index].mode_out;
+ int dest_mode = bb_info[e->dest->index].mode_in;
+ if (dest_mode == no_mode || src_mode == dest_mode)
+ return false;
+
+ /* See what the target thinks about this transition. */
+ int entity = confluence_info.entity;
+ int new_mode = targetm.mode_switching.backprop (entity, src_mode,
+ dest_mode);
+ if (new_mode == no_mode)
+ return false;
+
+ /* The target doesn't like the current transition, but would be happy
+ with a transition from NEW_MODE.
+
+ If we force the source block to use NEW_MODE, we might introduce a
+ double transition on at least one path through the function (one to
+ NEW_MODE and then one to DEST_MODE). Therefore, if all destination
+ blocks require the same mode, it is usually better to bring that
+ mode requirement forward.
+
+ If that isn't possible, merge the preference for this edge with
+ the preferences for other edges. no_mode + 1 indicates that there
+ was no previous preference. */
+ int old_mode = bb_info[e->src->index].computing;
+ if (bb_info[e->src->index].single_succ != no_mode)
+ new_mode = bb_info[e->src->index].single_succ;
+ else if (old_mode != no_mode + 1)
+ new_mode = mode_confluence (entity, old_mode, new_mode, no_mode);
+
+ if (old_mode == new_mode)
+ return false;
+
+ bb_info[e->src->index].computing = new_mode;
+ return true;
+}
+
+/* If the current entity was originally transparent in block BB_INDEX,
+ update the incoming mode to match the outgoing mode. Register a mode
+ change if the entity is no longer transparent.
+
+ Also, as an on-the-fly optimization, check whether the entity was
+ originally transparent in BB_INDEX and if all successor blocks require
+ the same mode. If so, anticipate the mode change in BB_INDEX if
+ doing it on the incoming edges would require no more mode changes than
+ doing it on the outgoing edges. The aim is to reduce the total number
+ of mode changes emitted for the function (and thus reduce code size and
+ cfg complexity) without increasing the number of mode changes on any
+ given path through the function. A typical case where it helps is:
+
+ T
+ / \
+ T M
+ \ /
+ M
+
+ where the entity is transparent in the T blocks and is required to have
+ mode M in the M blocks. If there are no redundancies leading up to this,
+ there will be two mutually-exclusive changes to mode M, one on each of
+ the T->M edges. The optimization instead converts it to:
+
+ T T M
+ / \ / \ / \
+ T M -> M M -> M M
+ \ / \ / \ /
+ M M M
+
+ which creates a single transition to M for both paths through the diamond.
+
+ Return true if something changed. */
+
+static bool
+backprop_transfer (int bb_index)
+{
+ /* The entry and exit blocks have no useful mode information. */
+ if (bb_index == ENTRY_BLOCK || bb_index == EXIT_BLOCK)
+ return false;
+
+ /* We can only require a new mode if the entity was previously
+ transparent. */
+ if (!bitmap_bit_p (confluence_info.transp, bb_index))
+ return false;
+
+ struct bb_info *bb_info = confluence_info.bb_info;
+ basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
+ int no_mode = confluence_info.no_mode;
+ int mode_in = bb_info[bb_index].mode_in;
+ int mode_out = bb_info[bb_index].computing;
+ if (mode_out == no_mode + 1)
+ {
+ /* The entity is still transparent for this block. See whether
+ all successor blocks need the same mode, either directly or
+ indirectly. */
+ mode_out = bb_info[bb_index].single_succ;
+ if (mode_out == no_mode)
+ return false;
+
+ /* Get a minimum bound on the number of transitions that would be
+ removed if BB itself required MODE_OUT. */
+ unsigned int moved = 0;
+ for (edge e : bb->succs)
+ if (e->dest->index != EXIT_BLOCK
+ && mode_out == bb_info[e->dest->index].seginfo->mode)
+ moved += 1;
+
+ /* See whether making the mode change on all incoming edges would
+ be no worse than making it on MOVED outgoing edges. */
+ if (moved < EDGE_COUNT (bb->preds))
+ return false;
+
+ bb_info[bb_index].mode_out = mode_out;
+ bb_info[bb_index].computing = mode_out;
+ }
+ else if (mode_out == mode_in)
+ return false;
+
+ bb_info[bb_index].mode_in = mode_out;
+ bb_info[bb_index].seginfo->mode = mode_out;
+ return true;
+}
+
/* Find all insns that need a particular mode setting, and insert the
necessary mode switches. Return true if we did work. */
@@ -684,6 +892,7 @@ optimize_mode_switching (void)
}
confluence_info.bb_info = info;
+ confluence_info.transp = nullptr;
confluence_info.entity = entity;
confluence_info.no_mode = no_mode;
@@ -695,6 +904,9 @@ optimize_mode_switching (void)
};
+ if (targetm.mode_switching.backprop)
+ clear_aux_for_edges ();
+
for (j = n_entities - 1; j >= 0; j--)
{
int e = entity_map[j];
@@ -817,6 +1029,53 @@ optimize_mode_switching (void)
}
}
+ /* If the target requests it, back-propagate selected mode requirements
+ through transparent blocks. */
+ if (targetm.mode_switching.backprop)
+ {
+ /* First work out the mode on entry to and exit from each block. */
+ forwprop_mode_info (info, e, no_mode);
+
+ /* Compute the single_succ fields, as described above
+ single_succ_confluence. */
+ FOR_EACH_BB_FN (bb, cfun)
+ info[bb->index].single_succ = no_mode + 1;
+
+ confluence_info.transp = transp_all;
+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
+ df_simple_dataflow (DF_BACKWARD, NULL, NULL,
+ single_succ_confluence_n,
+ single_succ_transfer, blocks,
+ df_get_postorder (DF_BACKWARD),
+ df_get_n_blocks (DF_BACKWARD));
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ /* Repurpose mode_in as the first mode required by the block,
+ or the output mode if none. */
+ if (info[bb->index].seginfo->mode != no_mode)
+ info[bb->index].mode_in = info[bb->index].seginfo->mode;
+
+ /* In transparent blocks, use computing == no_mode + 1
+ to indicate that no propagation has taken place. */
+ if (info[bb->index].computing == no_mode)
+ info[bb->index].computing = no_mode + 1;
+ }
+
+ bitmap_set_range (blocks, 0, last_basic_block_for_fn (cfun));
+ df_simple_dataflow (DF_BACKWARD, NULL, NULL, backprop_confluence_n,
+ backprop_transfer, blocks,
+ df_get_postorder (DF_BACKWARD),
+ df_get_n_blocks (DF_BACKWARD));
+
+ /* Any block that now computes a mode is no longer transparent. */
+ FOR_EACH_BB_FN (bb, cfun)
+ if (info[bb->index].computing == no_mode + 1)
+ info[bb->index].computing = no_mode;
+ else if (info[bb->index].computing != no_mode)
+ bitmap_clear_bit (transp_all, bb->index);
+ }
+
/* Set the anticipatable and computing arrays. */
for (i = 0; i < no_mode; i++)
{
@@ -900,6 +1159,22 @@ optimize_mode_switching (void)
for (i = 0; i < no_mode; i++)
if (mode_bit_p (del[bb->index], j, i))
info[bb->index].seginfo->mode = no_mode;
+
+ /* See whether the target can perform the first transition.
+ If not, push it onto the incoming edges. The earlier backprop
+ pass should ensure that the resulting transitions are valid. */
+ if (targetm.mode_switching.backprop)
+ {
+ int from_mode = info[bb->index].mode_in;
+ int to_mode = info[bb->index].seginfo->mode;
+ if (targetm.mode_switching.backprop (entity_map[j], from_mode,
+ to_mode) != no_mode)
+ {
+ for (edge e : bb->preds)
+ e->aux = (void *) (intptr_t) (to_mode + 1);
+ info[bb->index].mode_in = to_mode;
+ }
+ }
}
/* Now output the remaining mode sets in all the segments. */
diff --git a/gcc/target.def b/gcc/target.def
index 1e2091ed3..4d77c1523 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -7042,6 +7042,35 @@ The hook should return the number of modes if no suitable mode exists\n\
for the given arguments.",
int, (int entity, int mode1, int mode2), NULL)
+DEFHOOK
+(backprop,
+ "If defined, the mode-switching pass uses this hook to back-propagate mode\n\
+requirements through blocks that have no mode requirements of their own.\n\
+Specifically, @var{mode1} is the mode that @var{entity} has on exit\n\
+from a block B1 (say) and @var{mode2} is the mode that the next block\n\
+requires @var{entity} to have. B1 does not have any mode requirements\n\
+of its own.\n\
+\n\
+The hook should return the mode that it prefers or requires @var{entity}\n\
+to have in B1, or the number of modes if there is no such requirement.\n\
+If the hook returns a required mode for more than one of B1's outgoing\n\
+edges, those modes are combined as for @code{TARGET_MODE_CONFLUENCE}.\n\
+\n\
+For example, suppose there is a ``one-shot'' entity that,\n\
+for a given execution of a function, either stays off or makes exactly\n\
+one transition from off to on. It is safe to make the transition at any\n\
+time, but it is better not to do so unnecessarily. This hook allows the\n\
+function to manage such an entity without having to track its state at\n\
+runtime. Specifically, the entity would have two modes, 0 for off and\n\
+1 for on, with 2 representing ``don't know''. The system is forbidden from\n\
+transitioning from 2 to 1, since 2 represents the possibility that the\n\
+entity is already on (and the aim is to avoid having to emit code to\n\
+check for that case). This hook would therefore return 1 when @var{mode1}\n\
+is 2 and @var{mode2} is 1, which would force the entity to be on in the\n\
+source block. Applying this inductively would remove all transitions\n\
+in which the previous state is unknown.",
+ int, (int entity, int mode1, int mode2), NULL)
+
DEFHOOK
(entry,
"If this hook is defined, it is evaluated for every @var{entity} that\n\
--
2.33.0


@ -0,0 +1,81 @@
From 4553f252c10968037edceba4abe3984dc9bbad2a Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Fri, 1 Dec 2023 08:36:15 +0000
Subject: [PATCH 045/157] [Backport][SME] aarch64: Add a result_mode helper
function
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a1bc121c00e30bd1bdaa62d87cbe64eb88e74f45
SME will add more intrinsics whose expansion code requires
the mode of the function return value. This patch adds an
associated helper routine.
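As a sketch of the intended use (hypothetical; the class svfoo_impl and
the generated pattern helper are assumptions, not part of this patch),
a later SME intrinsic expander could be written as:

class svfoo_impl : public function_base
{
public:
  rtx
  expand (function_expander &e) const OVERRIDE
  {
    /* The mode of the call's return value, via the new helper.  */
    machine_mode mode = e.result_mode ();
    /* code_for_aarch64_sme_foo is a hypothetical generated pattern.  */
    return e.use_exact_insn (code_for_aarch64_sme_foo (mode));
  }
};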
gcc/
* config/aarch64/aarch64-sve-builtins.h
(function_expander::result_mode): New member function.
* config/aarch64/aarch64-sve-builtins-base.cc
(svld234_impl::expand): Use it.
* config/aarch64/aarch64-sve-builtins.cc
(function_expander::get_reg_target): Likewise.
---
gcc/config/aarch64/aarch64-sve-builtins-base.cc | 2 +-
gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
gcc/config/aarch64/aarch64-sve-builtins.h | 9 +++++++++
3 files changed, 11 insertions(+), 2 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 56c9d75e7..c9bf13792 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -1272,7 +1272,7 @@ public:
rtx
expand (function_expander &e) const OVERRIDE
{
- machine_mode tuple_mode = TYPE_MODE (TREE_TYPE (e.call_expr));
+ machine_mode tuple_mode = e.result_mode ();
insn_code icode = convert_optab_handler (vec_mask_load_lanes_optab,
tuple_mode, e.vector_mode (0));
return e.use_contiguous_load_insn (icode);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index e168c8334..91af96687 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2796,7 +2796,7 @@ function_expander::get_fallback_value (machine_mode mode, unsigned int nops,
rtx
function_expander::get_reg_target ()
{
- machine_mode target_mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
+ machine_mode target_mode = result_mode ();
if (!possible_target || GET_MODE (possible_target) != target_mode)
possible_target = gen_reg_rtx (target_mode);
return possible_target;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 0d130b871..52994cde0 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -528,6 +528,8 @@ public:
insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0,
machine_mode = E_VOIDmode);
+ machine_mode result_mode () const;
+
bool overlaps_input_p (rtx);
rtx convert_to_pmode (rtx);
@@ -877,6 +879,13 @@ function_base::call_properties (const function_instance &instance) const
return flags;
}
+/* Return the mode of the result of a call. */
+inline machine_mode
+function_expander::result_mode () const
+{
+ return TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl)));
+}
+
}
#endif
--
2.33.0


@ -0,0 +1,232 @@
From 60612cbd9cdd9b5079c0505b9d53c9cd98fba4b1 Mon Sep 17 00:00:00 2001
From: Kewen Lin <linkw@linux.ibm.com>
Date: Tue, 15 Nov 2022 20:26:07 -0600
Subject: [PATCH 046/157] [Backport][SME] rtl: Try to remove EH edges after
{pro,epi}logue generation [PR90259]
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=63e1b2e767a3f4695373c2406ff719c0a60c1858
After prologue and epilogue generation, the judgement on whether
a memory access to the stack frame may trap or not can change,
since by then we have more exact stack information.
As PR90259 shows, some memory accesses become unable to trap
after prologue and epilogue generation; subsequent optimizations
can then remove them when safe, but this leaves the control flow
in an unexpected state because the REG_EH_REGION note is missing.
This patch proposes to try to remove EH edges with the function
purge_all_dead_edges after prologue and epilogue generation;
this simplifies the CFG as early as possible and avoids any
fixup in downstream passes.
CFG simplification result with PR90259's case as example:
*before*
18: %1:TF=call [`__gcc_qdiv'] argc:0
REG_EH_REGION 0x2
77: NOTE_INSN_BASIC_BLOCK 3
19: NOTE_INSN_DELETED
20: NOTE_INSN_DELETED
110: [%31:SI+0x20]=%1:DF
REG_EH_REGION 0x2
116: NOTE_INSN_BASIC_BLOCK 4
111: [%31:SI+0x28]=%2:DF
REG_EH_REGION 0x2
22: NOTE_INSN_BASIC_BLOCK 5
108: %0:DF=[%31:SI+0x20]
REG_EH_REGION 0x2
117: NOTE_INSN_BASIC_BLOCK 6
109: %1:DF=[%31:SI+0x28]
REG_EH_REGION 0x2
79: NOTE_INSN_BASIC_BLOCK 7
26: [%31:SI+0x18]=%0:DF
104: pc=L69
105: barrier
*after*
18: %1:TF=call [`__gcc_qdiv'] argc:0
REG_EH_REGION 0x2
77: NOTE_INSN_BASIC_BLOCK 3
19: NOTE_INSN_DELETED
20: NOTE_INSN_DELETED
110: [%31:SI+0x20]=%1:DF
111: [%31:SI+0x28]=%2:DF
108: %0:DF=[%31:SI+0x20]
109: %1:DF=[%31:SI+0x28]
26: [%31:SI+0x18]=%0:DF
104: pc=L69
105: barrier
PR rtl-optimization/90259
gcc/ChangeLog:
* function.cc (rest_of_handle_thread_prologue_and_epilogue): Add
parameter fun, and call function purge_all_dead_edges.
(pass_thread_prologue_and_epilogue::execute): Name unnamed parameter
as fun, and use it for rest_of_handle_thread_prologue_and_epilogue.
gcc/testsuite/ChangeLog:
* g++.target/powerpc/pr90259.C: New.
---
gcc/function.cc | 13 ++-
gcc/testsuite/g++.target/powerpc/pr90259.C | 103 +++++++++++++++++++++
2 files changed, 113 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/g++.target/powerpc/pr90259.C
diff --git a/gcc/function.cc b/gcc/function.cc
index 49c7ccf4b..28de39dd6 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -6529,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt)
}
static unsigned int
-rest_of_handle_thread_prologue_and_epilogue (void)
+rest_of_handle_thread_prologue_and_epilogue (function *fun)
{
/* prepare_shrink_wrap is sensitive to the block structure of the control
flow graph, so clean it up first. */
@@ -6546,6 +6546,13 @@ rest_of_handle_thread_prologue_and_epilogue (void)
Fix that up. */
fixup_partitions ();
+ /* After prologue and epilogue generation, the judgement on whether
+ one memory access onto stack frame may trap or not could change,
+ since we get more exact stack information by now. So try to
+ remove any EH edges here, see PR90259. */
+ if (fun->can_throw_non_call_exceptions)
+ purge_all_dead_edges ();
+
/* Shrink-wrapping can result in unreachable edges in the epilogue,
see PR57320. */
cleanup_cfg (optimize ? CLEANUP_EXPENSIVE : 0);
@@ -6614,9 +6621,9 @@ public:
{}
/* opt_pass methods: */
- virtual unsigned int execute (function *)
+ unsigned int execute (function * fun) final override
{
- return rest_of_handle_thread_prologue_and_epilogue ();
+ return rest_of_handle_thread_prologue_and_epilogue (fun);
}
}; // class pass_thread_prologue_and_epilogue
diff --git a/gcc/testsuite/g++.target/powerpc/pr90259.C b/gcc/testsuite/g++.target/powerpc/pr90259.C
new file mode 100644
index 000000000..db75ac7fe
--- /dev/null
+++ b/gcc/testsuite/g++.target/powerpc/pr90259.C
@@ -0,0 +1,103 @@
+/* { dg-require-effective-target long_double_ibm128 } */
+/* { dg-options "-O2 -ffloat-store -fgcse -fnon-call-exceptions -fno-forward-propagate -fno-omit-frame-pointer -fstack-protector-all" } */
+/* { dg-add-options long_double_ibm128 } */
+
+/* Verify there is no ICE. */
+
+template <int a> struct b
+{
+ static constexpr int c = a;
+};
+template <bool a> using d = b<a>;
+struct e
+{
+ int f;
+ int
+ g ()
+ {
+ return __builtin_ceil (f / (long double) h);
+ }
+ float h;
+};
+template <typename, typename> using k = d<!bool ()>;
+template <typename> class n
+{
+public:
+ e ae;
+ void af ();
+};
+template <typename l>
+void
+n<l>::af ()
+{
+ ae.g ();
+}
+template <bool> using m = int;
+template <typename ag, typename ah, typename ai = m<k<ag, ah>::c>>
+using aj = n<ai>;
+struct o
+{
+ void
+ af ()
+ {
+ al.af ();
+ }
+ aj<int, int> al;
+};
+template <typename> class am;
+template <typename i> class ao
+{
+protected:
+ static i *ap (int);
+};
+template <typename, typename> class p;
+template <typename ar, typename i, typename... j> class p<ar (j...), i> : ao<i>
+{
+public:
+ static ar
+ as (const int &p1, j...)
+ {
+ (*ao<i>::ap (p1)) (j ()...);
+ }
+};
+template <typename ar, typename... j> class am<ar (j...)>
+{
+ template <typename, typename> using av = int;
+
+public:
+ template <typename i, typename = av<d<!bool ()>, void>,
+ typename = av<i, void>>
+ am (i);
+ using aw = ar (*) (const int &, j...);
+ aw ax;
+};
+template <typename ar, typename... j>
+template <typename i, typename, typename>
+am<ar (j...)>::am (i)
+{
+ ax = p<ar (j...), i>::as;
+}
+struct G
+{
+ void ba (am<void (o)>);
+};
+struct q
+{
+ q ()
+ {
+ G a;
+ a.ba (r ());
+ }
+ struct r
+ {
+ void
+ operator() (o p1)
+ try
+ {
+ p1.af ();
+ }
+ catch (int)
+ {
+ }
+ };
+} s;
--
2.33.0


@ -0,0 +1,71 @@
From beb962ec516f152cef482b229c9adf0390dc3b2c Mon Sep 17 00:00:00 2001
From: Andrew Pinski <apinski@marvell.com>
Date: Thu, 17 Nov 2022 22:03:08 +0000
Subject: [PATCH 047/157] [Backport][SME] Fix PR middle-end/107705: ICE after
redeclaration error
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ceba66ee230bb96b0889fc8ec7333c7ffae96d6e
The problem here is that after we create a call expression
in the C front-end, we replace the decl's type with
an error mark node. We then end up calling
aggregate_value_p with the call expression,
whose decl has the error mark as its type,
and we ICE.
The fix is to check the function type for an error operand
after we process the call expression inside
aggregate_value_p to obtain that type.
OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Thanks,
Andrew Pinski
gcc/ChangeLog:
PR middle-end/107705
* function.cc (aggregate_value_p): Return 0 if
the function type was an error operand.
gcc/testsuite/ChangeLog:
* gcc.dg/redecl-22.c: New test.
---
gcc/function.cc | 3 +++
gcc/testsuite/gcc.dg/redecl-22.c | 9 +++++++++
2 files changed, 12 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/redecl-22.c
diff --git a/gcc/function.cc b/gcc/function.cc
index 28de39dd6..99aa738eb 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -2090,6 +2090,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
if (VOID_TYPE_P (type))
return 0;
+ if (error_operand_p (fntype))
+ return 0;
+
/* If a record should be passed the same as its first (and only) member
don't pass it as an aggregate. */
if (TREE_CODE (type) == RECORD_TYPE && TYPE_TRANSPARENT_AGGR (type))
diff --git a/gcc/testsuite/gcc.dg/redecl-22.c b/gcc/testsuite/gcc.dg/redecl-22.c
new file mode 100644
index 000000000..7758570fa
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/redecl-22.c
@@ -0,0 +1,9 @@
+/* We used to ICE in the gimplifier, PR 107705 */
+/* { dg-do compile } */
+/* { dg-options "-w" } */
+int f (void)
+{
+ int (*p) (void) = 0; // { dg-note "" }
+ return p ();
+ int p = 1; // { dg-error "" }
+}
--
2.33.0


@ -0,0 +1,351 @@
From c074871572ef22cbcca8f0f4bc493d60caeddd78 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Wed, 21 Jun 2023 21:55:30 +0200
Subject: [PATCH 048/157] [Backport][SME] function: Change return type of
predicate function from int to bool
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ce47d3c2cf59bb2cc94afc4bbef88b0e4950f086
Also change some internal variables to bool and some functions to void.
gcc/ChangeLog:
* function.h (emit_initial_value_sets):
Change return type from int to void.
(aggregate_value_p): Change return type from int to bool.
(prologue_contains): Ditto.
(epilogue_contains): Ditto.
(prologue_epilogue_contains): Ditto.
* function.cc (temp_slot): Make "in_use" variable bool.
(make_slot_available): Update for changed "in_use" variable.
(assign_stack_temp_for_type): Ditto.
(emit_initial_value_sets): Change return type from int to void
and update function body accordingly.
(instantiate_virtual_regs): Ditto.
(rest_of_handle_thread_prologue_and_epilogue): Ditto.
(safe_insn_predicate): Change return type from int to bool.
(aggregate_value_p): Change return type from int to bool
and update function body accordingly.
(prologue_contains): Change return type from int to bool.
(prologue_epilogue_contains): Ditto.
---
gcc/function.cc | 77 ++++++++++++++++++++++++-------------------------
gcc/function.h | 10 +++----
2 files changed, 42 insertions(+), 45 deletions(-)
diff --git a/gcc/function.cc b/gcc/function.cc
index 99aa738eb..fc8eb5812 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -578,8 +578,8 @@ public:
tree type;
/* The alignment (in bits) of the slot. */
unsigned int align;
- /* Nonzero if this temporary is currently in use. */
- char in_use;
+ /* True if this temporary is currently in use. */
+ bool in_use;
/* Nesting level at which this slot is being used. */
int level;
/* The offset of the slot from the frame_pointer, including extra space
@@ -674,7 +674,7 @@ make_slot_available (class temp_slot *temp)
{
cut_slot_from_list (temp, temp_slots_at_level (temp->level));
insert_slot_to_list (temp, &avail_temp_slots);
- temp->in_use = 0;
+ temp->in_use = false;
temp->level = -1;
n_temp_slots_in_use--;
}
@@ -848,7 +848,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type)
if (known_ge (best_p->size - rounded_size, alignment))
{
p = ggc_alloc<temp_slot> ();
- p->in_use = 0;
+ p->in_use = false;
p->size = best_p->size - rounded_size;
p->base_offset = best_p->base_offset + rounded_size;
p->full_size = best_p->full_size - rounded_size;
@@ -918,7 +918,7 @@ assign_stack_temp_for_type (machine_mode mode, poly_int64 size, tree type)
}
p = selected;
- p->in_use = 1;
+ p->in_use = true;
p->type = type;
p->level = temp_slot_level;
n_temp_slots_in_use++;
@@ -1340,7 +1340,7 @@ has_hard_reg_initial_val (machine_mode mode, unsigned int regno)
return NULL_RTX;
}
-unsigned int
+void
emit_initial_value_sets (void)
{
struct initial_value_struct *ivs = crtl->hard_reg_initial_vals;
@@ -1348,7 +1348,7 @@ emit_initial_value_sets (void)
rtx_insn *seq;
if (ivs == 0)
- return 0;
+ return;
start_sequence ();
for (i = 0; i < ivs->num_entries; i++)
@@ -1357,7 +1357,6 @@ emit_initial_value_sets (void)
end_sequence ();
emit_insn_at_entry (seq);
- return 0;
}
/* Return the hardreg-pseudoreg initial values pair entry I and
@@ -1535,7 +1534,7 @@ instantiate_virtual_regs_in_rtx (rtx *loc)
/* A subroutine of instantiate_virtual_regs_in_insn. Return true if X
matches the predicate for insn CODE operand OPERAND. */
-static int
+static bool
safe_insn_predicate (int code, int operand, rtx x)
{
return code < 0 || insn_operand_matches ((enum insn_code) code, operand, x);
@@ -1948,7 +1947,7 @@ instantiate_decls (tree fndecl)
/* Pass through the INSNS of function FNDECL and convert virtual register
references to hard register references. */
-static unsigned int
+static void
instantiate_virtual_regs (void)
{
rtx_insn *insn;
@@ -2002,8 +2001,6 @@ instantiate_virtual_regs (void)
/* Indicate that, from now on, assign_stack_local should use
frame_pointer_rtx. */
virtuals_instantiated = 1;
-
- return 0;
}
namespace {
@@ -2031,7 +2028,8 @@ public:
/* opt_pass methods: */
virtual unsigned int execute (function *)
{
- return instantiate_virtual_regs ();
+ instantiate_virtual_regs ();
+ return 0;
}
}; // class pass_instantiate_virtual_regs
@@ -2045,12 +2043,12 @@ make_pass_instantiate_virtual_regs (gcc::context *ctxt)
}
-/* Return 1 if EXP is an aggregate type (or a value with aggregate type).
+/* Return true if EXP is an aggregate type (or a value with aggregate type).
This means a type for which function calls must pass an address to the
function or get an address back from the function.
EXP may be a type node or an expression (whose type is tested). */
-int
+bool
aggregate_value_p (const_tree exp, const_tree fntype)
{
const_tree type = (TYPE_P (exp)) ? exp : TREE_TYPE (exp);
@@ -2070,7 +2068,7 @@ aggregate_value_p (const_tree exp, const_tree fntype)
else
/* For internal functions, assume nothing needs to be
returned in memory. */
- return 0;
+ return false;
}
break;
case FUNCTION_DECL:
@@ -2088,10 +2086,10 @@ aggregate_value_p (const_tree exp, const_tree fntype)
}
if (VOID_TYPE_P (type))
- return 0;
+ return false;
if (error_operand_p (fntype))
- return 0;
+ return false;
/* If a record should be passed the same as its first (and only) member
don't pass it as an aggregate. */
@@ -2102,25 +2100,25 @@ aggregate_value_p (const_tree exp, const_tree fntype)
reference, do so. */
if ((TREE_CODE (exp) == PARM_DECL || TREE_CODE (exp) == RESULT_DECL)
&& DECL_BY_REFERENCE (exp))
- return 1;
+ return true;
/* Function types that are TREE_ADDRESSABLE force return in memory. */
if (fntype && TREE_ADDRESSABLE (fntype))
- return 1;
+ return true;
/* Types that are TREE_ADDRESSABLE must be constructed in memory,
and thus can't be returned in registers. */
if (TREE_ADDRESSABLE (type))
- return 1;
+ return true;
if (TYPE_EMPTY_P (type))
- return 0;
+ return false;
if (flag_pcc_struct_return && AGGREGATE_TYPE_P (type))
- return 1;
+ return true;
if (targetm.calls.return_in_memory (type, fntype))
- return 1;
+ return true;
/* Make sure we have suitable call-clobbered regs to return
the value in; if not, we must return it in memory. */
@@ -2129,7 +2127,7 @@ aggregate_value_p (const_tree exp, const_tree fntype)
/* If we have something other than a REG (e.g. a PARALLEL), then assume
it is OK. */
if (!REG_P (reg))
- return 0;
+ return false;
/* Use the default ABI if the type of the function isn't known.
The scheme for handling interoperability between different ABIs
@@ -2142,9 +2140,9 @@ aggregate_value_p (const_tree exp, const_tree fntype)
nregs = hard_regno_nregs (regno, TYPE_MODE (type));
for (i = 0; i < nregs; i++)
if (!fixed_regs[regno + i] && !abi.clobbers_full_reg_p (regno + i))
- return 1;
+ return true;
- return 0;
+ return false;
}
/* Return true if we should assign DECL a pseudo register; false if it
@@ -5741,26 +5739,26 @@ contains (const rtx_insn *insn, hash_table<insn_cache_hasher> *hash)
return hash->find (const_cast<rtx_insn *> (insn)) != NULL;
}
-int
+bool
prologue_contains (const rtx_insn *insn)
{
return contains (insn, prologue_insn_hash);
}
-int
+bool
epilogue_contains (const rtx_insn *insn)
{
return contains (insn, epilogue_insn_hash);
}
-int
+bool
prologue_epilogue_contains (const rtx_insn *insn)
{
if (contains (insn, prologue_insn_hash))
- return 1;
+ return true;
if (contains (insn, epilogue_insn_hash))
- return 1;
- return 0;
+ return true;
+ return false;
}
void
@@ -6386,14 +6384,13 @@ current_function_name (void)
}
-static unsigned int
+static void
rest_of_handle_check_leaf_regs (void)
{
#ifdef LEAF_REGISTERS
crtl->uses_only_leaf_regs
= optimize > 0 && only_leaf_regs_used () && leaf_function_p ();
#endif
- return 0;
}
/* Insert a TYPE into the used types hash table of CFUN. */
@@ -6518,7 +6515,8 @@ public:
/* opt_pass methods: */
virtual unsigned int execute (function *)
{
- return rest_of_handle_check_leaf_regs ();
+ rest_of_handle_check_leaf_regs ();
+ return 0;
}
}; // class pass_leaf_regs
@@ -6531,7 +6529,7 @@ make_pass_leaf_regs (gcc::context *ctxt)
return new pass_leaf_regs (ctxt);
}
-static unsigned int
+static void
rest_of_handle_thread_prologue_and_epilogue (function *fun)
{
/* prepare_shrink_wrap is sensitive to the block structure of the control
@@ -6563,8 +6561,6 @@ rest_of_handle_thread_prologue_and_epilogue (function *fun)
/* The stack usage info is finalized during prologue expansion. */
if (flag_stack_usage_info || flag_callgraph_info)
output_stack_usage ();
-
- return 0;
}
/* Record a final call to CALLEE at LOCATION. */
@@ -6626,7 +6622,8 @@ public:
/* opt_pass methods: */
unsigned int execute (function * fun) final override
{
- return rest_of_handle_thread_prologue_and_epilogue (fun);
+ rest_of_handle_thread_prologue_and_epilogue (fun);
+ return 0;
}
}; // class pass_thread_prologue_and_epilogue
diff --git a/gcc/function.h b/gcc/function.h
index a53fb24d2..4e8131706 100644
--- a/gcc/function.h
+++ b/gcc/function.h
@@ -653,11 +653,11 @@ extern rtx get_hard_reg_initial_val (machine_mode, unsigned int);
extern rtx has_hard_reg_initial_val (machine_mode, unsigned int);
/* Called from gimple_expand_cfg. */
-extern unsigned int emit_initial_value_sets (void);
+extern void emit_initial_value_sets (void);
extern bool initial_value_entry (int i, rtx *, rtx *);
extern void instantiate_decl_rtl (rtx x);
-extern int aggregate_value_p (const_tree, const_tree);
+extern bool aggregate_value_p (const_tree, const_tree);
extern bool use_register_for_decl (const_tree);
extern gimple_seq gimplify_parameters (gimple_seq *);
extern void locate_and_pad_parm (machine_mode, tree, int, int, int,
@@ -698,9 +698,9 @@ extern void clobber_return_register (void);
extern void expand_function_end (void);
extern rtx get_arg_pointer_save_area (void);
extern void maybe_copy_prologue_epilogue_insn (rtx, rtx);
-extern int prologue_contains (const rtx_insn *);
-extern int epilogue_contains (const rtx_insn *);
-extern int prologue_epilogue_contains (const rtx_insn *);
+extern bool prologue_contains (const rtx_insn *);
+extern bool epilogue_contains (const rtx_insn *);
+extern bool prologue_epilogue_contains (const rtx_insn *);
extern void record_prologue_seq (rtx_insn *);
extern void record_epilogue_seq (rtx_insn *);
extern void emit_return_into_block (bool simple_p, basic_block bb);
--
2.33.0


@ -0,0 +1,233 @@
From 417d51e1ecf41b3ba3ddf24eaf1e07db5c1ded9e Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 09:28:46 +0000
Subject: [PATCH 049/157] [Backport][SME] Allow prologues and epilogues to be
inserted later
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e9d2ae6b9816e61a6148040149c63faa83f54702
Arm's SME adds a new processor mode called streaming mode.
This mode enables some new (matrix-oriented) instructions and
disables several existing groups of instructions, such as most
Advanced SIMD vector instructions and a much smaller set of SVE
instructions. It can also change the current vector length.
There are instructions to switch in and out of streaming mode.
However, their effect on the ISA and vector length can't be represented
directly in RTL, so they need to be emitted late in the pass pipeline,
close to md_reorg.
It's sometimes the responsibility of the prologue and epilogue to
switch modes, which means we need to emit the prologue and epilogue
sequences late as well. (This loses shrink-wrapping and scheduling
opportunities, but that's a price worth paying.)
This patch therefore adds a target hook for forcing prologue
and epilogue insertion to happen later in the pipeline.
gcc/
* target.def (use_late_prologue_epilogue): New hook.
* doc/tm.texi.in: Add TARGET_USE_LATE_PROLOGUE_EPILOGUE.
* doc/tm.texi: Regenerate.
* passes.def (pass_late_thread_prologue_and_epilogue): New pass.
* tree-pass.h (make_pass_late_thread_prologue_and_epilogue): Declare.
* function.cc (pass_thread_prologue_and_epilogue::gate): New function.
(pass_data_late_thread_prologue_and_epilogue): New pass variable.
(pass_late_thread_prologue_and_epilogue): New pass class.
(make_pass_late_thread_prologue_and_epilogue): New function.
---
gcc/doc/tm.texi | 19 ++++++++++++++++++
gcc/doc/tm.texi.in | 2 ++
gcc/function.cc | 50 ++++++++++++++++++++++++++++++++++++++++++++++
gcc/passes.def | 3 +++
gcc/target.def | 21 +++++++++++++++++++
gcc/tree-pass.h | 2 ++
6 files changed, 97 insertions(+)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 5f0972356..d930d233d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11684,6 +11684,25 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed
to by @var{ce_info}.
@end defmac
+@deftypefn {Target Hook} bool TARGET_USE_LATE_PROLOGUE_EPILOGUE ()
+Return true if the current function's prologue and epilogue should
+be emitted late in the pass pipeline, instead of at the usual point.
+
+Normally, the prologue and epilogue sequences are introduced soon after
+register allocation is complete. The advantage of this approach is that
+it allows the prologue and epilogue instructions to be optimized and
+scheduled with other code in the function. However, some targets
+require the prologue and epilogue to be the first and last sequences
+executed by the function, with no variation allowed. This hook should
+return true on such targets.
+
+The default implementation returns false, which is correct for most
+targets. The hook should only return true if there is a specific
+target limitation that cannot be described in RTL. For example,
+the hook might return true if the prologue and epilogue need to switch
+between instruction sets.
+@end deftypefn
+
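A sketch of how a target might opt in (hypothetical target code; the
example_* names are assumptions, not part of this patch):

static bool
example_use_late_prologue_epilogue ()
{
  /* Request late prologue/epilogue only when this function has to
     switch processor modes in the prologue itself (hypothetical
     predicate).  */
  return example_function_switches_modes_p (cfun);
}

#undef TARGET_USE_LATE_PROLOGUE_EPILOGUE
#define TARGET_USE_LATE_PROLOGUE_EPILOGUE example_use_late_prologue_epilogue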
@deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void)
If non-null, this hook performs a target-specific pass over the
instruction stream. The compiler will run it at all optimization levels,
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index fcab21744..19eabec48 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7708,6 +7708,8 @@ of the if-block in the @code{struct ce_if_block} structure that is pointed
to by @var{ce_info}.
@end defmac
+@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE
+
@hook TARGET_MACHINE_DEPENDENT_REORG
@hook TARGET_INIT_BUILTINS
diff --git a/gcc/function.cc b/gcc/function.cc
index fc8eb5812..7c90b5f23 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -84,6 +84,7 @@ along with GCC; see the file COPYING3. If not see
#include "function-abi.h"
#include "value-range.h"
#include "gimple-range.h"
+#include "insn-attr.h"
/* So we can assign to cfun in this file. */
#undef cfun
@@ -6620,6 +6621,11 @@ public:
{}
/* opt_pass methods: */
+ bool gate (function *) final override
+ {
+ return !targetm.use_late_prologue_epilogue ();
+ }
+
unsigned int execute (function * fun) final override
{
rest_of_handle_thread_prologue_and_epilogue (fun);
@@ -6628,6 +6634,44 @@ public:
}; // class pass_thread_prologue_and_epilogue
+const pass_data pass_data_late_thread_prologue_and_epilogue =
+{
+ RTL_PASS, /* type */
+ "late_pro_and_epilogue", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_THREAD_PROLOGUE_AND_EPILOGUE, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
+};
+
+class pass_late_thread_prologue_and_epilogue : public rtl_opt_pass
+{
+public:
+ pass_late_thread_prologue_and_epilogue (gcc::context *ctxt)
+ : rtl_opt_pass (pass_data_late_thread_prologue_and_epilogue, ctxt)
+ {}
+
+ /* opt_pass methods: */
+ bool gate (function *) final override
+ {
+ return targetm.use_late_prologue_epilogue ();
+ }
+
+ unsigned int execute (function *fn) final override
+ {
+ /* It's not currently possible to have both delay slots and
+ late prologue/epilogue, since the latter has to run before
+ the former, and the former won't honor whatever restrictions
+ the latter is trying to enforce. */
+ gcc_assert (!DELAY_SLOTS);
+ rest_of_handle_thread_prologue_and_epilogue (fn);
+ return 0;
+ }
+}; // class pass_late_thread_prologue_and_epilogue
+
} // anon namespace
rtl_opt_pass *
@@ -6636,6 +6680,12 @@ make_pass_thread_prologue_and_epilogue (gcc::context *ctxt)
return new pass_thread_prologue_and_epilogue (ctxt);
}
+rtl_opt_pass *
+make_pass_late_thread_prologue_and_epilogue (gcc::context *ctxt)
+{
+ return new pass_late_thread_prologue_and_epilogue (ctxt);
+}
+
namespace {
const pass_data pass_data_zero_call_used_regs =
diff --git a/gcc/passes.def b/gcc/passes.def
index cdc600298..8797f166f 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -523,6 +523,9 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_stack_regs_run);
POP_INSERT_PASSES ()
POP_INSERT_PASSES ()
+ NEXT_PASS (pass_late_thread_prologue_and_epilogue);
+ /* No target-independent code motion is allowed beyond this point,
+ excepting the legacy delayed-branch pass. */
NEXT_PASS (pass_late_compilation);
PUSH_INSERT_PASSES_WITHIN (pass_late_compilation)
NEXT_PASS (pass_zero_call_used_regs);
diff --git a/gcc/target.def b/gcc/target.def
index 4d77c1523..fd4899612 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4120,6 +4120,27 @@ returns @code{VOIDmode}.",
machine_mode, (machine_mode m1, machine_mode m2),
default_cc_modes_compatible)
+DEFHOOK
+(use_late_prologue_epilogue,
+ "Return true if the current function's prologue and epilogue should\n\
+be emitted late in the pass pipeline, instead of at the usual point.\n\
+\n\
+Normally, the prologue and epilogue sequences are introduced soon after\n\
+register allocation is complete. The advantage of this approach is that\n\
+it allows the prologue and epilogue instructions to be optimized and\n\
+scheduled with other code in the function. However, some targets\n\
+require the prologue and epilogue to be the first and last sequences\n\
+executed by the function, with no variation allowed. This hook should\n\
+return true on such targets.\n\
+\n\
+The default implementation returns false, which is correct for most\n\
+targets. The hook should only return true if there is a specific\n\
+target limitation that cannot be described in RTL. For example,\n\
+the hook might return true if the prologue and epilogue need to switch\n\
+between instruction sets.",
+ bool, (),
+ hook_bool_void_false)
+
/* Do machine-dependent code transformations. Called just before
delayed-branch scheduling. */
DEFHOOK
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 34e60bc38..1c983ef71 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -612,6 +612,8 @@ extern rtl_opt_pass *make_pass_gcse2 (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_split_after_reload (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_thread_prologue_and_epilogue (gcc::context
*ctxt);
+extern rtl_opt_pass *make_pass_late_thread_prologue_and_epilogue (gcc::context
+ *ctxt);
extern rtl_opt_pass *make_pass_zero_call_used_regs (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_split_complex_instructions (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_adjustments (gcc::context *ctxt);
--
2.33.0


@ -0,0 +1,239 @@
From e906213086639df81085a0101bf88fb66c1dbc2b Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 09:35:57 +0000
Subject: [PATCH 050/157] [Backport][SME] Add a target hook for sibcall
epilogues
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2e0aefa77157396acb48833407637303edba450a
Epilogues for sibling calls are generated using the
sibcall_epilogue pattern. One disadvantage of this approach
is that the target doesn't know which call the epilogue is for,
even though the code that generates the pattern has the call
to hand.
Although call instructions are currently rtxes, and so could be
passed as an operand to the pattern, the main point of introducing
rtx_insn was to move towards separating the rtx and insn types
(a good thing IMO). There also isn't an existing practice of
passing genuine instructions (as opposed to labels) to
instruction patterns.
This patch therefore adds a hook that can be defined as an
alternative to sibcall_epilogue. The advantage is that it
can be passed the call; the disadvantage is that it can't
use .md conveniences like generating instructions from
textual patterns (although most epilogues are too complex
to benefit much from that anyway).
gcc/
* doc/tm.texi.in: Add TARGET_EMIT_EPILOGUE_FOR_SIBCALL.
* doc/tm.texi: Regenerate.
* target.def (emit_epilogue_for_sibcall): New hook.
* calls.cc (can_implement_as_sibling_call_p): Use it.
* function.cc (thread_prologue_and_epilogue_insns): Likewise.
(reposition_prologue_and_epilogue_notes): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_expand_epilogue): Take
an rtx_call_insn * rather than a bool.
* config/aarch64/aarch64.cc (aarch64_expand_epilogue): Likewise.
(TARGET_EMIT_EPILOGUE_FOR_SIBCALL): Define.
* config/aarch64/aarch64.md (epilogue): Update call.
(sibcall_epilogue): Delete.
---
gcc/calls.cc | 3 ++-
gcc/config/aarch64/aarch64-protos.h | 2 +-
gcc/config/aarch64/aarch64.cc | 11 +++++++----
gcc/config/aarch64/aarch64.md | 11 +----------
gcc/doc/tm.texi | 8 ++++++++
gcc/doc/tm.texi.in | 2 ++
gcc/function.cc | 15 +++++++++++++--
gcc/target.def | 9 +++++++++
8 files changed, 43 insertions(+), 18 deletions(-)
diff --git a/gcc/calls.cc b/gcc/calls.cc
index 4d0bc45be..c1db66883 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -2461,7 +2461,8 @@ can_implement_as_sibling_call_p (tree exp,
tree addr,
const args_size &args_size)
{
- if (!targetm.have_sibcall_epilogue ())
+ if (!targetm.have_sibcall_epilogue ()
+ && !targetm.emit_epilogue_for_sibcall)
{
maybe_complain_about_tail_call
(exp,
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 86e444a60..97984f3ab 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -887,7 +887,7 @@ const char * aarch64_gen_far_branch (rtx *, int, const char *, const char *);
const char * aarch64_output_probe_stack_range (rtx, rtx);
const char * aarch64_output_probe_sve_stack_clash (rtx, rtx, rtx, rtx);
void aarch64_err_no_fpadvsimd (machine_mode);
-void aarch64_expand_epilogue (bool);
+void aarch64_expand_epilogue (rtx_call_insn *);
rtx aarch64_ptrue_all (unsigned int);
opt_machine_mode aarch64_ptrue_all_mode (rtx);
rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index fd1114b52..055b436b1 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10046,7 +10046,7 @@ aarch64_use_return_insn_p (void)
from a deallocated stack, and we optimize the unwind records by
emitting them all together if possible. */
void
-aarch64_expand_epilogue (bool for_sibcall)
+aarch64_expand_epilogue (rtx_call_insn *sibcall)
{
poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
@@ -10194,7 +10194,7 @@ aarch64_expand_epilogue (bool for_sibcall)
explicitly authenticate.
*/
if (aarch64_return_address_signing_enabled ()
- && (for_sibcall || !TARGET_ARMV8_3))
+ && (sibcall || !TARGET_ARMV8_3))
{
switch (aarch64_ra_sign_key)
{
@@ -10212,7 +10212,7 @@ aarch64_expand_epilogue (bool for_sibcall)
}
/* Stack adjustment for exception handler. */
- if (crtl->calls_eh_return && !for_sibcall)
+ if (crtl->calls_eh_return && !sibcall)
{
/* We need to unwind the stack by the offset computed by
EH_RETURN_STACKADJ_RTX. We have already reset the CFA
@@ -10223,7 +10223,7 @@ aarch64_expand_epilogue (bool for_sibcall)
}
emit_use (gen_rtx_REG (DImode, LR_REGNUM));
- if (!for_sibcall)
+ if (!sibcall)
emit_jump_insn (ret_rtx);
}
@@ -28246,6 +28246,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_HAVE_SHADOW_CALL_STACK
#define TARGET_HAVE_SHADOW_CALL_STACK true
+#undef TARGET_EMIT_EPILOGUE_FOR_SIBCALL
+#define TARGET_EMIT_EPILOGUE_FOR_SIBCALL aarch64_expand_epilogue
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-aarch64.h"
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7267a74d6..a78476c8a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -871,16 +871,7 @@
[(clobber (const_int 0))]
""
"
- aarch64_expand_epilogue (false);
- DONE;
- "
-)
-
-(define_expand "sibcall_epilogue"
- [(clobber (const_int 0))]
- ""
- "
- aarch64_expand_epilogue (true);
+ aarch64_expand_epilogue (nullptr);
DONE;
"
)
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index d930d233d..369f4b8da 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11703,6 +11703,14 @@ the hook might return true if the prologue and epilogue need to switch
between instruction sets.
@end deftypefn
+@deftypefn {Target Hook} void TARGET_EMIT_EPILOGUE_FOR_SIBCALL (rtx_call_insn *@var{call})
+If defined, this hook emits an epilogue sequence for sibling (tail)
+call instruction @var{call}. Another way of providing epilogues
+for sibling calls is to define the @code{sibcall_epilogue} instruction
+pattern; the main advantage of this hook over the pattern is that it
+has access to the call instruction.
+@end deftypefn
+
@deftypefn {Target Hook} void TARGET_MACHINE_DEPENDENT_REORG (void)
If non-null, this hook performs a target-specific pass over the
instruction stream. The compiler will run it at all optimization levels,
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 19eabec48..748b0777a 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7710,6 +7710,8 @@ to by @var{ce_info}.
@hook TARGET_USE_LATE_PROLOGUE_EPILOGUE
+@hook TARGET_EMIT_EPILOGUE_FOR_SIBCALL
+
@hook TARGET_MACHINE_DEPENDENT_REORG
@hook TARGET_INIT_BUILTINS
diff --git a/gcc/function.cc b/gcc/function.cc
index 7c90b5f23..ddab43ca4 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -6209,7 +6209,17 @@ thread_prologue_and_epilogue_insns (void)
if (!(CALL_P (insn) && SIBLING_CALL_P (insn)))
continue;
- if (rtx_insn *ep_seq = targetm.gen_sibcall_epilogue ())
+ rtx_insn *ep_seq;
+ if (targetm.emit_epilogue_for_sibcall)
+ {
+ start_sequence ();
+ targetm.emit_epilogue_for_sibcall (as_a<rtx_call_insn *> (insn));
+ ep_seq = get_insns ();
+ end_sequence ();
+ }
+ else
+ ep_seq = targetm.gen_sibcall_epilogue ();
+ if (ep_seq)
{
start_sequence ();
emit_note (NOTE_INSN_EPILOGUE_BEG);
@@ -6259,7 +6269,8 @@ reposition_prologue_and_epilogue_notes (void)
{
if (!targetm.have_prologue ()
&& !targetm.have_epilogue ()
- && !targetm.have_sibcall_epilogue ())
+ && !targetm.have_sibcall_epilogue ()
+ && !targetm.emit_epilogue_for_sibcall)
return;
/* Since the hash table is created on demand, the fact that it is
diff --git a/gcc/target.def b/gcc/target.def
index fd4899612..cf9f96eba 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4141,6 +4141,15 @@ between instruction sets.",
bool, (),
hook_bool_void_false)
+DEFHOOK
+(emit_epilogue_for_sibcall,
+ "If defined, this hook emits an epilogue sequence for sibling (tail)\n\
+call instruction @var{call}. Another way of providing epilogues\n\
+for sibling calls is to define the @code{sibcall_epilogue} instruction\n\
+pattern; the main advantage of this hook over the pattern is that it\n\
+has access to the call instruction.",
+ void, (rtx_call_insn *call), NULL)
+
/* Do machine-dependent code transformations. Called just before
delayed-branch scheduling. */
DEFHOOK
--
2.33.0


@ -0,0 +1,461 @@
From 58adede22d9ff2368b5c24ec3fc0e53bd3ddc8bd Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 09:44:52 +0000
Subject: [PATCH 051/157] [Backport][SME] Add a new target hook:
TARGET_START_CALL_ARGS
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=672fad57c1f99ff893019e2da4620e26b9b31dd2
We have the following two hooks into the call expansion code:
- TARGET_CALL_ARGS is called for each argument before arguments
are moved into hard registers.
- TARGET_END_CALL_ARGS is called after the end of the call
sequence (specifically, after any return value has been
moved to a pseudo).
This patch adds a TARGET_START_CALL_ARGS hook that is called before
the TARGET_CALL_ARGS sequence. This means that TARGET_START_CALL_ARGS
and TARGET_END_CALL_ARGS bracket the region in which argument registers
might be live. They also bracket a region in which the only call
emitted by target-independent code is the call to the target function
itself. (For example, TARGET_START_CALL_ARGS happens after any use of
memcpy to copy arguments, and TARGET_END_CALL_ARGS happens before any
use of memcpy to copy the result.)
Also, the patch adds the cumulative argument structure as an argument
to the hooks, so that the target can use it to record and retrieve
information about the call as a whole.
The TARGET_CALL_ARGS docs said:
While generating RTL for a function call, this target hook is invoked once
for each argument passed to the function, either a register returned by
``TARGET_FUNCTION_ARG`` or a memory location. It is called just
before the point where argument registers are stored.
The last bit was true for normal calls, but for libcalls the hook was
invoked earlier, before stack arguments have been copied. I don't think
this caused a practical difference for nvptx (the only port to use the
hooks) since I wouldn't expect any libcalls to take stack parameters.
gcc/
* doc/tm.texi.in: Add TARGET_START_CALL_ARGS.
* doc/tm.texi: Regenerate.
* target.def (start_call_args): New hook.
(call_args, end_call_args): Add a parameter for the cumulative
argument information.
* hooks.h (hook_void_rtx_tree): Delete.
* hooks.cc (hook_void_rtx_tree): Likewise.
* targhooks.h (hook_void_CUMULATIVE_ARGS): Declare.
(hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise.
* targhooks.cc (hook_void_CUMULATIVE_ARGS): New function.
(hook_void_CUMULATIVE_ARGS_rtx_tree): Likewise.
* calls.cc (expand_call): Call start_call_args before computing
and storing stack parameters. Pass the cumulative argument
information to call_args and end_call_args.
(emit_library_call_value_1): Likewise.
* config/nvptx/nvptx.cc (nvptx_call_args): Add a cumulative
argument parameter.
(nvptx_end_call_args): Likewise.
---
gcc/calls.cc | 61 +++++++++++++++++++++------------------
gcc/config/nvptx/nvptx.cc | 4 +--
gcc/doc/tm.texi | 53 +++++++++++++++++++++++++++-------
gcc/doc/tm.texi.in | 2 ++
gcc/hooks.cc | 5 ----
gcc/hooks.h | 1 -
gcc/target.def | 59 +++++++++++++++++++++++++++++--------
gcc/targhooks.cc | 10 +++++++
gcc/targhooks.h | 5 ++--
9 files changed, 140 insertions(+), 60 deletions(-)
diff --git a/gcc/calls.cc b/gcc/calls.cc
index c1db66883..4a8535cc6 100644
--- a/gcc/calls.cc
+++ b/gcc/calls.cc
@@ -3507,15 +3507,26 @@ expand_call (tree exp, rtx target, int ignore)
sibcall_failure = 1;
}
+ /* Set up the next argument register. For sibling calls on machines
+ with register windows this should be the incoming register. */
+ if (pass == 0)
+ next_arg_reg = targetm.calls.function_incoming_arg
+ (args_so_far, function_arg_info::end_marker ());
+ else
+ next_arg_reg = targetm.calls.function_arg
+ (args_so_far, function_arg_info::end_marker ());
+
+ targetm.calls.start_call_args (args_so_far);
+
bool any_regs = false;
for (i = 0; i < num_actuals; i++)
if (args[i].reg != NULL_RTX)
{
any_regs = true;
- targetm.calls.call_args (args[i].reg, funtype);
+ targetm.calls.call_args (args_so_far, args[i].reg, funtype);
}
if (!any_regs)
- targetm.calls.call_args (pc_rtx, funtype);
+ targetm.calls.call_args (args_so_far, pc_rtx, funtype);
/* Figure out the register where the value, if any, will come back. */
valreg = 0;
@@ -3578,15 +3589,6 @@ expand_call (tree exp, rtx target, int ignore)
later safely search backwards to find the CALL_INSN. */
before_call = get_last_insn ();
- /* Set up next argument register. For sibling calls on machines
- with register windows this should be the incoming register. */
- if (pass == 0)
- next_arg_reg = targetm.calls.function_incoming_arg
- (args_so_far, function_arg_info::end_marker ());
- else
- next_arg_reg = targetm.calls.function_arg
- (args_so_far, function_arg_info::end_marker ());
-
if (pass == 1 && (return_flags & ERF_RETURNS_ARG))
{
int arg_nr = return_flags & ERF_RETURN_ARG_MASK;
@@ -3879,7 +3881,7 @@ expand_call (tree exp, rtx target, int ignore)
for (i = 0; i < num_actuals; ++i)
free (args[i].aligned_regs);
- targetm.calls.end_call_args ();
+ targetm.calls.end_call_args (args_so_far);
insns = get_insns ();
end_sequence ();
@@ -4437,17 +4439,9 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
}
#endif
- /* When expanding a normal call, args are stored in push order,
- which is the reverse of what we have here. */
- bool any_regs = false;
- for (int i = nargs; i-- > 0; )
- if (argvec[i].reg != NULL_RTX)
- {
- targetm.calls.call_args (argvec[i].reg, NULL_TREE);
- any_regs = true;
- }
- if (!any_regs)
- targetm.calls.call_args (pc_rtx, NULL_TREE);
+ rtx call_cookie
+ = targetm.calls.function_arg (args_so_far,
+ function_arg_info::end_marker ());
/* Push the args that need to be pushed. */
@@ -4565,6 +4559,20 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
fun = prepare_call_address (NULL, fun, NULL, &call_fusage, 0, 0);
+ targetm.calls.start_call_args (args_so_far);
+
+ /* When expanding a normal call, args are stored in push order,
+ which is the reverse of what we have here. */
+ bool any_regs = false;
+ for (int i = nargs; i-- > 0; )
+ if (argvec[i].reg != NULL_RTX)
+ {
+ targetm.calls.call_args (args_so_far, argvec[i].reg, NULL_TREE);
+ any_regs = true;
+ }
+ if (!any_regs)
+ targetm.calls.call_args (args_so_far, pc_rtx, NULL_TREE);
+
/* Now load any reg parms into their regs. */
/* ARGNUM indexes the ARGVEC array in the order in which the arguments
@@ -4671,10 +4679,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
get_identifier (XSTR (orgfun, 0)),
build_function_type (tfom, NULL_TREE),
original_args_size.constant, args_size.constant,
- struct_value_size,
- targetm.calls.function_arg (args_so_far,
- function_arg_info::end_marker ()),
- valreg,
+ struct_value_size, call_cookie, valreg,
old_inhibit_defer_pop + 1, call_fusage, flags, args_so_far);
if (flag_ipa_ra)
@@ -4694,7 +4699,7 @@ emit_library_call_value_1 (int retval, rtx orgfun, rtx value,
valreg = gen_rtx_REG (TYPE_MODE (tfom), REGNO (valreg));
}
- targetm.calls.end_call_args ();
+ targetm.calls.end_call_args (args_so_far);
/* For calls to `setjmp', etc., inform function.cc:setjmp_warnings
that it should complain if nonvolatile values are live. For
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 3634a49de..7f2103ba6 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -1780,7 +1780,7 @@ nvptx_get_drap_rtx (void)
argument to the next call. */
static void
-nvptx_call_args (rtx arg, tree fntype)
+nvptx_call_args (cumulative_args_t, rtx arg, tree fntype)
{
if (!cfun->machine->doing_call)
{
@@ -1808,7 +1808,7 @@ nvptx_call_args (rtx arg, tree fntype)
information we recorded. */
static void
-nvptx_end_call_args (void)
+nvptx_end_call_args (cumulative_args_t)
{
cfun->machine->doing_call = false;
free_EXPR_LIST_list (&cfun->machine->call_args);
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 369f4b8da..357c29a4d 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -5392,26 +5392,59 @@ except the last are treated as named.
You need not define this hook if it always returns @code{false}.
@end deftypefn
-@deftypefn {Target Hook} void TARGET_CALL_ARGS (rtx, @var{tree})
+@deftypefn {Target Hook} void TARGET_START_CALL_ARGS (cumulative_args_t @var{complete_args})
+This target hook is invoked while generating RTL for a function call,
+after the argument values have been computed, and after stack arguments
+have been initialized, but before register arguments have been moved into
+their ABI-defined hard register locations. It precedes calls to the related
+hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}.
+The significance of this position in the call expansion is that:
+
+@itemize @bullet
+@item
+No argument registers are live.
+@item
+Although a call sequence can in general involve subcalls (such as using
+@code{memcpy} to copy large arguments), no such subcall will occur between
+the call to this hook and the generation of the main call instruction.
+@end itemize
+
+The single argument @var{complete_args} is the state of the target
+function's cumulative argument information after the final call to
+@code{TARGET_FUNCTION_ARG}.
+
+The hook can be used for things like switching processor mode, in cases
+where different calls need different processor modes. Most ports do not
+need to implement anything for this hook.
+@end deftypefn
+
+@deftypefn {Target Hook} void TARGET_CALL_ARGS (cumulative_args_t @var{complete_args}, rtx @var{loc}, tree @var{type})
While generating RTL for a function call, this target hook is invoked once
for each argument passed to the function, either a register returned by
@code{TARGET_FUNCTION_ARG} or a memory location. It is called just
-before the point where argument registers are stored. The type of the
-function to be called is also passed as the second argument; it is
-@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is
-invoked just after the code to copy the return reg has been emitted.
-This functionality can be used to perform special setup of call argument
-registers if a target needs it.
+before the point where argument registers are stored.
+
+@var{complete_args} is the state of the target function's cumulative
+argument information after the final call to @code{TARGET_FUNCTION_ARG}.
+@var{loc} is the location of the argument. @var{type} is the type of
+the function being called, or @code{NULL_TREE} for libcalls.
+
For functions without arguments, the hook is called once with @code{pc_rtx}
passed instead of an argument register.
-Most ports do not need to implement anything for this hook.
+
+This functionality can be used to perform special setup of call argument
+registers, if a target needs it. Most ports do not need to implement
+anything for this hook.
@end deftypefn
-@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (void)
+@deftypefn {Target Hook} void TARGET_END_CALL_ARGS (cumulative_args_t @var{complete_args})
This target hook is invoked while generating RTL for a function call,
just after the point where the return reg is copied into a pseudo. It
signals that all the call argument and return registers for the just
-emitted call are now no longer in use.
+emitted call are now no longer in use. @var{complete_args} is the
+state of the target function's cumulative argument information after
+the final call to @code{TARGET_FUNCTION_ARG}.
+
Most ports do not need to implement anything for this hook.
@end deftypefn
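As an illustration of the new hook trio, a port that must switch processor
mode around certain calls might implement them roughly as below. This is a
hypothetical sketch: the myport_* names, the needs_mode_switch field and the
mode-switch insns are invented, not part of this patch.

/* Hypothetical backend fragment; assumes the usual backend includes
   (rtl.h, emit-rtl.h, target.h) and target-def.h.  */

static void
myport_start_call_args (cumulative_args_t complete_args)
{
  /* complete_args reflects the state after the final TARGET_FUNCTION_ARG,
     so everything recorded while scanning the arguments is visible.  */
  CUMULATIVE_ARGS *ca = get_cumulative_args (complete_args);
  if (ca->needs_mode_switch)                    /* invented field */
    emit_insn (gen_myport_enter_call_mode ());  /* invented insn */
}

static void
myport_end_call_args (cumulative_args_t complete_args)
{
  CUMULATIVE_ARGS *ca = get_cumulative_args (complete_args);
  if (ca->needs_mode_switch)
    emit_insn (gen_myport_exit_call_mode ());
}

#undef TARGET_START_CALL_ARGS
#define TARGET_START_CALL_ARGS myport_start_call_args
#undef TARGET_END_CALL_ARGS
#define TARGET_END_CALL_ARGS myport_end_call_args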
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 748b0777a..4ebc9afbf 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -3774,6 +3774,8 @@ These machine description macros help implement varargs:
@hook TARGET_STRICT_ARGUMENT_NAMING
+@hook TARGET_START_CALL_ARGS
+
@hook TARGET_CALL_ARGS
@hook TARGET_END_CALL_ARGS
diff --git a/gcc/hooks.cc b/gcc/hooks.cc
index b29233f4f..0f4e7ce10 100644
--- a/gcc/hooks.cc
+++ b/gcc/hooks.cc
@@ -280,11 +280,6 @@ hook_void_FILEptr_tree (FILE *, tree)
{
}
-void
-hook_void_rtx_tree (rtx, tree)
-{
-}
-
void
hook_void_constcharptr (const char *)
{
diff --git a/gcc/hooks.h b/gcc/hooks.h
index 1056e1e9e..e2a742f43 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -83,7 +83,6 @@ extern void hook_void_FILEptr_constcharptr (FILE *, const char *);
extern void hook_void_FILEptr_constcharptr_const_tree (FILE *, const char *,
const_tree);
extern bool hook_bool_FILEptr_rtx_false (FILE *, rtx);
-extern void hook_void_rtx_tree (rtx, tree);
extern void hook_void_FILEptr_tree (FILE *, tree);
extern void hook_void_tree (tree);
extern void hook_void_tree_treeptr (tree, tree *);
diff --git a/gcc/target.def b/gcc/target.def
index cf9f96eba..a57e51b0d 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4784,32 +4784,67 @@ not generate any instructions in this case.",
int *pretend_args_size, int second_time),
default_setup_incoming_varargs)
+DEFHOOK
+(start_call_args,
+ "This target hook is invoked while generating RTL for a function call,\n\
+after the argument values have been computed, and after stack arguments\n\
+have been initialized, but before register arguments have been moved into\n\
+their ABI-defined hard register locations. It precedes calls to the related\n\
+hooks @code{TARGET_CALL_ARGS} and @code{TARGET_END_CALL_ARGS}.\n\
+The significance of this position in the call expansion is that:\n\
+\n\
+@itemize @bullet\n\
+@item\n\
+No argument registers are live.\n\
+@item\n\
+Although a call sequence can in general involve subcalls (such as using\n\
+@code{memcpy} to copy large arguments), no such subcall will occur between\n\
+the call to this hook and the generation of the main call instruction.\n\
+@end itemize\n\
+\n\
+The single argument @var{complete_args} is the state of the target\n\
+function's cumulative argument information after the final call to\n\
+@code{TARGET_FUNCTION_ARG}.\n\
+\n\
+The hook can be used for things like switching processor mode, in cases\n\
+where different calls need different processor modes. Most ports do not\n\
+need to implement anything for this hook.",
+ void, (cumulative_args_t complete_args),
+ hook_void_CUMULATIVE_ARGS)
+
DEFHOOK
(call_args,
"While generating RTL for a function call, this target hook is invoked once\n\
for each argument passed to the function, either a register returned by\n\
@code{TARGET_FUNCTION_ARG} or a memory location. It is called just\n\
-before the point where argument registers are stored. The type of the\n\
-function to be called is also passed as the second argument; it is\n\
-@code{NULL_TREE} for libcalls. The @code{TARGET_END_CALL_ARGS} hook is\n\
-invoked just after the code to copy the return reg has been emitted.\n\
-This functionality can be used to perform special setup of call argument\n\
-registers if a target needs it.\n\
+before the point where argument registers are stored.\n\
+\n\
+@var{complete_args} is the state of the target function's cumulative\n\
+argument information after the final call to @code{TARGET_FUNCTION_ARG}.\n\
+@var{loc} is the location of the argument. @var{type} is the type of\n\
+the function being called, or @code{NULL_TREE} for libcalls.\n\
+\n\
For functions without arguments, the hook is called once with @code{pc_rtx}\n\
passed instead of an argument register.\n\
-Most ports do not need to implement anything for this hook.",
- void, (rtx, tree),
- hook_void_rtx_tree)
+\n\
+This functionality can be used to perform special setup of call argument\n\
+registers, if a target needs it. Most ports do not need to implement\n\
+anything for this hook.",
+ void, (cumulative_args_t complete_args, rtx loc, tree type),
+ hook_void_CUMULATIVE_ARGS_rtx_tree)
DEFHOOK
(end_call_args,
"This target hook is invoked while generating RTL for a function call,\n\
just after the point where the return reg is copied into a pseudo. It\n\
signals that all the call argument and return registers for the just\n\
-emitted call are now no longer in use.\n\
+emitted call are now no longer in use. @var{complete_args} is the\n\
+state of the target function's cumulative argument information after\n\
+the final call to @code{TARGET_FUNCTION_ARG}.\n\
+\n\
Most ports do not need to implement anything for this hook.",
- void, (void),
- hook_void_void)
+ void, (cumulative_args_t complete_args),
+ hook_void_CUMULATIVE_ARGS)
DEFHOOK
(push_argument,
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index 399d6f874..c88afa5db 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -772,12 +772,22 @@ hook_int_CUMULATIVE_ARGS_arg_info_0 (cumulative_args_t,
return 0;
}
+void
+hook_void_CUMULATIVE_ARGS (cumulative_args_t)
+{
+}
+
void
hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t ca ATTRIBUTE_UNUSED,
tree ATTRIBUTE_UNUSED)
{
}
+void
+hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree)
+{
+}
+
/* Default implementation of TARGET_PUSH_ARGUMENT. */
bool
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index ecce55ebe..c6e12fc2e 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -138,8 +138,9 @@ extern bool hook_bool_CUMULATIVE_ARGS_arg_info_true
(cumulative_args_t, const function_arg_info &);
extern int hook_int_CUMULATIVE_ARGS_arg_info_0
(cumulative_args_t, const function_arg_info &);
-extern void hook_void_CUMULATIVE_ARGS_tree
- (cumulative_args_t, tree);
+extern void hook_void_CUMULATIVE_ARGS (cumulative_args_t);
+extern void hook_void_CUMULATIVE_ARGS_tree (cumulative_args_t, tree);
+extern void hook_void_CUMULATIVE_ARGS_rtx_tree (cumulative_args_t, rtx, tree);
extern const char *hook_invalid_arg_for_unprototyped_fn
(const_tree, const_tree, const_tree);
extern void default_function_arg_advance
--
2.33.0

View File

@ -0,0 +1,490 @@
From 8684458c3faf358e5a15dfb73b4ef632341ddf0a Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 09:52:41 +0000
Subject: [PATCH 052/157] [Backport][SME] Allow targets to add USEs to asms
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=414d795d8a365b6e72a84257caa36cb3bed7e0ba
Arm's SME has an array called ZA that for inline asm purposes
is effectively a form of special-purpose memory. It doesn't
have an associated storage type and so can't be passed and
returned in normal C/C++ objects.
We'd therefore like "za" in a clobber list to mean that an inline
asm can read from and write to ZA. (Just reading or writing
individually is unlikely to be useful, but we could add syntax
for that too if necessary.)
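As a sketch of the intended user-level result (assuming an SME-enabled
AArch64 toolchain and a context in which ZA state is available; the
instruction used is illustrative):

/* "za" in the clobber list tells the compiler that the asm may read
   and write the ZA state.  */
void zero_za (void)
{
  asm volatile ("zero { za }" ::: "za");
}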
There is currently a TARGET_MD_ASM_ADJUST target hook that allows
targets to add clobbers to an asm instruction. This patch
extends that to allow targets to add USEs as well.
gcc/
* target.def (md_asm_adjust): Add a uses parameter.
* doc/tm.texi: Regenerate.
* cfgexpand.cc (expand_asm_loc): Update call to md_asm_adjust.
Handle any USEs created by the target.
(expand_asm_stmt): Likewise.
* recog.cc (asm_noperands): Handle asms with USEs.
(decode_asm_operands): Likewise.
* config/arm/aarch-common-protos.h (arm_md_asm_adjust): Add uses
parameter.
* config/arm/aarch-common.cc (arm_md_asm_adjust): Likewise.
* config/arm/arm.cc (thumb1_md_asm_adjust): Likewise.
* config/avr/avr.cc (avr_md_asm_adjust): Likewise.
* config/cris/cris.cc (cris_md_asm_adjust): Likewise.
* config/i386/i386.cc (ix86_md_asm_adjust): Likewise.
* config/mn10300/mn10300.cc (mn10300_md_asm_adjust): Likewise.
* config/nds32/nds32.cc (nds32_md_asm_adjust): Likewise.
* config/pdp11/pdp11.cc (pdp11_md_asm_adjust): Likewise.
* config/rs6000/rs6000.cc (rs6000_md_asm_adjust): Likewise.
* config/s390/s390.cc (s390_md_asm_adjust): Likewise.
* config/vax/vax.cc (vax_md_asm_adjust): Likewise.
* config/visium/visium.cc (visium_md_asm_adjust): Likewise.
---
gcc/cfgexpand.cc | 37 +++++++++++++++++++---------
gcc/config/arm/aarch-common-protos.h | 2 +-
gcc/config/arm/aarch-common.cc | 3 ++-
gcc/config/arm/arm.cc | 5 ++--
gcc/config/avr/avr.cc | 1 +
gcc/config/cris/cris.cc | 6 +++--
gcc/config/i386/i386.cc | 5 ++--
gcc/config/mn10300/mn10300.cc | 3 ++-
gcc/config/nds32/nds32.cc | 4 +--
gcc/config/pdp11/pdp11.cc | 6 +++--
gcc/config/rs6000/rs6000.cc | 3 ++-
gcc/config/s390/s390.cc | 3 ++-
gcc/config/vax/vax.cc | 4 ++-
gcc/config/visium/visium.cc | 5 ++--
gcc/doc/tm.texi | 5 ++--
gcc/recog.cc | 20 ++++++++++-----
gcc/target.def | 5 ++--
17 files changed, 77 insertions(+), 40 deletions(-)
diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 4691355aa..5401a4ebd 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -2873,6 +2873,7 @@ expand_asm_loc (tree string, int vol, location_t locus)
auto_vec<rtx> input_rvec, output_rvec;
auto_vec<machine_mode> input_mode;
auto_vec<const char *> constraints;
+ auto_vec<rtx> use_rvec;
auto_vec<rtx> clobber_rvec;
HARD_REG_SET clobbered_regs;
CLEAR_HARD_REG_SET (clobbered_regs);
@@ -2882,16 +2883,20 @@ expand_asm_loc (tree string, int vol, location_t locus)
if (targetm.md_asm_adjust)
targetm.md_asm_adjust (output_rvec, input_rvec, input_mode,
- constraints, clobber_rvec, clobbered_regs,
- locus);
+ constraints, use_rvec, clobber_rvec,
+ clobbered_regs, locus);
asm_op = body;
nclobbers = clobber_rvec.length ();
- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nclobbers));
+ auto nuses = use_rvec.length ();
+ body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (1 + nuses + nclobbers));
- XVECEXP (body, 0, 0) = asm_op;
- for (i = 0; i < nclobbers; i++)
- XVECEXP (body, 0, i + 1) = gen_rtx_CLOBBER (VOIDmode, clobber_rvec[i]);
+ i = 0;
+ XVECEXP (body, 0, i++) = asm_op;
+ for (rtx use : use_rvec)
+ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use);
+ for (rtx clobber : clobber_rvec)
+ XVECEXP (body, 0, i++) = gen_rtx_CLOBBER (VOIDmode, clobber);
}
emit_insn (body);
@@ -3443,11 +3448,12 @@ expand_asm_stmt (gasm *stmt)
maintaining source-level compatibility means automatically clobbering
the flags register. */
rtx_insn *after_md_seq = NULL;
+ auto_vec<rtx> use_rvec;
if (targetm.md_asm_adjust)
after_md_seq
= targetm.md_asm_adjust (output_rvec, input_rvec, input_mode,
- constraints, clobber_rvec, clobbered_regs,
- locus);
+ constraints, use_rvec, clobber_rvec,
+ clobbered_regs, locus);
/* Do not allow the hook to change the output and input count,
lest it mess up the operand numbering. */
@@ -3455,7 +3461,8 @@ expand_asm_stmt (gasm *stmt)
gcc_assert (input_rvec.length() == ninputs);
gcc_assert (constraints.length() == noutputs + ninputs);
- /* But it certainly can adjust the clobbers. */
+ /* But it certainly can adjust the uses and clobbers. */
+ unsigned nuses = use_rvec.length ();
unsigned nclobbers = clobber_rvec.length ();
/* Third pass checks for easy conflicts. */
@@ -3527,7 +3534,7 @@ expand_asm_stmt (gasm *stmt)
ARGVEC CONSTRAINTS OPNAMES))
If there is more than one, put them inside a PARALLEL. */
- if (noutputs == 0 && nclobbers == 0)
+ if (noutputs == 0 && nuses == 0 && nclobbers == 0)
{
/* No output operands: put in a raw ASM_OPERANDS rtx. */
if (nlabels > 0)
@@ -3535,7 +3542,7 @@ expand_asm_stmt (gasm *stmt)
else
emit_insn (body);
}
- else if (noutputs == 1 && nclobbers == 0)
+ else if (noutputs == 1 && nuses == 0 && nclobbers == 0)
{
ASM_OPERANDS_OUTPUT_CONSTRAINT (body) = constraints[0];
if (nlabels > 0)
@@ -3551,7 +3558,8 @@ expand_asm_stmt (gasm *stmt)
if (num == 0)
num = 1;
- body = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num + nclobbers));
+ body = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (num + nuses + nclobbers));
/* For each output operand, store a SET. */
for (i = 0; i < noutputs; ++i)
@@ -3578,6 +3586,11 @@ expand_asm_stmt (gasm *stmt)
if (i == 0)
XVECEXP (body, 0, i++) = obody;
+ /* Add the uses specified by the target hook. No checking should
+ be needed since this doesn't come directly from user code. */
+ for (rtx use : use_rvec)
+ XVECEXP (body, 0, i++) = gen_rtx_USE (VOIDmode, use);
+
/* Store (clobber REG) for each clobbered register specified. */
for (unsigned j = 0; j < nclobbers; ++j)
{
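To illustrate the new parameter, a port's md_asm_adjust hook could mark a
special-state register as read and written by every asm. This is a
hypothetical sketch: myport_md_asm_adjust, MYPORT_ZA_REGNUM and the choice
of mode are invented for the example.

static rtx_insn *
myport_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
                      vec<machine_mode> & /*input_modes*/,
                      vec<const char *> & /*constraints*/,
                      vec<rtx> &uses, vec<rtx> & /*clobbers*/,
                      HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
  /* Each register pushed here becomes a (use (reg ...)) in the asm's
     PARALLEL, so the asm is treated as reading and writing it rather
     than just clobbering it.  */
  uses.safe_push (gen_rtx_REG (VNx16QImode, MYPORT_ZA_REGNUM));
  return NULL;
}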
diff --git a/gcc/config/arm/aarch-common-protos.h b/gcc/config/arm/aarch-common-protos.h
index ae0465159..3b525c174 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -149,7 +149,7 @@ struct cpu_cost_table
rtx_insn *arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> &constraints,
+ vec<const char *> &constraints, vec<rtx> &,
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
location_t loc);
diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc
index 04a53d750..365cfc140 100644
--- a/gcc/config/arm/aarch-common.cc
+++ b/gcc/config/arm/aarch-common.cc
@@ -533,7 +533,8 @@ arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
rtx_insn *
arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ vec<const char *> &constraints,
+ vec<rtx> & /*uses*/, vec<rtx> & /*clobbers*/,
HARD_REG_SET & /*clobbered_regs*/, location_t loc)
{
bool saw_asm_flag = false;
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index b700c23b8..c72e9c0b0 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -325,7 +325,7 @@ static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn *thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
vec<machine_mode> &,
vec<const char *> &, vec<rtx> &,
- HARD_REG_SET &, location_t);
+ vec<rtx> &, HARD_REG_SET &, location_t);
static const char *arm_identify_fpu_from_isa (sbitmap);
/* Table of machine attributes. */
@@ -34209,7 +34209,8 @@ arm_stack_protect_guard (void)
rtx_insn *
thumb1_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ vec<const char *> &constraints,
+ vec<rtx> &, vec<rtx> & /*clobbers*/,
HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
for (unsigned i = 0, n = outputs.length (); i < n; ++i)
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index 4ed390e4c..1b5a95410 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -14497,6 +14497,7 @@ static rtx_insn *
avr_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
vec<machine_mode> & /*input_modes*/,
vec<const char *> &/*constraints*/,
+ vec<rtx> &/*uses*/,
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
location_t /*loc*/)
{
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index f0017d630..3a1c85481 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -151,7 +151,8 @@ static void cris_function_arg_advance (cumulative_args_t,
const function_arg_info &);
static rtx_insn *cris_md_asm_adjust (vec<rtx> &, vec<rtx> &,
vec<machine_mode> &, vec<const char *> &,
- vec<rtx> &, HARD_REG_SET &, location_t);
+ vec<rtx> &, vec<rtx> &,
+ HARD_REG_SET &, location_t);
static void cris_option_override (void);
@@ -3506,7 +3507,8 @@ cris_function_arg_advance (cumulative_args_t ca_v,
static rtx_insn *
cris_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> &constraints, vec<rtx> &clobbers,
+ vec<const char *> &constraints,
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
/* For the time being, all asms clobber condition codes.
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 593185fa6..83a0d8abb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -22252,8 +22252,9 @@ ix86_c_mode_for_suffix (char suffix)
static rtx_insn *
ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> &constraints, vec<rtx> &clobbers,
- HARD_REG_SET &clobbered_regs, location_t loc)
+ vec<const char *> &constraints, vec<rtx> &/*uses*/,
+ vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
+ location_t loc)
{
bool saw_asm_flag = false;
diff --git a/gcc/config/mn10300/mn10300.cc b/gcc/config/mn10300/mn10300.cc
index 2a58dd925..2ca2c769c 100644
--- a/gcc/config/mn10300/mn10300.cc
+++ b/gcc/config/mn10300/mn10300.cc
@@ -2849,7 +2849,8 @@ mn10300_conditional_register_usage (void)
static rtx_insn *
mn10300_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ vec<const char *> & /*constraints*/,
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
clobbers.safe_push (gen_rtx_REG (CCmode, CC_REG));
diff --git a/gcc/config/nds32/nds32.cc b/gcc/config/nds32/nds32.cc
index 71fe9e8bc..27530495f 100644
--- a/gcc/config/nds32/nds32.cc
+++ b/gcc/config/nds32/nds32.cc
@@ -4199,8 +4199,8 @@ nds32_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
vec<rtx> &inputs ATTRIBUTE_UNUSED,
vec<machine_mode> &input_modes ATTRIBUTE_UNUSED,
vec<const char *> &constraints ATTRIBUTE_UNUSED,
- vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
- location_t /*loc*/)
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
+ HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
if (!flag_inline_asm_r15)
{
diff --git a/gcc/config/pdp11/pdp11.cc b/gcc/config/pdp11/pdp11.cc
index 380223439..25cf62cbc 100644
--- a/gcc/config/pdp11/pdp11.cc
+++ b/gcc/config/pdp11/pdp11.cc
@@ -155,7 +155,8 @@ static int pdp11_addr_cost (rtx, machine_mode, addr_space_t, bool);
static int pdp11_insn_cost (rtx_insn *insn, bool speed);
static rtx_insn *pdp11_md_asm_adjust (vec<rtx> &, vec<rtx> &,
vec<machine_mode> &, vec<const char *> &,
- vec<rtx> &, HARD_REG_SET &, location_t);
+ vec<rtx> &, vec<rtx> &,
+ HARD_REG_SET &, location_t);
static bool pdp11_return_in_memory (const_tree, const_tree);
static rtx pdp11_function_value (const_tree, const_tree, bool);
static rtx pdp11_libcall_value (machine_mode, const_rtx);
@@ -2137,7 +2138,8 @@ pdp11_cmp_length (rtx *operands, int words)
static rtx_insn *
pdp11_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ vec<const char *> & /*constraints*/,
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
clobbers.safe_push (gen_rtx_REG (CCmode, CC_REGNUM));
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 0b75861bb..55d4ce751 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -3443,7 +3443,8 @@ rs6000_builtin_mask_calculate (void)
static rtx_insn *
rs6000_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ vec<const char *> & /*constraints*/,
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index ae0cf9ef5..f1599a5c5 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -16994,7 +16994,8 @@ s390_hard_fp_reg_p (rtx x)
static rtx_insn *
s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
vec<machine_mode> &input_modes,
- vec<const char *> &constraints, vec<rtx> & /*clobbers*/,
+ vec<const char *> &constraints,
+ vec<rtx> &/*uses*/, vec<rtx> &/*clobbers*/,
HARD_REG_SET & /*clobbered_regs*/, location_t /*loc*/)
{
if (!TARGET_VXE)
diff --git a/gcc/config/vax/vax.cc b/gcc/config/vax/vax.cc
index 28c1af59a..7673a1428 100644
--- a/gcc/config/vax/vax.cc
+++ b/gcc/config/vax/vax.cc
@@ -57,7 +57,8 @@ static bool vax_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode vax_cc_modes_compatible (machine_mode, machine_mode);
static rtx_insn *vax_md_asm_adjust (vec<rtx> &, vec<rtx> &,
vec<machine_mode> &, vec<const char *> &,
- vec<rtx> &, HARD_REG_SET &, location_t);
+ vec<rtx> &, vec<rtx> &, HARD_REG_SET &,
+ location_t);
static rtx vax_function_arg (cumulative_args_t, const function_arg_info &);
static void vax_function_arg_advance (cumulative_args_t,
const function_arg_info &);
@@ -1179,6 +1180,7 @@ vax_md_asm_adjust (vec<rtx> &outputs ATTRIBUTE_UNUSED,
vec<rtx> &inputs ATTRIBUTE_UNUSED,
vec<machine_mode> &input_modes ATTRIBUTE_UNUSED,
vec<const char *> &constraints ATTRIBUTE_UNUSED,
+ vec<rtx> &/*uses*/,
vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs,
location_t /*loc*/)
{
diff --git a/gcc/config/visium/visium.cc b/gcc/config/visium/visium.cc
index 03c1a33e1..35b46ced9 100644
--- a/gcc/config/visium/visium.cc
+++ b/gcc/config/visium/visium.cc
@@ -190,7 +190,7 @@ static tree visium_build_builtin_va_list (void);
static rtx_insn *visium_md_asm_adjust (vec<rtx> &, vec<rtx> &,
vec<machine_mode> &,
vec<const char *> &, vec<rtx> &,
- HARD_REG_SET &, location_t);
+ vec<rtx> &, HARD_REG_SET &, location_t);
static bool visium_legitimate_constant_p (machine_mode, rtx);
@@ -794,7 +794,8 @@ visium_conditional_register_usage (void)
static rtx_insn *
visium_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
vec<machine_mode> & /*input_modes*/,
- vec<const char *> & /*constraints*/, vec<rtx> &clobbers,
+ vec<const char *> & /*constraints*/,
+ vec<rtx> &/*uses*/, vec<rtx> &clobbers,
HARD_REG_SET &clobbered_regs, location_t /*loc*/)
{
clobbers.safe_push (gen_rtx_REG (CCmode, FLAGS_REGNUM));
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 357c29a4d..4f93facf7 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11626,10 +11626,11 @@ from shared libraries (DLLs).
You need not define this macro if it would always evaluate to zero.
@end defmac
-@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<machine_mode>& @var{input_modes}, vec<const char *>& @var{constraints}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc})
+@deftypefn {Target Hook} {rtx_insn *} TARGET_MD_ASM_ADJUST (vec<rtx>& @var{outputs}, vec<rtx>& @var{inputs}, vec<machine_mode>& @var{input_modes}, vec<const char *>& @var{constraints}, vec<rtx>& @var{uses}, vec<rtx>& @var{clobbers}, HARD_REG_SET& @var{clobbered_regs}, location_t @var{loc})
This target hook may add @dfn{clobbers} to @var{clobbers} and
@var{clobbered_regs} for any hard regs the port wishes to automatically
-clobber for an asm. The @var{outputs} and @var{inputs} may be inspected
+clobber for an asm. It can also add hard registers that are used by the
+asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected
to avoid clobbering a register that is already used by the asm. @var{loc}
is the source location of the asm.
diff --git a/gcc/recog.cc b/gcc/recog.cc
index cd2410ab2..5b81d5e21 100644
--- a/gcc/recog.cc
+++ b/gcc/recog.cc
@@ -1977,13 +1977,17 @@ asm_noperands (const_rtx body)
{
/* Multiple output operands, or 1 output plus some clobbers:
body is
- [(set OUTPUT (asm_operands ...))... (clobber (reg ...))...]. */
- /* Count backwards through CLOBBERs to determine number of SETs. */
+ [(set OUTPUT (asm_operands ...))...
+ (use (reg ...))...
+ (clobber (reg ...))...]. */
+ /* Count backwards through USEs and CLOBBERs to determine
+ number of SETs. */
for (i = XVECLEN (body, 0); i > 0; i--)
{
if (GET_CODE (XVECEXP (body, 0, i - 1)) == SET)
break;
- if (GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER)
+ if (GET_CODE (XVECEXP (body, 0, i - 1)) != USE
+ && GET_CODE (XVECEXP (body, 0, i - 1)) != CLOBBER)
return -1;
}
@@ -2010,10 +2014,13 @@ asm_noperands (const_rtx body)
else
{
/* 0 outputs, but some clobbers:
- body is [(asm_operands ...) (clobber (reg ...))...]. */
+ body is [(asm_operands ...)
+ (use (reg ...))...
+ (clobber (reg ...))...]. */
/* Make sure all the other parallel things really are clobbers. */
for (i = XVECLEN (body, 0) - 1; i > 0; i--)
- if (GET_CODE (XVECEXP (body, 0, i)) != CLOBBER)
+ if (GET_CODE (XVECEXP (body, 0, i)) != USE
+ && GET_CODE (XVECEXP (body, 0, i)) != CLOBBER)
return -1;
}
}
@@ -2080,7 +2087,8 @@ decode_asm_operands (rtx body, rtx *operands, rtx **operand_locs,
the SETs. Their constraints are in the ASM_OPERANDS itself. */
for (i = 0; i < nparallel; i++)
{
- if (GET_CODE (XVECEXP (body, 0, i)) == CLOBBER)
+ if (GET_CODE (XVECEXP (body, 0, i)) == USE
+ || GET_CODE (XVECEXP (body, 0, i)) == CLOBBER)
break; /* Past last SET */
gcc_assert (GET_CODE (XVECEXP (body, 0, i)) == SET);
if (operands)
diff --git a/gcc/target.def b/gcc/target.def
index a57e51b0d..60096c60c 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4309,7 +4309,8 @@ DEFHOOK
(md_asm_adjust,
"This target hook may add @dfn{clobbers} to @var{clobbers} and\n\
@var{clobbered_regs} for any hard regs the port wishes to automatically\n\
-clobber for an asm. The @var{outputs} and @var{inputs} may be inspected\n\
+clobber for an asm. It can also add hard registers that are used by the\n\
+asm to @var{uses}. The @var{outputs} and @var{inputs} may be inspected\n\
to avoid clobbering a register that is already used by the asm. @var{loc}\n\
is the source location of the asm.\n\
\n\
@@ -4320,7 +4321,7 @@ changes to @var{inputs} must be accompanied by the corresponding changes\n\
to @var{input_modes}.",
rtx_insn *,
(vec<rtx>& outputs, vec<rtx>& inputs, vec<machine_mode>& input_modes,
- vec<const char *>& constraints, vec<rtx>& clobbers,
+ vec<const char *>& constraints, vec<rtx>& uses, vec<rtx>& clobbers,
HARD_REG_SET& clobbered_regs, location_t loc),
NULL)
--
2.33.0

View File

@ -0,0 +1,998 @@
From 763db5ed42e18cdddf979dda82056345e3af15ed Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Mon, 19 Jun 2023 15:47:46 +0100
Subject: [PATCH 053/157] [Backport][SME] New compact syntax for insn and
insn_split in Machine Descriptions.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=957ae90406591739b68e95ad49a0232faeb74217
This patch adds support for a compact syntax for specifying constraints in
instruction patterns. Credit for the idea goes to Richard Earnshaw.
With this new syntax we want a clean break from the current limitations to make
something that is hopefully easier to use and maintain.
The idea behind this compact syntax is that it is often quite hard to
correlate the entries in the constraints list, the attributes and the
instruction list. One has to count, and this is often tedious. Additionally,
when changing a single line in the insn, multiple lines in a diff change,
making it harder to see what's going on.
This new syntax takes into account many of the common things that are done in MD
files. It's also worth saying that this version is intended to deal with the
common case of string-based alternatives. For C chunks we have some ideas
but those are not intended to be addressed here.
It's easiest to explain with an example:
normal syntax:
(define_insn_and_split "*movsi_aarch64"
[(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w")
(match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
"@
mov\\t%w0, %w1
mov\\t%w0, %w1
mov\\t%w0, %w1
mov\\t%w0, %1
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%w0, %1
ldr\\t%s0, %1
str\\t%w1, %0
str\\t%s1, %0
adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
adr\\t%x0, %c1
adrp\\t%x0, %A1
fmov\\t%s0, %w1
fmov\\t%w0, %s1
fmov\\t%s0, %s1
* return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
"{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}"
;; The "mov_imm" type for CNT is just a placeholder.
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
(set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
(set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
]
)
New syntax:
(define_insn_and_split "*movsi_aarch64"
[(set (match_operand:SI 0 "nonimmediate_operand")
(match_operand:SI 1 "aarch64_mov_operand"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
{@ [cons: =0, 1; attrs: type, arch, length]
[r , r ; mov_reg , * , 4] mov\t%w0, %w1
[k , r ; mov_reg , * , 4] ^
[r , k ; mov_reg , * , 4] ^
[r , M ; mov_imm , * , 4] mov\t%w0, %1
[r , n ; mov_imm , * ,16] #
/* The "mov_imm" type for CNT is just a placeholder. */
[r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
[r , m ; load_4 , * , 4] ldr\t%w0, %1
[w , m ; load_4 , fp , 4] ldr\t%s0, %1
[m , rZ ; store_4 , * , 4] str\t%w1, %0
[m , w ; store_4 , fp , 4] str\t%s1, %0
[r , Usw; load_4 , * , 8] adrp\t%x0, %A1;ldr\t%w0, [%x0, %L1]
[r , Usa; adr , * , 4] adr\t%x0, %c1
[r , Ush; adr , * , 4] adrp\t%x0, %A1
[w , rZ ; f_mcr , fp , 4] fmov\t%s0, %w1
[r , w ; f_mrc , fp , 4] fmov\t%w0, %s1
[w , w ; fmov , fp , 4] fmov\t%s0, %s1
[w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
}
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
[(const_int 0)]
{
aarch64_expand_mov_immediate (operands[0], operands[1]);
DONE;
}
)
The main syntax rules are as follows (see the docs for the full rules):
- Template must start with "{@" and end with "}" to use the new syntax.
- "{@" is followed by a layout in parentheses which is "cons:" followed by
a list of match_operand/match_scratch IDs, then a semicolon, then the
same for attributes ("attrs:"). Both sections are optional (so you can
use only cons, or only attrs, or both), and cons must come before attrs
if present.
- Each alternative begins with any amount of whitespace.
- Following the whitespace is a comma-separated list of constraints and/or
attributes within brackets [], with sections separated by a semicolon.
- Following the closing ']' is any amount of whitespace, and then the actual
asm output.
- Spaces are allowed in the list (they will simply be removed).
- All alternatives should be specified: a blank list should be
"[,,]", "[,,;,]" etc., not "[]" or "" (however genattr may segfault if
you leave certain attributes empty, I have found).
- The actual constraint string in the match_operand or match_scratch, and
the attribute string in the set_attr, must be blank or an empty string
(you can't combine the old and new syntaxes).
- The common idiom * return can be shortened by using <<.
- Any unexpanded iterators left during processing will result in an error at
compile time. If for some reason <> is needed in the output then these
must be escaped using \.
- Within an {@ block both multi-line and single-line C comments are allowed,
but when used outside of a C block they must be the only non-whitespace
blocks on the line.
- Inside an {@ block any unexpanded iterators will result in a compile time
fault instead of incorrect assembly being generated at runtime. If the
literal <> is needed in the output this needs to be escaped with \<\>.
- This check is not performed inside C blocks (lines starting with *).
- Instead of copying the previous instruction again in the next pattern, one
can use ^ to refer to the previous asm string.
This patch works by blindly transforming the new syntax into the old syntax,
so it doesn't do extensive checking. However, it does verify that:
- The correct number of constraints/attributes are specified.
- You haven't mixed old and new syntax.
- The specified operand IDs/attribute names actually exist.
- You don't have duplicate cons entries.
If something goes wrong, it may write invalid constraints/attributes/template
back into the rtx. But this shouldn't matter because error_at will cause the
program to fail on exit anyway.
Because this transformation occurs as early as possible (before patterns are
queued), the rest of the compiler can completely ignore the new syntax and
assume that the old syntax will always be used.
This doesn't seem to have any measurable effect on the runtime of gen*
programs.
gcc/ChangeLog:
* gensupport.cc (class conlist, add_constraints, add_attributes,
skip_spaces, expect_char, preprocess_compact_syntax,
parse_section_layout, parse_section, convert_syntax): New.
(process_rtx): Check for conversion.
* genoutput.cc (process_template): Check for unresolved iterators.
(class data): Add compact_syntax_p.
(gen_insn): Use it.
* gensupport.h (compact_syntax): New.
(hash-set.h): Include.
* doc/md.texi: Document it.
Co-Authored-By: Omar Tahir <Omar.Tahir2@arm.com>
---
gcc/doc/md.texi | 163 +++++++++++++++
gcc/genoutput.cc | 48 ++++-
gcc/gensupport.cc | 498 ++++++++++++++++++++++++++++++++++++++++++++++
gcc/gensupport.h | 3 +
4 files changed, 709 insertions(+), 3 deletions(-)
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 3b544358b..04ace8f7f 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -27,6 +27,7 @@ See the next chapter for information on the C header file.
from such an insn.
* Output Statement:: For more generality, write C code to output
the assembler code.
+* Compact Syntax:: Compact syntax for writing machine descriptions.
* Predicates:: Controlling what kinds of operands can be used
for an insn.
* Constraints:: Fine-tuning operand selection.
@@ -713,6 +714,168 @@ you can use @samp{*} inside of a @samp{@@} multi-alternative template:
@end group
@end smallexample
+@node Compact Syntax
+@section Compact Syntax
+@cindex compact syntax
+
+When a @code{define_insn} or @code{define_insn_and_split} has multiple
+alternatives it may be beneficial to use the compact syntax when specifying
+alternatives.
+
+This syntax puts the constraints and attributes on the same horizontal line as
+the instruction assembly template.
+
+As an example
+
+@smallexample
+@group
+(define_insn_and_split ""
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r")
+ (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv"))]
+ ""
+ "@@
+ mov\\t%w0, %w1
+ mov\\t%w0, %w1
+ mov\\t%w0, %w1
+ mov\\t%w0, %1
+ #
+ * return aarch64_output_sve_cnt_immediate ('cnt', '%x0', operands[1]);"
+ "&& true"
+ [(const_int 0)]
+ @{
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ @}
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm")
+ (set_attr "arch" "*,*,*,*,*,sve")
+ (set_attr "length" "4,4,4,4,*, 4")
+]
+)
+@end group
+@end smallexample
+
+can be better expressed as:
+
+@smallexample
+@group
+(define_insn_and_split ""
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (match_operand:SI 1 "aarch64_mov_operand"))]
+ ""
+ @{@@ [cons: =0, 1; attrs: type, arch, length]
+ [r , r ; mov_reg , * , 4] mov\t%w0, %w1
+ [k , r ; mov_reg , * , 4] ^
+ [r , k ; mov_reg , * , 4] ^
+ [r , M ; mov_imm , * , 4] mov\t%w0, %1
+ [r , n ; mov_imm , * , *] #
+ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ @}
+ "&& true"
+ [(const_int 0)]
+ @{
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ @}
+)
+@end group
+@end smallexample
+
+The syntax rules are as follows:
+@itemize @bullet
+@item
+Templates must start with @samp{@{@@} to use the new syntax.
+
+@item
+@samp{@{@@} is followed by a layout in square brackets which is @samp{cons:}
+followed by a comma-separated list of @code{match_operand}/@code{match_scratch}
+operand numbers, then a semicolon, followed by the same for attributes
+(@samp{attrs:}). Operand modifiers like @code{=} and @code{+} can be placed
+before an operand number.
+Both sections are optional (so you can use only @samp{cons}, or only
+@samp{attrs}, or both), and @samp{cons} must come before @samp{attrs} if
+present.
+
+@item
+Each alternative begins with any amount of whitespace.
+
+@item
+Following the whitespace is a comma-separated list of "constraints" and/or
+"attributes" within brackets @code{[]}, with sections separated by a semicolon.
+
+@item
+Should you want to copy the previous asm line, the symbol @code{^} can be used.
+This allows less copy-pasting between alternatives and reduces the number of
+lines to update on changes.
+
+@item
+When using C functions for output, the idiom @samp{* return @var{function};}
+can be replaced with the shorthand @samp{<< @var{function};}.
+
+@item
+Following the closing @samp{]} is any amount of whitespace, and then the actual
+asm output.
+
+@item
+Spaces are allowed in the list (they will simply be removed).
+
+@item
+All constraint alternatives should be specified. For example, a list of
+three blank alternatives should be written @samp{[,,]} rather than
+@samp{[]}.
+
+@item
+All attribute alternatives should be non-empty, with @samp{*}
+representing the default attribute value. For example, a list of three
+default attribute values should be written @samp{[*,*,*]} rather than
+@samp{[]}.
+
+@item
+Within an @samp{@{@@} block both multiline and singleline C comments are
+allowed, but when used outside of a C block they must be the only non-whitespace
+blocks on the line.
+
+@item
+Within an @samp{@{@@} block, any iterators that do not get expanded will result
+in an error. If for some reason it is required to have @code{<} or @code{>} in
+the output then these must be escaped using @backslashchar{}.
+
+@item
+It is possible to use the @samp{attrs} list to specify some attributes and to
+use the normal @code{set_attr} syntax to specify other attributes. There must
+not be any overlap between the two lists.
+
+In other words, the following is valid:
+@smallexample
+@group
+(define_insn_and_split ""
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (match_operand:SI 1 "aarch64_mov_operand"))]
+ ""
+ @{@@ [cons: 0, 1; attrs: type, arch, length]@}
+ @dots{}
+ [(set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+but this is not valid:
+@smallexample
+@group
+(define_insn_and_split ""
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (match_operand:SI 1 "aarch64_mov_operand"))]
+ ""
+ @{@@ [cons: 0, 1; attrs: type, arch, length]@}
+ @dots{}
+ [(set_attr "arch" "bar")
+ (set_attr "foo" "mov_imm")]
+)
+@end group
+@end smallexample
+
+because it specifies @code{arch} twice.
+@end itemize
+
@node Predicates
@section Predicates
@cindex predicates
diff --git a/gcc/genoutput.cc b/gcc/genoutput.cc
index 6bb03e286..de5dafdbf 100644
--- a/gcc/genoutput.cc
+++ b/gcc/genoutput.cc
@@ -157,6 +157,7 @@ public:
int n_alternatives; /* Number of alternatives in each constraint */
int operand_number; /* Operand index in the big array. */
int output_format; /* INSN_OUTPUT_FORMAT_*. */
+ bool compact_syntax_p;
struct operand_data operand[MAX_MAX_OPERANDS];
};
@@ -700,12 +701,51 @@ process_template (class data *d, const char *template_code)
if (sp != ep)
message_at (d->loc, "trailing whitespace in output template");
- while (cp < sp)
+ /* Check for any unexpanded iterators. */
+ if (bp[0] != '*' && d->compact_syntax_p)
{
- putchar (*cp);
- cp++;
+ const char *p = cp;
+ const char *last_bracket = nullptr;
+ while (p < sp)
+ {
+ if (*p == '\\' && p + 1 < sp)
+ {
+ putchar (*p);
+ putchar (*(p+1));
+ p += 2;
+ continue;
+ }
+
+ if (*p == '>' && last_bracket && *last_bracket == '<')
+ {
+ int len = p - last_bracket;
+ fatal_at (d->loc, "unresolved iterator '%.*s' in '%s'",
+ len - 1, last_bracket + 1, cp);
+ }
+ else if (*p == '<' || *p == '>')
+ last_bracket = p;
+
+ putchar (*p);
+ p += 1;
+ }
+
+ if (last_bracket)
+ {
+ char *nl = strchr (const_cast<char*> (cp), '\n');
+ if (nl)
+ *nl = '\0';
+ fatal_at (d->loc, "unmatched angle brackets, likely an "
+ "error in iterator syntax in %s", cp);
+ }
+ }
+ else
+ {
+ while (cp < sp)
+ putchar (*(cp++));
}
+ cp = sp;
+
if (!found_star)
puts ("\",");
else if (*bp != '*')
@@ -881,6 +921,8 @@ gen_insn (md_rtx_info *info)
else
d->name = 0;
+ d->compact_syntax_p = compact_syntax.contains (insn);
+
/* Build up the list in the same order as the insns are seen
in the machine description. */
d->next = 0;
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index 42680499d..23c61dcdd 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -18,6 +18,8 @@
<http://www.gnu.org/licenses/>. */
#include "bconfig.h"
+#define INCLUDE_STRING
+#define INCLUDE_VECTOR
#include "system.h"
#include "coretypes.h"
#include "tm.h"
@@ -33,6 +35,8 @@
static rtx operand_data[MAX_OPERANDS];
static rtx match_operand_entries_in_pattern[MAX_OPERANDS];
static char used_operands_numbers[MAX_OPERANDS];
+/* List of entries which are part of the new syntax. */
+hash_set<rtx> compact_syntax;
/* In case some macros used by files we include need it, define this here. */
@@ -545,6 +549,497 @@ gen_rewrite_sequence (rtvec vec)
return new_vec;
}
+/* The following is for handling the compact syntax for constraints and
+ attributes.
+
+ The normal syntax looks like this:
+
+ ...
+ (match_operand: 0 "s_register_operand" "r,I,k")
+ (match_operand: 2 "s_register_operand" "r,k,I")
+ ...
+ "@
+ <asm>
+ <asm>
+ <asm>"
+ ...
+ (set_attr "length" "4,8,8")
+
+ The compact syntax looks like this:
+
+ ...
+ (match_operand: 0 "s_register_operand")
+ (match_operand: 2 "s_register_operand")
+ ...
+ {@ [cons: 0, 2; attrs: length]
+ [r,r; 4] <asm>
+ [I,k; 8] <asm>
+ [k,I; 8] <asm>
+ }
+ ...
+ [<other attributes>]
+
+ This is the only place where this syntax needs to be handled. Relevant
+ patterns are transformed from compact to the normal syntax before they are
+ queued, so none of the gen* programs need to know about this syntax at all.
+
+ Conversion process (convert_syntax):
+
+ 0) Check that pattern actually uses new syntax (check for {@ ... }).
+
+ 1) Get the "layout", i.e. the "[cons: 0 2; attrs: length]" from the above
+ example. cons must come first; both are optional. Set up two vecs,
+ convec and attrvec, for holding the results of the transformation.
+
+ 2) For each alternative: parse the list of constraints and/or attributes,
+ and enqueue them in the relevant lists in convec and attrvec. By the end
+ of this process, convec[N].con and attrvec[N].con should contain regular
+ syntax constraint/attribute lists like "r,I,k". Copy the asm to a string
+ as we go.
+
+ 3) Search the rtx and write the constraint and attribute lists into the
+ correct places. Write the asm back into the template. */
+
+/* Helper class for shuffling constraints/attributes in convert_syntax and
+ add_constraints/add_attributes. This includes commas but not whitespace. */
+
+class conlist {
+private:
+ std::string con;
+
+public:
+ std::string name;
+ int idx = -1;
+
+ conlist () = default;
+
+ /* [ns..ns + len) should be a string with the id of the rtx to match
+ i.e. if rtx is the relevant match_operand or match_scratch then
+ [ns..ns + len) should equal itoa (XINT (rtx, 0)), and if set_attr then
+ [ns..ns + len) should equal XSTR (rtx, 0). */
+ conlist (const char *ns, unsigned int len, bool numeric)
+ {
+ /* Trim leading whitespaces. */
+ while (ISBLANK (*ns))
+ {
+ ns++;
+ len--;
+ }
+
+ /* Trim trailing whitespace. */
+ for (int i = len - 1; i >= 0; i--, len--)
+ if (!ISBLANK (ns[i]))
+ break;
+
+ /* Parse off any modifiers. */
+ while (!ISALNUM (*ns))
+ {
+ con += *(ns++);
+ len--;
+ }
+
+ name.assign (ns, len);
+ if (numeric)
+ idx = std::stoi (name);
+ }
+
+ /* Adds a character to the end of the string. */
+ void add (char c)
+ {
+ con += c;
+ }
+
+ /* Output the string in the form of a brand-new char *, then effectively
+ clear the internal string by resetting len to 0. */
+ char *out ()
+ {
+ /* Final character is always a trailing comma, so strip it out. */
+ char *q = xstrndup (con.c_str (), con.size () - 1);
+ con.clear ();
+ return q;
+ }
+};
+
+typedef std::vector<conlist> vec_conlist;
+
+/* Add constraints to an rtx. This function is similar to remove_constraints.
+ Errors if adding the constraints would overwrite existing constraints. */
+
+static void
+add_constraints (rtx part, file_location loc, vec_conlist &cons)
+{
+ const char *format_ptr;
+
+ if (part == NULL_RTX)
+ return;
+
+ /* If match_op or match_scr, check if we have the right one, and if so, copy
+ over the constraint list. */
+ if (GET_CODE (part) == MATCH_OPERAND || GET_CODE (part) == MATCH_SCRATCH)
+ {
+ int field = GET_CODE (part) == MATCH_OPERAND ? 2 : 1;
+ unsigned id = XINT (part, 0);
+
+ if (id >= cons.size () || cons[id].idx == -1)
+ return;
+
+ if (XSTR (part, field)[0] != '\0')
+ {
+ error_at (loc, "can't mix normal and compact constraint syntax");
+ return;
+ }
+ XSTR (part, field) = cons[id].out ();
+ cons[id].idx = -1;
+ }
+
+ format_ptr = GET_RTX_FORMAT (GET_CODE (part));
+
+ /* Recursively search the rtx. */
+ for (int i = 0; i < GET_RTX_LENGTH (GET_CODE (part)); i++)
+ switch (*format_ptr++)
+ {
+ case 'e':
+ case 'u':
+ add_constraints (XEXP (part, i), loc, cons);
+ break;
+ case 'E':
+ if (XVEC (part, i) != NULL)
+ for (int j = 0; j < XVECLEN (part, i); j++)
+ add_constraints (XVECEXP (part, i, j), loc, cons);
+ break;
+ default:
+ continue;
+ }
+}
+
+/* Add ATTRS to definition X's attribute list. */
+
+static void
+add_attributes (rtx x, vec_conlist &attrs)
+{
+ unsigned int attr_index = GET_CODE (x) == DEFINE_INSN ? 4 : 3;
+ rtvec orig = XVEC (x, attr_index);
+ if (orig)
+ {
+ size_t n_curr = XVECLEN (x, attr_index);
+ rtvec copy = rtvec_alloc (n_curr + attrs.size ());
+
+ /* Create a shallow copy of existing entries. */
+ memcpy (&copy->elem[attrs.size ()], &orig->elem[0],
+ sizeof (rtx) * n_curr);
+ XVEC (x, attr_index) = copy;
+ }
+ else
+ XVEC (x, attr_index) = rtvec_alloc (attrs.size ());
+
+ /* Create the new elements. */
+ for (unsigned i = 0; i < attrs.size (); i++)
+ {
+ rtx attr = rtx_alloc (SET_ATTR);
+ XSTR (attr, 0) = xstrdup (attrs[i].name.c_str ());
+ XSTR (attr, 1) = attrs[i].out ();
+ XVECEXP (x, attr_index, i) = attr;
+ }
+}
+
+/* Consumes spaces and tabs. */
+
+static inline void
+skip_spaces (const char **str)
+{
+ while (ISBLANK (**str))
+ (*str)++;
+}
+
+/* Consumes the given character, if it's there. */
+
+static inline bool
+expect_char (const char **str, char c)
+{
+ if (**str != c)
+ return false;
+ (*str)++;
+ return true;
+}
+
+/* Parses the section layout that follows a "{@" if using new syntax. Builds
+ a vector for a single section. E.g. if we have "attrs: length, arch]..."
+ then list will have two elements, the first for "length" and the second
+ for "arch". */
+
+static void
+parse_section_layout (file_location loc, const char **templ, const char *label,
+ vec_conlist &list, bool numeric)
+{
+ const char *name_start;
+ size_t label_len = strlen (label);
+ if (strncmp (label, *templ, label_len) == 0)
+ {
+ *templ += label_len;
+
+ /* Gather the names. */
+ while (**templ != ';' && **templ != ']')
+ {
+ skip_spaces (templ);
+ name_start = *templ;
+ int len = 0;
+ char val = (*templ)[len];
+ while (val != ',' && val != ';' && val != ']')
+ {
+ if (val == 0 || val == '\n')
+ fatal_at (loc, "missing ']'");
+ val = (*templ)[++len];
+ }
+ *templ += len;
+ if (val == ',')
+ (*templ)++;
+ list.push_back (conlist (name_start, len, numeric));
+ }
+ }
+}
+
+/* Parse a section. A section is defined as a named, comma-separated list, e.g.
+
+ foo: a, b, c
+
+ is a section named "foo" with entries a, b and c. */
+
+static void
+parse_section (const char **templ, unsigned int n_elems, unsigned int alt_no,
+ vec_conlist &list, file_location loc, const char *name)
+{
+ unsigned int i;
+
+ /* Go through the list, one character at a time, adding said character
+ to the correct string. */
+ for (i = 0; **templ != ']' && **templ != ';'; (*templ)++)
+ if (!ISBLANK (**templ))
+ {
+ if (**templ == 0 || **templ == '\n')
+ fatal_at (loc, "missing ']'");
+ list[i].add (**templ);
+ if (**templ == ',')
+ {
+ ++i;
+ if (i == n_elems)
+ fatal_at (loc, "too many %ss in alternative %d: expected %d",
+ name, alt_no, n_elems);
+ }
+ }
+
+ if (i + 1 < n_elems)
+ fatal_at (loc, "too few %ss in alternative %d: expected %d, got %d",
+ name, alt_no, n_elems, i);
+
+ list[i].add (',');
+}
+
+/* The compact syntax has more convenience syntaxes. As such we post-process
+ the lines to get them back to something the normal syntax understands. */
+
+static void
+preprocess_compact_syntax (file_location loc, int alt_no, std::string &line,
+ std::string &last_line)
+{
+ /* Check if we're copying the last statement. */
+ if (line.find ("^") == 0 && line.size () == 1)
+ {
+ if (last_line.empty ())
+ fatal_at (loc, "found instruction to copy previous line (^) in"
+ "alternative %d but no previous line to copy", alt_no);
+ line = last_line;
+ return;
+ }
+
+ std::string result;
+ std::string buffer;
+ /* Check if we have << which means return c statement. */
+ if (line.find ("<<") == 0)
+ {
+ result.append ("* return ");
+ const char *chunk = line.c_str () + 2;
+ skip_spaces (&chunk);
+ result.append (chunk);
+ }
+ else
+ result.append (line);
+
+ line = result;
+ return;
+}
+
+/* Converts an rtx from compact syntax to normal syntax if possible. */
+
+static void
+convert_syntax (rtx x, file_location loc)
+{
+ int alt_no;
+ unsigned int templ_index;
+ const char *templ;
+ vec_conlist tconvec, convec, attrvec;
+
+ templ_index = GET_CODE (x) == DEFINE_INSN ? 3 : 2;
+
+ templ = XTMPL (x, templ_index);
+
+ /* Templates with constraints start with "{@". */
+ if (strncmp ("*{@", templ, 3))
+ return;
+
+ /* Get the layout for the template. */
+ templ += 3;
+ skip_spaces (&templ);
+
+ if (!expect_char (&templ, '['))
+ fatal_at (loc, "expecing `[' to begin section list");
+
+ parse_section_layout (loc, &templ, "cons:", tconvec, true);
+
+ /* Check for any duplicate cons entries and sort based on i. */
+ for (auto e : tconvec)
+ {
+ unsigned idx = e.idx;
+ if (idx >= convec.size ())
+ convec.resize (idx + 1);
+
+ if (convec[idx].idx >= 0)
+ fatal_at (loc, "duplicate cons number found: %d", idx);
+ convec[idx] = e;
+ }
+ tconvec.clear ();
+
+ if (*templ != ']')
+ {
+ if (*templ == ';')
+ skip_spaces (&(++templ));
+ parse_section_layout (loc, &templ, "attrs:", attrvec, false);
+ }
+
+ if (!expect_char (&templ, ']'))
+ fatal_at (loc, "expecting `]` to end section list - section list must have "
+ "cons first, attrs second");
+
+ /* We will write the un-constrainified template into new_templ. */
+ std::string new_templ;
+ new_templ.append ("@");
+
+ /* Skip to the first proper line. */
+ skip_spaces (&templ);
+ if (*templ == 0)
+ fatal_at (loc, "'{@...}' blocks must have at least one alternative");
+ if (*templ != '\n')
+ fatal_at (loc, "unexpected character '%c' after ']'", *templ);
+ templ++;
+
+ alt_no = 0;
+ std::string last_line;
+
+ /* Process the alternatives. */
+ while (*(templ - 1) != '\0')
+ {
+ /* Skip leading whitespace. */
+ std::string buffer;
+ skip_spaces (&templ);
+
+ /* Check if we're at the end. */
+ if (templ[0] == '}' && templ[1] == '\0')
+ break;
+
+ if (expect_char (&templ, '['))
+ {
+ new_templ += '\n';
+ new_templ.append (buffer);
+ /* Parse the constraint list, then the attribute list. */
+ if (convec.size () > 0)
+ parse_section (&templ, convec.size (), alt_no, convec, loc,
+ "constraint");
+
+ if (attrvec.size () > 0)
+ {
+ if (convec.size () > 0 && !expect_char (&templ, ';'))
+ fatal_at (loc, "expected `;' to separate constraints "
+ "and attributes in alternative %d", alt_no);
+
+ parse_section (&templ, attrvec.size (), alt_no,
+ attrvec, loc, "attribute");
+ }
+
+ if (!expect_char (&templ, ']'))
+ fatal_at (loc, "expected end of constraint/attribute list but "
+ "missing an ending `]' in alternative %d", alt_no);
+ }
+ else if (templ[0] == '/' && templ[1] == '/')
+ {
+ templ += 2;
+ /* Glob till newline or end of string. */
+ while (*templ != '\n' && *templ != '\0')
+ templ++;
+
+ /* Skip any newlines or whitespaces needed. */
+ while (ISSPACE(*templ))
+ templ++;
+ continue;
+ }
+ else if (templ[0] == '/' && templ[1] == '*')
+ {
+ templ += 2;
+ /* Glob till newline or end of multiline comment. */
+ while (templ[0] != 0 && templ[0] != '*' && templ[1] != '/')
+ templ++;
+
+ while (templ[0] != '*' || templ[1] != '/')
+ {
+ if (templ[0] == 0)
+ fatal_at (loc, "unterminated '/*'");
+ templ++;
+ }
+ templ += 2;
+
+ /* Skip any newlines or whitespaces needed. */
+ while (ISSPACE(*templ))
+ templ++;
+ continue;
+ }
+ else
+ fatal_at (loc, "expected constraint/attribute list at beginning of "
+ "alternative %d but missing a starting `['", alt_no);
+
+ /* Skip whitespace between list and asm. */
+ skip_spaces (&templ);
+
+ /* Copy asm to new template. */
+ std::string line;
+ while (*templ != '\n' && *templ != '\0')
+ line += *templ++;
+
+ /* Apply any pre-processing needed to the line. */
+ preprocess_compact_syntax (loc, alt_no, line, last_line);
+ new_templ.append (line);
+ last_line = line;
+
+ /* Normal "*..." syntax expects the closing quote to be on the final
+ line of asm, whereas we allow the closing "}" to be on its own line.
+ Postpone copying the '\n' until we know that there is another
+ alternative in the list. */
+ while (ISSPACE (*templ))
+ templ++;
+ ++alt_no;
+ }
+
+ /* Write the constraints and attributes into their proper places. */
+ if (convec.size () > 0)
+ add_constraints (x, loc, convec);
+
+ if (attrvec.size () > 0)
+ add_attributes (x, attrvec);
+
+ /* Copy over the new un-constrainified template. */
+ XTMPL (x, templ_index) = xstrdup (new_templ.c_str ());
+
+ /* Register for later checks during iterator expansions. */
+ compact_syntax.add (x);
+}
+
/* Process a top level rtx in some way, queuing as appropriate. */
static void
@@ -553,10 +1048,12 @@ process_rtx (rtx desc, file_location loc)
switch (GET_CODE (desc))
{
case DEFINE_INSN:
+ convert_syntax (desc, loc);
queue_pattern (desc, &define_insn_tail, loc);
break;
case DEFINE_COND_EXEC:
+ convert_syntax (desc, loc);
queue_pattern (desc, &define_cond_exec_tail, loc);
break;
@@ -631,6 +1128,7 @@ process_rtx (rtx desc, file_location loc)
attr = XVEC (desc, split_code + 1);
PUT_CODE (desc, DEFINE_INSN);
XVEC (desc, 4) = attr;
+ convert_syntax (desc, loc);
/* Queue them. */
insn_elem = queue_pattern (desc, &define_insn_tail, loc);
diff --git a/gcc/gensupport.h b/gcc/gensupport.h
index 9a0fd7393..a19fc1319 100644
--- a/gcc/gensupport.h
+++ b/gcc/gensupport.h
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#ifndef GCC_GENSUPPORT_H
#define GCC_GENSUPPORT_H
+#include "hash-set.h"
#include "read-md.h"
struct obstack;
@@ -218,6 +219,8 @@ struct pattern_stats
int num_operand_vars;
};
+extern hash_set<rtx> compact_syntax;
+
extern void get_pattern_stats (struct pattern_stats *ranges, rtvec vec);
extern void compute_test_codes (rtx, file_location, char *);
extern file_location get_file_location (rtx);
--
2.33.0

View File

@ -0,0 +1,104 @@
From 35b64175c6fd622212d0bf936e7e98c635e1c618 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Wed, 13 Sep 2023 14:50:30 +0100
Subject: [PATCH 054/157] [Backport][SME] recog: Improve parser for pattern new
compact syntax
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd1091fe455c1ede5993b4cdf10d0f7c461b86d7
Hi all,
this adds support to the new compact pattern syntax for the case
where the constraints appear unsorted, like:
(define_insn "*<optab>si3_insn_uxtw"
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (SHIFT_no_rotate:SI
(match_operand:SI 1 "register_operand")
(match_operand:QI 2 "aarch64_reg_or_shift_imm_si"))))]
""
{@ [cons: =0, 2, 1]
[ r, Uss, r] <shift>\\t%w0, %w1, %2
[ r, r, r] <shift>\\t%w0, %w1, %w2
}
[(set_attr "type" "bfx,shift_reg")]
)
Best Regards
Andrea
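For readers who want the mechanics: the fix defers the index-based
reordering of constraint columns until after all alternatives have been
parsed in layout order. A rough C sketch of that reordering step, using a
hypothetical entry type in place of gensupport's real conlist class
(illustration only, not code from the patch):

/* Place entries parsed in layout order (e.g. =0, 2, 1) into a vector
   indexed by operand number, rejecting duplicate operand numbers --
   mirroring the "duplicate cons number" check in the diff below.
   Assumes OUT has room for the largest operand number parsed.  */
struct entry { int idx; const char *str; };

static int
reorder_cons (const struct entry *parsed, int n, struct entry *out, int nout)
{
  for (int i = 0; i < nout; i++)
    out[i].idx = -1;
  for (int i = 0; i < n; i++)
    {
      int idx = parsed[i].idx;
      if (out[idx].idx >= 0)
        return -1;  /* duplicate cons number */
      out[idx] = parsed[i];
    }
  return 0;
}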
gcc/ChangeLog
2023-09-20 Richard Sandiford <richard.sandiford@arm.com>
* gensupport.cc (convert_syntax): Updated to support unordered
constraints in compact syntax.
---
gcc/gensupport.cc | 32 ++++++++++++++++----------------
1 file changed, 16 insertions(+), 16 deletions(-)
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index 23c61dcdd..97c614850 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -895,19 +895,6 @@ convert_syntax (rtx x, file_location loc)
parse_section_layout (loc, &templ, "cons:", tconvec, true);
- /* Check for any duplicate cons entries and sort based on i. */
- for (auto e : tconvec)
- {
- unsigned idx = e.idx;
- if (idx >= convec.size ())
- convec.resize (idx + 1);
-
- if (convec[idx].idx >= 0)
- fatal_at (loc, "duplicate cons number found: %d", idx);
- convec[idx] = e;
- }
- tconvec.clear ();
-
if (*templ != ']')
{
if (*templ == ';')
@@ -950,13 +937,13 @@ convert_syntax (rtx x, file_location loc)
new_templ += '\n';
new_templ.append (buffer);
/* Parse the constraint list, then the attribute list. */
- if (convec.size () > 0)
- parse_section (&templ, convec.size (), alt_no, convec, loc,
+ if (tconvec.size () > 0)
+ parse_section (&templ, tconvec.size (), alt_no, tconvec, loc,
"constraint");
if (attrvec.size () > 0)
{
- if (convec.size () > 0 && !expect_char (&templ, ';'))
+ if (tconvec.size () > 0 && !expect_char (&templ, ';'))
fatal_at (loc, "expected `;' to separate constraints "
"and attributes in alternative %d", alt_no);
@@ -1026,6 +1013,19 @@ convert_syntax (rtx x, file_location loc)
++alt_no;
}
+ /* Check for any duplicate cons entries and sort based on i. */
+ for (auto e : tconvec)
+ {
+ unsigned idx = e.idx;
+ if (idx >= convec.size ())
+ convec.resize (idx + 1);
+
+ if (convec[idx].idx >= 0)
+ fatal_at (loc, "duplicate cons number found: %d", idx);
+ convec[idx] = e;
+ }
+ tconvec.clear ();
+
/* Write the constraints and attributes into their proper places. */
if (convec.size () > 0)
add_constraints (x, loc, convec);
--
2.33.0

View File

@ -0,0 +1,49 @@
From e593ad216bd1f4f75d9875898f352e0e5f978159 Mon Sep 17 00:00:00 2001
From: Andrea Corallo <andrea.corallo@arm.com>
Date: Fri, 15 Sep 2023 10:23:02 +0200
Subject: [PATCH 055/157] [Backport][SME] recog: Support space in "[ cons"
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=9d31045b21324166c3997d603961d99e3c4c357d
Hi all,
this is to allow for spaces before "cons:" in the definitions of
patterns using the new compact syntax, e.g.:
(define_insn "aarch64_simd_dup<mode>"
[(set (match_operand:VDQ_I 0 "register_operand")
(vec_duplicate:VDQ_I
(match_operand:<VEL> 1 "register_operand")))]
"TARGET_SIMD"
{@ [ cons: =0 , 1 ; attrs: type ]
[ w , w ; neon_dup<q> ] dup\t%0.<Vtype>, %1.<Vetype>[0]
[ w , ?r ; neon_from_gp<q> ] dup\t%0.<Vtype>, %<vwcore>1
}
)
gcc/ChangeLog
2023-09-20 Andrea Corallo <andrea.corallo@arm.com>
* gensupport.cc (convert_syntax): Skip spaces before "cons:"
in new compact pattern syntax.
---
gcc/gensupport.cc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/gcc/gensupport.cc b/gcc/gensupport.cc
index 97c614850..3d7a6d4fd 100644
--- a/gcc/gensupport.cc
+++ b/gcc/gensupport.cc
@@ -893,6 +893,8 @@ convert_syntax (rtx x, file_location loc)
if (!expect_char (&templ, '['))
fatal_at (loc, "expecing `[' to begin section list");
+ skip_spaces (&templ);
+
parse_section_layout (loc, &templ, "cons:", tconvec, true);
if (*templ != ']')
--
2.33.0

View File

@ -0,0 +1,164 @@
From cb6d55f6bc7c490f72a43dd87543ab7a7ea582a8 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:18 +0000
Subject: [PATCH 056/157] [Backport][SME] aarch64: Generalise
require_immediate_lane_index
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c0cf2c893d54420b0c19fee7bd41ae40017d0106
require_immediate_lane_index previously hard-coded the assumption
that the group size is determined by the argument immediately before
the index. However, for SME, there are cases where it should be
determined by an earlier argument instead.
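For intuition (an illustrative sketch, not code from the patch), the
range that the checker enforces follows directly from the 128-bit
quadword size, the group size, and the element width of the indexed
vector argument, as in the nlanes computation visible in the diff below:

/* With ELT_BITS-wide elements selected in GROUP_SIZE-element groups,
   a 128-bit quadword holds NLANES groups, so the valid immediate
   lane indices are 0 .. NLANES - 1.  */
static unsigned int
max_lane_index (unsigned int group_size, unsigned int elt_bits)
{
  unsigned int nlanes = 128 / (group_size * elt_bits);
  return nlanes - 1;
}
/* Example: 16-bit elements in groups of 2 -> valid indices 0..3.  */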
gcc/
* config/aarch64/aarch64-sve-builtins.h:
(function_checker::require_immediate_lane_index): Add an argument
for the index of the indexed vector argument.
* config/aarch64/aarch64-sve-builtins.cc
(function_checker::require_immediate_lane_index): Likewise.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(ternary_bfloat_lane_base::check): Update accordingly.
(ternary_qq_lane_base::check): Likewise.
(binary_lane_def::check): Likewise.
(binary_long_lane_def::check): Likewise.
(ternary_lane_def::check): Likewise.
(ternary_lane_rotate_def::check): Likewise.
(ternary_long_lane_def::check): Likewise.
(ternary_qq_lane_rotate_def::check): Likewise.
---
.../aarch64/aarch64-sve-builtins-shapes.cc | 16 ++++++++--------
gcc/config/aarch64/aarch64-sve-builtins.cc | 18 ++++++++++++------
gcc/config/aarch64/aarch64-sve-builtins.h | 3 ++-
3 files changed, 22 insertions(+), 15 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index f57f92698..4fa4181b9 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -941,7 +941,7 @@ struct ternary_bfloat_lane_base
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (3, N);
+ return c.require_immediate_lane_index (3, 2, N);
}
};
@@ -956,7 +956,7 @@ struct ternary_qq_lane_base
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (3, 4);
+ return c.require_immediate_lane_index (3, 0);
}
};
@@ -1123,7 +1123,7 @@ struct binary_lane_def : public overloaded_base<0>
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (2);
+ return c.require_immediate_lane_index (2, 1);
}
};
SHAPE (binary_lane)
@@ -1162,7 +1162,7 @@ struct binary_long_lane_def : public overloaded_base<0>
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (2);
+ return c.require_immediate_lane_index (2, 1);
}
};
SHAPE (binary_long_lane)
@@ -2817,7 +2817,7 @@ struct ternary_lane_def : public overloaded_base<0>
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (3);
+ return c.require_immediate_lane_index (3, 2);
}
};
SHAPE (ternary_lane)
@@ -2845,7 +2845,7 @@ struct ternary_lane_rotate_def : public overloaded_base<0>
bool
check (function_checker &c) const OVERRIDE
{
- return (c.require_immediate_lane_index (3, 2)
+ return (c.require_immediate_lane_index (3, 2, 2)
&& c.require_immediate_one_of (4, 0, 90, 180, 270));
}
};
@@ -2868,7 +2868,7 @@ struct ternary_long_lane_def
bool
check (function_checker &c) const OVERRIDE
{
- return c.require_immediate_lane_index (3);
+ return c.require_immediate_lane_index (3, 2);
}
};
SHAPE (ternary_long_lane)
@@ -2965,7 +2965,7 @@ struct ternary_qq_lane_rotate_def : public overloaded_base<0>
bool
check (function_checker &c) const OVERRIDE
{
- return (c.require_immediate_lane_index (3, 4)
+ return (c.require_immediate_lane_index (3, 0)
&& c.require_immediate_one_of (4, 0, 90, 180, 270));
}
};
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 91af96687..7924cdf0f 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -2440,20 +2440,26 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type)
return false;
}
-/* Check that argument REL_ARGNO is suitable for indexing argument
- REL_ARGNO - 1, in groups of GROUP_SIZE elements. REL_ARGNO counts
- from the end of the predication arguments. */
+/* The intrinsic conceptually divides vector argument REL_VEC_ARGNO into
+ groups of GROUP_SIZE elements. Return true if argument REL_ARGNO is
+ a suitable constant index for selecting one of these groups. The
+ selection happens within a 128-bit quadword, rather than the whole vector.
+
+ REL_ARGNO and REL_VEC_ARGNO count from the end of the predication
+ arguments. */
bool
function_checker::require_immediate_lane_index (unsigned int rel_argno,
+ unsigned int rel_vec_argno,
unsigned int group_size)
{
unsigned int argno = m_base_arg + rel_argno;
if (!argument_exists_p (argno))
return true;
- /* Get the type of the previous argument. tree_argument_type wants a
- 1-based number, whereas ARGNO is 0-based. */
- machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, argno));
+ /* Get the type of the vector argument. tree_argument_type wants a
+ 1-based number, whereas VEC_ARGNO is 0-based. */
+ unsigned int vec_argno = m_base_arg + rel_vec_argno;
+ machine_mode mode = TYPE_MODE (type_argument_type (m_fntype, vec_argno + 1));
gcc_assert (VECTOR_MODE_P (mode));
unsigned int nlanes = 128 / (group_size * GET_MODE_UNIT_BITSIZE (mode));
return require_immediate_range (rel_argno, 0, nlanes - 1);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 52994cde0..824c31cd7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -463,7 +463,8 @@ public:
bool require_immediate_either_or (unsigned int, HOST_WIDE_INT,
HOST_WIDE_INT);
bool require_immediate_enum (unsigned int, tree);
- bool require_immediate_lane_index (unsigned int, unsigned int = 1);
+ bool require_immediate_lane_index (unsigned int, unsigned int,
+ unsigned int = 1);
bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT,
HOST_WIDE_INT, HOST_WIDE_INT);
bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
--
2.33.0

View File

@ -0,0 +1,469 @@
From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001
From: Christophe Lyon <christophe.lyon@arm.com>
Date: Fri, 11 Mar 2022 16:21:02 +0000
Subject: [PATCH 057/157] [Backport][SME] aarch64: Add backend support for DFP
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22
This patch updates the aarch64 backend as needed to support DFP modes
(SD, DD and TD).
Changes v1->v2:
* Drop support for DFP modes in
aarch64_gen_{load|store}[wb]_pair as these are only used in
prologue/epilogue where DFP modes are not used. Drop the
changes to the corresponding patterns in aarch64.md, and the
useless GPF_PAIR iterator.
* In aarch64_reinterpret_float_as_int, handle DDmode the same way
as DFmode (needed in case the representation of the
floating-point value can be loaded using mov/movk).
* In aarch64_float_const_zero_rtx_p, reject constants with DFP
mode: when X is zero, the callers want to emit either '0' or
'zr' depending on the context, which is not the way 0.0 is
represented in DFP mode (in particular fmov d0, #0 is not right
for DFP).
* In aarch64_legitimate_constant_p, accept DFP constants.
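For reference, the SD, DD and TD modes back the C decimal floating
types (a GCC extension). A minimal illustrative example, not taken from
the patch, that exercises all three modes once this support is in place:

/* These map to SDmode, DDmode and TDmode respectively (cf. the
   dfloat32/64/128_type_node handling added to
   aarch64_gimplify_va_arg_expr in the diff below).  */
_Decimal32  add32 (_Decimal32 x)   { return x + 1.5DF; }
_Decimal64  add64 (_Decimal64 x)   { return x + 1.5DD; }
_Decimal128 add128 (_Decimal128 x) { return x + 1.5DL; }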
2022-03-31 Christophe Lyon <christophe.lyon@arm.com>
gcc/
* config/aarch64/aarch64.cc
(aarch64_split_128bit_move): Handle DFP modes.
(aarch64_mode_valid_for_sched_fusion_p): Likewise.
(aarch64_classify_address): Likewise.
(aarch64_legitimize_address_displacement): Likewise.
(aarch64_reinterpret_float_as_int): Likewise.
(aarch64_float_const_zero_rtx_p): Likewise.
(aarch64_can_const_movi_rtx_p): Likewise.
(aarch64_anchor_offset): Likewise.
(aarch64_secondary_reload): Likewise.
(aarch64_rtx_costs): Likewise.
(aarch64_legitimate_constant_p): Likewise.
(aarch64_gimplify_va_arg_expr): Likewise.
(aapcs_vfp_sub_candidate): Likewise.
(aarch64_vfp_is_call_or_return_candidate): Likewise.
(aarch64_output_scalar_simd_mov_immediate): Likewise.
(aarch64_gen_adjusted_ldpstp): Likewise.
(aarch64_scalar_mode_supported_p): Accept DFP modes if enabled.
* config/aarch64/aarch64.md
(movsf_aarch64): Use SFD iterator and rename into
mov<mode>_aarch64.
(movdf_aarch64): Use DFD iterator and rename into
mov<mode>_aarch64.
(movtf_aarch64): Use TFD iterator and rename into
mov<mode>_aarch64.
(split pattern for move TF mode): Use TFD iterator.
* config/aarch64/iterators.md
(GPF_TF_F16_MOV): Add DFP modes.
(SFD, DFD, TFD): New iterators.
(GPF_TF): Add DFP modes.
(TX, DX, DX2): Likewise.
---
gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++-----------
gcc/config/aarch64/aarch64.md | 34 +++++++-------
gcc/config/aarch64/iterators.md | 24 +++++++---
3 files changed, 89 insertions(+), 51 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 055b436b1..02210ed13 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src)
machine_mode mode = GET_MODE (dst);
- gcc_assert (mode == TImode || mode == TFmode);
+ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode);
gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
{
return mode == SImode || mode == DImode
|| mode == SFmode || mode == DFmode
+ || mode == SDmode || mode == DDmode
|| (aarch64_vector_mode_supported_p (mode)
&& (known_eq (GET_MODE_SIZE (mode), 8)
|| (known_eq (GET_MODE_SIZE (mode), 16)
@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
vec_flags &= ~VEC_PARTIAL;
/* On BE, we use load/store pair for all large int mode load/stores.
- TI/TFmode may also use a load/store pair. */
+ TI/TF/TDmode may also use a load/store pair. */
bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
|| type == ADDR_QUERY_LDP_STP_N
|| mode == TImode
|| mode == TFmode
+ || mode == TDmode
|| (BYTES_BIG_ENDIAN && advsimd_struct_p));
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
corresponds to the actual size of the memory being loaded/stored and the
@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
info->offset = op1;
info->const_offset = offset;
- /* TImode and TFmode values are allowed in both pairs of X
+ /* TImode, TFmode and TDmode values are allowed in both pairs of X
registers and individual Q registers. The available
address modes are:
X,X: 7-bit signed scaled offset
@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
When performing the check for pairs of X registers i.e. LDP/STP
pass down DImode since that is the natural size of the LDP/STP
instruction memory accesses. */
- if (mode == TImode || mode == TFmode)
+ if (mode == TImode || mode == TFmode || mode == TDmode)
return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
&& (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
|| offset_12bit_unsigned_scaled_p (mode, offset)));
@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info,
info->offset = XEXP (XEXP (x, 1), 1);
info->const_offset = offset;
- /* TImode and TFmode values are allowed in both pairs of X
+ /* TImode, TFmode and TDmode values are allowed in both pairs of X
registers and individual Q registers. The available
address modes are:
X,X: 7-bit signed scaled offset
Q: 9-bit signed offset
We conservatively require an offset representable in either mode.
*/
- if (mode == TImode || mode == TFmode)
+ if (mode == TImode || mode == TFmode || mode == TDmode)
return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
&& aarch64_offset_9bit_signed_unscaled_p (mode, offset));
@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
range otherwise to increase opportunities for sharing the base
address of different sizes. Unaligned accesses use the signed
- 9-bit range, TImode/TFmode use the intersection of signed
+ 9-bit range, TImode/TFmode/TDmode use the intersection of signed
scaled 7-bit and signed 9-bit offset. */
- if (mode == TImode || mode == TFmode)
+ if (mode == TImode || mode == TFmode || mode == TDmode)
second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
else if ((const_offset & (size - 1)) != 0)
second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
CONST_DOUBLE_REAL_VALUE (value),
REAL_MODE_FORMAT (mode));
- if (mode == DFmode)
+ if (mode == DFmode || mode == DDmode)
{
int order = BYTES_BIG_ENDIAN ? 1 : 0;
ival = zext_hwi (res[order], 32);
@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x)
return false;
}
-/* Return TRUE if rtx X is immediate constant 0.0 */
+/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal
+ Floating Point). */
bool
aarch64_float_const_zero_rtx_p (rtx x)
{
- if (GET_MODE (x) == VOIDmode)
+ /* 0.0 in Decimal Floating Point cannot be represented by #0 or
+ zr as our callers expect, so no need to check the actual
+ value if X is of Decimal Floating Point type. */
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT)
return false;
if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
else
return false;
- /* use a 64 bit mode for everything except for DI/DF mode, where we use
+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
a 128 bit vector mode. */
int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
if (IN_RANGE (offset, -256, 0))
return 0;
- if (mode == TImode || mode == TFmode)
+ if (mode == TImode || mode == TFmode || mode == TDmode)
return (offset + 0x100) & ~0x1ff;
/* Use 12-bit offset by access size. */
@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
/* Without the TARGET_SIMD instructions we cannot move a Q register
to a Q register directly. We need a scratch. */
- if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
+ if (REG_P (x)
+ && (mode == TFmode || mode == TImode || mode == TDmode)
+ && mode == GET_MODE (x)
&& FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
&& reg_class_subset_p (rclass, FP_REGS))
{
@@ -12745,14 +12753,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
return NO_REGS;
}
- /* A TFmode or TImode memory access should be handled via an FP_REGS
+ /* A TFmode, TImode or TDmode memory access should be handled via an FP_REGS
because AArch64 has richer addressing modes for LDR/STR instructions
than LDP/STP instructions. */
if (TARGET_FLOAT && rclass == GENERAL_REGS
&& known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
return FP_REGS;
- if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
+ if (rclass == FP_REGS
+ && (mode == TImode || mode == TFmode || mode == TDmode)
+ && CONSTANT_P(x))
return GENERAL_REGS;
return NO_REGS;
@@ -13883,9 +13893,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
*cost += extra_cost->ldst.storev;
else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.store;
- else if (mode == SFmode)
+ else if (mode == SFmode || mode == SDmode)
*cost += extra_cost->ldst.storef;
- else if (mode == DFmode)
+ else if (mode == DFmode || mode == DDmode)
*cost += extra_cost->ldst.stored;
*cost +=
@@ -14009,11 +14019,11 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
/* mov[df,sf]_aarch64. */
if (aarch64_float_const_representable_p (x))
/* FMOV (scalar immediate). */
- *cost += extra_cost->fp[mode == DFmode].fpconst;
+ *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
else if (!aarch64_float_const_zero_rtx_p (x))
{
/* This will be a load from memory. */
- if (mode == DFmode)
+ if (mode == DFmode || mode == DDmode)
*cost += extra_cost->ldst.loadd;
else
*cost += extra_cost->ldst.loadf;
@@ -14039,9 +14049,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
*cost += extra_cost->ldst.loadv;
else if (GET_MODE_CLASS (mode) == MODE_INT)
*cost += extra_cost->ldst.load;
- else if (mode == SFmode)
+ else if (mode == SFmode || mode == SDmode)
*cost += extra_cost->ldst.loadf;
- else if (mode == DFmode)
+ else if (mode == DFmode || mode == DDmode)
*cost += extra_cost->ldst.loadd;
*cost +=
@@ -19623,7 +19633,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
{
/* Support CSE and rematerialization of common constants. */
if (CONST_INT_P (x)
- || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT))
+ || CONST_DOUBLE_P (x))
return true;
/* Only accept variable-length vector constants if they can be
@@ -20064,6 +20074,18 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
field_t = long_double_type_node;
field_ptr_t = long_double_ptr_type_node;
break;
+ case SDmode:
+ field_t = dfloat32_type_node;
+ field_ptr_t = build_pointer_type (dfloat32_type_node);
+ break;
+ case DDmode:
+ field_t = dfloat64_type_node;
+ field_ptr_t = build_pointer_type (dfloat64_type_node);
+ break;
+ case TDmode:
+ field_t = dfloat128_type_node;
+ field_ptr_t = build_pointer_type (dfloat128_type_node);
+ break;
case E_HFmode:
field_t = aarch64_fp16_type_node;
field_ptr_t = aarch64_fp16_ptr_type_node;
@@ -20315,7 +20337,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
case REAL_TYPE:
mode = TYPE_MODE (type);
if (mode != DFmode && mode != SFmode
- && mode != TFmode && mode != HFmode)
+ && mode != TFmode && mode != HFmode
+ && mode != SDmode && mode != DDmode && mode != TDmode)
return -1;
if (*modep == VOIDmode)
@@ -20631,7 +20654,9 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
machine_mode new_mode = VOIDmode;
bool composite_p = aarch64_composite_type_p (type, mode);
- if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ if ((!composite_p
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
+ || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT))
|| aarch64_short_vector_p (type, mode))
{
*count = 1;
@@ -23565,7 +23590,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
}
machine_mode vmode;
- /* use a 64 bit mode for everything except for DI/DF mode, where we use
+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
a 128 bit vector mode. */
int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
@@ -26417,7 +26442,7 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
base_off = (off_val_1 + off_val_3) / 2;
else
/* However, due to issues with negative LDP/STP offset generation for
- larger modes, for DF, DI and vector modes. we must not use negative
+ larger modes, for DF, DD, DI and vector modes, we must not use negative
addresses smaller than 9 signed unadjusted bits can store. This
provides the most range in this case. */
base_off = off_val_1;
@@ -26695,6 +26720,9 @@ aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
static bool
aarch64_scalar_mode_supported_p (scalar_mode mode)
{
+ if (DECIMAL_FLOAT_MODE_P (mode))
+ return default_decimal_float_supported_p ();
+
return (mode == HFmode
? true
: default_scalar_mode_supported_p (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a78476c8a..8757a962f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1476,11 +1476,11 @@
(set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
)
-(define_insn "*movsf_aarch64"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
- "TARGET_FLOAT && (register_operand (operands[0], SFmode)
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
+(define_insn "*mov<mode>_aarch64"
+ [(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
movi\\t%0.2s, #0
fmov\\t%s0, %w1
@@ -1500,11 +1500,11 @@
(set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")]
)
-(define_insn "*movdf_aarch64"
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
- (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
- "TARGET_FLOAT && (register_operand (operands[0], DFmode)
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
+(define_insn "*mov<mode>_aarch64"
+ [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
movi\\t%d0, #0
fmov\\t%d0, %x1
@@ -1545,13 +1545,13 @@
}
)
-(define_insn "*movtf_aarch64"
- [(set (match_operand:TF 0
+(define_insn "*mov<mode>_aarch64"
+ [(set (match_operand:TFD 0
"nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
- (match_operand:TF 1
+ (match_operand:TFD 1
"general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
- "TARGET_FLOAT && (register_operand (operands[0], TFmode)
- || aarch64_reg_or_fp_zero (operands[1], TFmode))"
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
mov\\t%0.16b, %1.16b
#
@@ -1571,8 +1571,8 @@
)
(define_split
- [(set (match_operand:TF 0 "register_operand" "")
- (match_operand:TF 1 "nonmemory_operand" ""))]
+ [(set (match_operand:TFD 0 "register_operand" "")
+ (match_operand:TFD 1 "nonmemory_operand" ""))]
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
[(const_int 0)]
{
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 967e6b0b1..d0cd1b788 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -67,14 +67,24 @@
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
;; Iterator for all scalar floating point modes suitable for moving, including
-;; special BF type (HF, SF, DF, TF and BF)
-(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
+;; special BF type and decimal floating point types (HF, SF, DF, TF, BF,
+;; SD, DD and TD)
+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF SD DD TD])
+
+;; Iterator for scalar 32bit fp modes (SF, SD)
+(define_mode_iterator SFD [SD SF])
+
+;; Iterator for scalar 64bit fp modes (DF, DD)
+(define_mode_iterator DFD [DD DF])
+
+;; Iterator for scalar 128bit fp modes (TF, TD)
+(define_mode_iterator TFD [TD TF])
;; Double vector modes.
(define_mode_iterator VDF [V2SF V4HF])
-;; Iterator for all scalar floating point modes (SF, DF and TF)
-(define_mode_iterator GPF_TF [SF DF TF])
+;; Iterator for all scalar floating point modes (SF, DF, TF, SD, DD, and TD)
+(define_mode_iterator GPF_TF [SF DF TF SD DD TD])
;; Integer Advanced SIMD modes.
(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
@@ -301,7 +311,7 @@
;; 2 and 4 lane SI modes.
(define_mode_iterator VS [V2SI V4SI])
-(define_mode_iterator TX [TI TF])
+(define_mode_iterator TX [TI TF TD])
;; Advanced SIMD opaque structure modes.
(define_mode_iterator VSTRUCT [OI CI XI])
@@ -403,10 +413,10 @@
V4x8HF V4x4SF V4x2DF V4x8BF])
;; Double scalar modes
-(define_mode_iterator DX [DI DF])
+(define_mode_iterator DX [DI DF DD])
;; Duplicate of the above
-(define_mode_iterator DX2 [DI DF])
+(define_mode_iterator DX2 [DI DF DD])
;; Single scalar modes
(define_mode_iterator SX [SI SF])
--
2.33.0

File diff suppressed because it is too large

View File

@ -0,0 +1,213 @@
From b51d3b1af24758534e5a8f3a52a56106b935c485 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed, 31 May 2023 11:23:23 +0100
Subject: [PATCH 059/157] [Backport][SME] aarch64: Simplify output template
emission code for a few patterns
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=11bd9b1f8133fc07c267e6d1aee8b01e06c7a750
If the output code for a define_insn just does a switch (which_alternative) with no other computation, we can almost always
replace it with more compact MD syntax for each alternative in a multi-alternative '@' block.
This patch cleans up some such patterns in the aarch64 backend, making them shorter and more concise.
No behavioural change intended.
Bootstrapped and tested on aarch64-none-linux-gnu.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>): Rewrite
output template to avoid explicit switch on which_alternative.
(*aarch64_simd_mov<VQMOV:mode>): Likewise.
(and<mode>3): Likewise.
(ior<mode>3): Likewise.
* config/aarch64/aarch64.md (*mov<mode>_aarch64): Likewise.
---
gcc/config/aarch64/aarch64-simd.md | 97 +++++++++---------------------
gcc/config/aarch64/aarch64.md | 42 ++++---------
2 files changed, 40 insertions(+), 99 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ef7fc4ecb..2d688edf5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -122,28 +122,16 @@
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
-{
- switch (which_alternative)
- {
- case 0: return "ldr\t%d0, %1";
- case 1: return "str\txzr, %0";
- case 2: return "str\t%d1, %0";
- case 3:
- if (TARGET_SIMD)
- return "mov\t%0.<Vbtype>, %1.<Vbtype>";
- return "fmov\t%d0, %d1";
- case 4:
- if (TARGET_SIMD)
- return "umov\t%0, %1.d[0]";
- return "fmov\t%x0, %d1";
- case 5: return "fmov\t%d0, %1";
- case 6: return "mov\t%0, %1";
- case 7:
- return aarch64_output_simd_mov_immediate (operands[1], 64);
- case 8: return "fmov\t%d0, xzr";
- default: gcc_unreachable ();
- }
-}
+ "@
+ ldr\t%d0, %1
+ str\txzr, %0
+ str\t%d1, %0
+ * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
+ * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
+ fmov\t%d0, %1
+ mov\t%0, %1
+ * return aarch64_output_simd_mov_immediate (operands[1], 64);
+ fmov\t%d0, xzr"
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
neon_logic<q>, neon_to_gp<q>, f_mcr,\
mov_reg, neon_move<q>, f_mcr")
@@ -158,29 +146,16 @@
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
-{
- switch (which_alternative)
- {
- case 0:
- return "ldr\t%q0, %1";
- case 1:
- return "stp\txzr, xzr, %0";
- case 2:
- return "str\t%q1, %0";
- case 3:
- return "mov\t%0.<Vbtype>, %1.<Vbtype>";
- case 4:
- case 5:
- case 6:
- return "#";
- case 7:
- return aarch64_output_simd_mov_immediate (operands[1], 128);
- case 8:
- return "fmov\t%d0, xzr";
- default:
- gcc_unreachable ();
- }
-}
+ "@
+ ldr\t%q0, %1
+ stp\txzr, xzr, %0
+ str\t%q1, %0
+ mov\t%0.<Vbtype>, %1.<Vbtype>
+ #
+ #
+ #
+ * return aarch64_output_simd_mov_immediate (operands[1], 128);
+ fmov\t%d0, xzr"
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
neon_logic<q>, multiple, multiple,\
multiple, neon_move<q>, fmov")
@@ -1004,18 +979,10 @@
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
(match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
"TARGET_SIMD"
- {
- switch (which_alternative)
- {
- case 0:
- return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
- case 1:
- return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
- AARCH64_CHECK_BIC);
- default:
- gcc_unreachable ();
- }
- }
+ "@
+ and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
+ AARCH64_CHECK_BIC);"
[(set_attr "type" "neon_logic<q>")]
)
@@ -1025,18 +992,10 @@
(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
(match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
"TARGET_SIMD"
- {
- switch (which_alternative)
- {
- case 0:
- return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
- case 1:
- return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
- AARCH64_CHECK_ORR);
- default:
- gcc_unreachable ();
- }
- }
+ "@
+ orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ * return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,\
+ AARCH64_CHECK_ORR);"
[(set_attr "type" "neon_logic<q>")]
)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c0cc91756..7454a5c77 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1198,36 +1198,18 @@
(match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
"(register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
-{
- switch (which_alternative)
- {
- case 0:
- return "mov\t%w0, %w1";
- case 1:
- return "mov\t%w0, %1";
- case 2:
- return aarch64_output_scalar_simd_mov_immediate (operands[1],
- <MODE>mode);
- case 3:
- return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
- case 4:
- return "ldr<size>\t%w0, %1";
- case 5:
- return "ldr\t%<size>0, %1";
- case 6:
- return "str<size>\t%w1, %0";
- case 7:
- return "str\t%<size>1, %0";
- case 8:
- return TARGET_SIMD ? "umov\t%w0, %1.<v>[0]" : "fmov\t%w0, %s1";
- case 9:
- return TARGET_SIMD ? "dup\t%0.<Vallxd>, %w1" : "fmov\t%s0, %w1";
- case 10:
- return TARGET_SIMD ? "dup\t%<Vetype>0, %1.<v>[0]" : "fmov\t%s0, %s1";
- default:
- gcc_unreachable ();
- }
-}
+ "@
+ mov\t%w0, %w1
+ mov\t%w0, %1
+ * return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
+ * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
+ ldr<size>\t%w0, %1
+ ldr\t%<size>0, %1
+ str<size>\t%w1, %0
+ str\t%<size>1, %0
+ * return TARGET_SIMD ? \"umov\t%w0, %1.<v>[0]\" : \"fmov\t%w0, %s1\";
+ * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\";
+ * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>[0]\" : \"fmov\t%s0, %s1\";"
;; The "mov_imm" type for CNT is just a placeholder.
[(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
--
2.33.0

View File

@ -0,0 +1,631 @@
From d5293e2a8db54245553e01ad5d791b7492ad6101 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@arm.com>
Date: Mon, 24 Oct 2022 15:14:14 +0100
Subject: [PATCH 060/157] [Backport][SME] Improve immediate expansion
[PR106583]
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=a096036589d82175a0f729c2dab73c9a527d075d
Improve immediate expansion of immediates which can be created from a
bitmask immediate and 2 MOVKs. Simplify, refactor and improve efficiency
of bitmask checks. Move various immediate handling functions together
to avoid forward declarations.
This reduces the number of 4-instruction immediates in SPECINT/FP by 10-15%.
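As a worked example (an expansion sketch under the new algorithm, not
output copied from the compiler), one of the constants from the new test
below can be built from a bitmask immediate plus two MOVKs:

long f6_style (void)
{
  return 0x1234bbbb5678bbbb;
}
/* Expansion sketch, 3 instructions instead of 4:
     mov  x0, 0xbbbbbbbbbbbbbbbb   // bitmask immediate: 0xbbbb replicated
     movk x0, #0x5678, lsl 16      // replace bits 16..31
     movk x0, #0x1234, lsl 48      // replace bits 48..63  */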
gcc/
PR target/106583
* config/aarch64/aarch64.cc (aarch64_internal_mov_immediate)
Add support for a bitmask immediate with 2 MOVKs.
(aarch64_check_bitmask): New function after refactorization.
(aarch64_bitmask_imm): Simplify replication of small modes.
Split function into 64-bit only version for efficiency.
(aarch64_move_imm): Move near other immediate functions.
(aarch64_uimm12_shift): Likewise.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movk_shift): Likewise.
(aarch64_replicate_bitmask_imm): Likewise.
(aarch64_and_split_imm1): Likewise.
(aarch64_and_split_imm2): Likewise.
(aarch64_and_bitmask_imm): Likewise.
(aarch64_movw_imm): Likewise.
gcc/testsuite/
PR target/106583
* gcc.target/aarch64/pr106583.c: Add new test.
---
gcc/config/aarch64/aarch64.cc | 485 +++++++++++---------
gcc/testsuite/gcc.target/aarch64/pr106583.c | 41 ++
2 files changed, 301 insertions(+), 225 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/pr106583.c
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index b4b646fa0..cf7736994 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -305,7 +305,6 @@ static bool aarch64_builtin_support_vector_misalignment (machine_mode mode,
static machine_mode aarch64_simd_container_mode (scalar_mode, poly_int64);
static bool aarch64_print_address_internal (FILE*, machine_mode, rtx,
aarch64_addr_query_type);
-static HOST_WIDE_INT aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val);
/* The processor for which instructions should be scheduled. */
enum aarch64_processor aarch64_tune = cortexa53;
@@ -5756,6 +5755,143 @@ aarch64_output_sve_vector_inc_dec (const char *operands, rtx x)
factor, nelts_per_vq);
}
+/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
+
+static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
+ {
+ 0x0000000100000001ull,
+ 0x0001000100010001ull,
+ 0x0101010101010101ull,
+ 0x1111111111111111ull,
+ 0x5555555555555555ull,
+ };
+
+
+
+/* Return true if 64-bit VAL is a valid bitmask immediate. */
+static bool
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
+{
+ unsigned HOST_WIDE_INT tmp, mask, first_one, next_one;
+ int bits;
+
+ /* Check for a single sequence of one bits and return quickly if so.
+ The special cases of all ones and all zeroes returns false. */
+ tmp = val + (val & -val);
+
+ if (tmp == (tmp & -tmp))
+ return (val + 1) > 1;
+
+ /* Invert if the immediate doesn't start with a zero bit - this means we
+ only need to search for sequences of one bits. */
+ if (val & 1)
+ val = ~val;
+
+ /* Find the first set bit and set tmp to val with the first sequence of one
+ bits removed. Return success if there is a single sequence of ones. */
+ first_one = val & -val;
+ tmp = val & (val + first_one);
+
+ if (tmp == 0)
+ return true;
+
+ /* Find the next set bit and compute the difference in bit position. */
+ next_one = tmp & -tmp;
+ bits = clz_hwi (first_one) - clz_hwi (next_one);
+ mask = val ^ tmp;
+
+ /* Check the bit position difference is a power of 2, and that the first
+ sequence of one bits fits within 'bits' bits. */
+ if ((mask >> bits) != 0 || bits != (bits & -bits))
+ return false;
+
+ /* Check the sequence of one bits is repeated 64/bits times. */
+ return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
+}
+
+
+/* Return true if VAL is a valid bitmask immediate for MODE. */
+bool
+aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+{
+ if (mode == DImode)
+ return aarch64_bitmask_imm (val_in);
+
+ unsigned HOST_WIDE_INT val = val_in;
+
+ if (mode == SImode)
+ return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
+
+ /* Replicate small immediates to fit 64 bits. */
+ int size = GET_MODE_UNIT_PRECISION (mode);
+ val &= (HOST_WIDE_INT_1U << size) - 1;
+ val *= bitmask_imm_mul[__builtin_clz (size) - 26];
+
+ return aarch64_bitmask_imm (val);
+}
+
+
+/* Return true if the immediate VAL can be a bitfield immediate
+ by changing the given MASK bits in VAL to zeroes, ones or bits
+ from the other half of VAL. Return the new immediate in VAL2. */
+static inline bool
+aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
+ unsigned HOST_WIDE_INT &val2,
+ unsigned HOST_WIDE_INT mask)
+{
+ val2 = val & ~mask;
+ if (val2 != val && aarch64_bitmask_imm (val2))
+ return true;
+ val2 = val | mask;
+ if (val2 != val && aarch64_bitmask_imm (val2))
+ return true;
+ val = val & ~mask;
+ val2 = val | (((val >> 32) | (val << 32)) & mask);
+ if (val2 != val && aarch64_bitmask_imm (val2))
+ return true;
+ val2 = val | (((val >> 16) | (val << 48)) & mask);
+ if (val2 != val && aarch64_bitmask_imm (val2))
+ return true;
+ return false;
+}
+
+
+/* Return true if val is an immediate that can be loaded into a
+ register by a MOVZ instruction. */
+static bool
+aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
+{
+ if (GET_MODE_SIZE (mode) > 4)
+ {
+ if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
+ || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
+ return 1;
+ }
+ else
+ {
+ /* Ignore sign extension. */
+ val &= (HOST_WIDE_INT) 0xffffffff;
+ }
+ return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
+ || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
+}
+
+
+/* Return true if VAL is an immediate that can be loaded into a
+ register in a single instruction. */
+bool
+aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
+{
+ scalar_int_mode int_mode;
+ if (!is_a <scalar_int_mode> (mode, &int_mode))
+ return false;
+
+ if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
+ return 1;
+ return aarch64_bitmask_imm (val, int_mode);
+}
+
+
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
scalar_int_mode mode)
@@ -5786,7 +5922,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
/* Check if we have to emit a second instruction by checking to see
- if any of the upper 32 bits of the original DI mode value is set. */
+ if any of the upper 32 bits of the original DI mode value is set. */
if (val == val2)
return 1;
@@ -5822,36 +5958,43 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
- if (zero_match != 2 && one_match != 2)
+ if (zero_match < 2 && one_match < 2)
{
/* Try emitting a bitmask immediate with a movk replacing 16 bits.
For a 64-bit bitmask try whether changing 16 bits to all ones or
zeroes creates a valid bitmask. To check any repeated bitmask,
try using 16 bits from the other 32-bit half of val. */
- for (i = 0; i < 64; i += 16, mask <<= 16)
- {
- val2 = val & ~mask;
- if (val2 != val && aarch64_bitmask_imm (val2, mode))
- break;
- val2 = val | mask;
- if (val2 != val && aarch64_bitmask_imm (val2, mode))
- break;
- val2 = val2 & ~mask;
- val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
- if (val2 != val && aarch64_bitmask_imm (val2, mode))
- break;
- }
- if (i != 64)
- {
- if (generate)
+ for (i = 0; i < 64; i += 16)
+ if (aarch64_check_bitmask (val, val2, mask << i))
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ }
+ return 2;
+ }
+ }
+
+ /* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
+ if (zero_match + one_match == 0)
+ {
+ for (i = 0; i < 48; i += 16)
+ for (int j = i + 16; j < 64; j += 16)
+ if (aarch64_check_bitmask (val, val2, (mask << i) | (mask << j)))
{
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (j),
+ GEN_INT ((val >> j) & 0xffff)));
+ }
+ return 3;
}
- return 2;
- }
}
/* Generate 2-4 instructions, skipping 16 bits of all zeroes or ones which
@@ -5898,6 +6041,99 @@ aarch64_mov128_immediate (rtx imm)
}
+/* Return true if val can be encoded as a 12-bit unsigned immediate with
+ a left shift of 0 or 12 bits. */
+bool
+aarch64_uimm12_shift (HOST_WIDE_INT val)
+{
+ return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
+ || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
+ );
+}
+
+/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
+ that can be created with a left shift of 0 or 12. */
+static HOST_WIDE_INT
+aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
+{
+ /* Check to see if the value fits in 24 bits, as that is the maximum we can
+ handle correctly. */
+ gcc_assert ((val & 0xffffff) == val);
+
+ if (((val & 0xfff) << 0) == val)
+ return val;
+
+ return val & (0xfff << 12);
+}
+
+
+/* Test whether:
+
+ X = (X & AND_VAL) | IOR_VAL;
+
+ can be implemented using:
+
+ MOVK X, #(IOR_VAL >> shift), LSL #shift
+
+ Return the shift if so, otherwise return -1. */
+int
+aarch64_movk_shift (const wide_int_ref &and_val,
+ const wide_int_ref &ior_val)
+{
+ unsigned int precision = and_val.get_precision ();
+ unsigned HOST_WIDE_INT mask = 0xffff;
+ for (unsigned int shift = 0; shift < precision; shift += 16)
+ {
+ if (and_val == ~mask && (ior_val & mask) == ior_val)
+ return shift;
+ mask <<= 16;
+ }
+ return -1;
+}
+
+/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
+ Assumed precondition: VAL_IN is not zero. */
+
+unsigned HOST_WIDE_INT
+aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
+{
+ int lowest_bit_set = ctz_hwi (val_in);
+ int highest_bit_set = floor_log2 (val_in);
+ gcc_assert (val_in != 0);
+
+ return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
+ (HOST_WIDE_INT_1U << lowest_bit_set));
+}
+
+/* Create constant where bits outside of lowest bit set to highest bit set
+ are set to 1. */
+
+unsigned HOST_WIDE_INT
+aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
+{
+ return val_in | ~aarch64_and_split_imm1 (val_in);
+}
+
+/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
+
+bool
+aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
+{
+ scalar_int_mode int_mode;
+ if (!is_a <scalar_int_mode> (mode, &int_mode))
+ return false;
+
+ if (aarch64_bitmask_imm (val_in, int_mode))
+ return false;
+
+ if (aarch64_move_imm (val_in, int_mode))
+ return false;
+
+ unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
+
+ return aarch64_bitmask_imm (imm2, int_mode);
+}
+
/* Return the number of temporary registers that aarch64_add_offset_1
would need to add OFFSET to a register. */
@@ -10379,207 +10615,6 @@ aarch64_tls_referenced_p (rtx x)
}
-/* Return true if val can be encoded as a 12-bit unsigned immediate with
- a left shift of 0 or 12 bits. */
-bool
-aarch64_uimm12_shift (HOST_WIDE_INT val)
-{
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
- );
-}
-
-/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
- that can be created with a left shift of 0 or 12. */
-static HOST_WIDE_INT
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
-{
- /* Check to see if the value fits in 24 bits, as that is the maximum we can
- handle correctly. */
- gcc_assert ((val & 0xffffff) == val);
-
- if (((val & 0xfff) << 0) == val)
- return val;
-
- return val & (0xfff << 12);
-}
-
-/* Return true if val is an immediate that can be loaded into a
- register by a MOVZ instruction. */
-static bool
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
-{
- if (GET_MODE_SIZE (mode) > 4)
- {
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
- return 1;
- }
- else
- {
- /* Ignore sign extension. */
- val &= (HOST_WIDE_INT) 0xffffffff;
- }
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
-}
-
-/* Test whether:
-
- X = (X & AND_VAL) | IOR_VAL;
-
- can be implemented using:
-
- MOVK X, #(IOR_VAL >> shift), LSL #shift
-
- Return the shift if so, otherwise return -1. */
-int
-aarch64_movk_shift (const wide_int_ref &and_val,
- const wide_int_ref &ior_val)
-{
- unsigned int precision = and_val.get_precision ();
- unsigned HOST_WIDE_INT mask = 0xffff;
- for (unsigned int shift = 0; shift < precision; shift += 16)
- {
- if (and_val == ~mask && (ior_val & mask) == ior_val)
- return shift;
- mask <<= 16;
- }
- return -1;
-}
-
-/* VAL is a value with the inner mode of MODE. Replicate it to fill a
- 64-bit (DImode) integer. */
-
-static unsigned HOST_WIDE_INT
-aarch64_replicate_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
-{
- unsigned int size = GET_MODE_UNIT_PRECISION (mode);
- while (size < 64)
- {
- val &= (HOST_WIDE_INT_1U << size) - 1;
- val |= val << size;
- size *= 2;
- }
- return val;
-}
-
-/* Multipliers for repeating bitmasks of width 32, 16, 8, 4, and 2. */
-
-static const unsigned HOST_WIDE_INT bitmask_imm_mul[] =
- {
- 0x0000000100000001ull,
- 0x0001000100010001ull,
- 0x0101010101010101ull,
- 0x1111111111111111ull,
- 0x5555555555555555ull,
- };
-
-
-/* Return true if val is a valid bitmask immediate. */
-
-bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
-{
- unsigned HOST_WIDE_INT val, tmp, mask, first_one, next_one;
- int bits;
-
- /* Check for a single sequence of one bits and return quickly if so.
- The special cases of all ones and all zeroes returns false. */
- val = aarch64_replicate_bitmask_imm (val_in, mode);
- tmp = val + (val & -val);
-
- if (tmp == (tmp & -tmp))
- return (val + 1) > 1;
-
- /* Replicate 32-bit immediates so we can treat them as 64-bit. */
- if (mode == SImode)
- val = (val << 32) | (val & 0xffffffff);
-
- /* Invert if the immediate doesn't start with a zero bit - this means we
- only need to search for sequences of one bits. */
- if (val & 1)
- val = ~val;
-
- /* Find the first set bit and set tmp to val with the first sequence of one
- bits removed. Return success if there is a single sequence of ones. */
- first_one = val & -val;
- tmp = val & (val + first_one);
-
- if (tmp == 0)
- return true;
-
- /* Find the next set bit and compute the difference in bit position. */
- next_one = tmp & -tmp;
- bits = clz_hwi (first_one) - clz_hwi (next_one);
- mask = val ^ tmp;
-
- /* Check the bit position difference is a power of 2, and that the first
- sequence of one bits fits within 'bits' bits. */
- if ((mask >> bits) != 0 || bits != (bits & -bits))
- return false;
-
- /* Check the sequence of one bits is repeated 64/bits times. */
- return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
-}
-
-/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.
- Assumed precondition: VAL_IN Is not zero. */
-
-unsigned HOST_WIDE_INT
-aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
-{
- int lowest_bit_set = ctz_hwi (val_in);
- int highest_bit_set = floor_log2 (val_in);
- gcc_assert (val_in != 0);
-
- return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
- (HOST_WIDE_INT_1U << lowest_bit_set));
-}
-
-/* Create constant where bits outside of lowest bit set to highest bit set
- are set to 1. */
-
-unsigned HOST_WIDE_INT
-aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
-{
- return val_in | ~aarch64_and_split_imm1 (val_in);
-}
-
-/* Return true if VAL_IN is a valid 'and' bitmask immediate. */
-
-bool
-aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
-{
- scalar_int_mode int_mode;
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- return false;
-
- if (aarch64_bitmask_imm (val_in, int_mode))
- return false;
-
- if (aarch64_move_imm (val_in, int_mode))
- return false;
-
- unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
-
- return aarch64_bitmask_imm (imm2, int_mode);
-}
-
-/* Return true if val is an immediate that can be loaded into a
- register in a single instruction. */
-bool
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
-{
- scalar_int_mode int_mode;
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- return false;
-
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
- return 1;
- return aarch64_bitmask_imm (val, int_mode);
-}
-
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
diff --git a/gcc/testsuite/gcc.target/aarch64/pr106583.c b/gcc/testsuite/gcc.target/aarch64/pr106583.c
new file mode 100644
index 000000000..0f9315808
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr106583.c
@@ -0,0 +1,41 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps" } */
+
+long f1 (void)
+{
+ return 0x7efefefefefefeff;
+}
+
+long f2 (void)
+{
+ return 0x12345678aaaaaaaa;
+}
+
+long f3 (void)
+{
+ return 0x1234cccccccc5678;
+}
+
+long f4 (void)
+{
+ return 0x7777123456787777;
+}
+
+long f5 (void)
+{
+ return 0x5555555512345678;
+}
+
+long f6 (void)
+{
+ return 0x1234bbbb5678bbbb;
+}
+
+long f7 (void)
+{
+ return 0x4444123444445678;
+}
+
+
+/* { dg-final { scan-assembler-times {\tmovk\t} 14 } } */
+/* { dg-final { scan-assembler-times {\tmov\t} 7 } } */
--
2.33.0

View File

@ -0,0 +1,410 @@
From d76be4acadc0641cc8e795cd6b8a1c3c83b4fdb2 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wilco.dijkstra@arm.com>
Date: Mon, 5 Dec 2022 10:49:25 +0000
Subject: [PATCH 061/157] [Backport][SME] AArch64: Cleanup move immediate code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ba1536dac780f3f92c5eab999fda6931f6247fc1
Simplify, refactor and improve various move immediate functions.
Allow 32-bit MOVN/I as a valid 64-bit immediate which removes special
cases in aarch64_internal_mov_immediate. Add new constraint so the movdi
pattern only needs a single alternative for move immediate.
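For intuition, a standalone C sketch of the new MOVZ test (hypothetical
helper name; the real code uses ctz_hwi): a value is MOVZ-encodable when
all of its set bits fit in one 16-bit field at bit position 0, 16, 32 or 48.

static int
is_movz (unsigned long long val)
{
  /* Round the lowest set bit down to a 16-bit boundary (0, 16, 32, 48),
     then check that the remaining value fits in 16 bits.  */
  int shift = (val ? __builtin_ctzll (val) : 0) & 48;
  return (val >> shift) < 65536;
}
/* is_movz (0x1234ULL << 16) -> 1 : movz x0, #0x1234, lsl 16
   is_movz (0x100010000ULL)  -> 0 : needs more than one 16-bit field  */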
gcc/
* config/aarch64/aarch64.cc (aarch64_bitmask_imm): Use unsigned type.
(aarch64_is_mov_xn_imm): New function.
(aarch64_move_imm): Refactor, assert mode is SImode or DImode.
(aarch64_internal_mov_immediate): Assert mode is SImode or DImode.
Simplify special cases.
(aarch64_uimm12_shift): Simplify code.
(aarch64_clamp_to_uimm12_shift): Likewise.
(aarch64_movw_imm): Rename to aarch64_is_movz.
(aarch64_float_const_rtx_p): Pass either SImode or DImode to
aarch64_internal_mov_immediate.
(aarch64_rtx_costs): Likewise.
* config/aarch64/aarch64.md (movdi_aarch64): Merge 'N' and 'M'
constraints into single 'O'.
(mov<mode>_aarch64): Likewise.
* config/aarch64/aarch64-protos.h (aarch64_move_imm): Use unsigned.
(aarch64_bitmask_imm): Likewise.
(aarch64_uimm12_shift): Likewise.
(aarch64_is_mov_xn_imm): New prototype.
* config/aarch64/constraints.md: Add 'O' for 32/64-bit immediates,
limit 'N' to 64-bit only moves.
---
gcc/config/aarch64/aarch64-protos.h | 7 +-
gcc/config/aarch64/aarch64.cc | 158 ++++++++++++----------------
gcc/config/aarch64/aarch64.md | 17 ++-
gcc/config/aarch64/constraints.md | 5 +
4 files changed, 85 insertions(+), 102 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 97984f3ab..3ff1a0163 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -755,7 +755,7 @@ void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
bool aarch64_address_valid_for_prefetch_p (rtx, bool);
-bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+bool aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode);
unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
@@ -793,7 +793,7 @@ bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
-bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
+bool aarch64_move_imm (unsigned HOST_WIDE_INT, machine_mode);
machine_mode aarch64_sve_int_mode (machine_mode);
opt_machine_mode aarch64_sve_pred_mode (unsigned int);
machine_mode aarch64_sve_pred_mode (machine_mode);
@@ -843,8 +843,9 @@ bool aarch64_sve_float_arith_immediate_p (rtx, bool);
bool aarch64_sve_float_mul_immediate_p (rtx);
bool aarch64_split_dimode_const_store (rtx, rtx);
bool aarch64_symbolic_address_p (rtx);
-bool aarch64_uimm12_shift (HOST_WIDE_INT);
+bool aarch64_uimm12_shift (unsigned HOST_WIDE_INT);
int aarch64_movk_shift (const wide_int_ref &, const wide_int_ref &);
+bool aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
const char *aarch64_output_casesi (rtx *);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index cf7736994..acb659f53 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5812,12 +5812,10 @@ aarch64_bitmask_imm (unsigned HOST_WIDE_INT val)
/* Return true if VAL is a valid bitmask immediate for MODE. */
bool
-aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
+aarch64_bitmask_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
{
if (mode == DImode)
- return aarch64_bitmask_imm (val_in);
-
- unsigned HOST_WIDE_INT val = val_in;
+ return aarch64_bitmask_imm (val);
if (mode == SImode)
return aarch64_bitmask_imm ((val & 0xffffffff) | (val << 32));
@@ -5856,51 +5854,55 @@ aarch64_check_bitmask (unsigned HOST_WIDE_INT val,
}
-/* Return true if val is an immediate that can be loaded into a
- register by a MOVZ instruction. */
-static bool
-aarch64_movw_imm (HOST_WIDE_INT val, scalar_int_mode mode)
+/* Return true if VAL is a valid MOVZ immediate. */
+static inline bool
+aarch64_is_movz (unsigned HOST_WIDE_INT val)
{
- if (GET_MODE_SIZE (mode) > 4)
- {
- if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
- return 1;
- }
- else
- {
- /* Ignore sign extension. */
- val &= (HOST_WIDE_INT) 0xffffffff;
- }
- return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
+ return (val >> (ctz_hwi (val) & 48)) < 65536;
}
-/* Return true if VAL is an immediate that can be loaded into a
- register in a single instruction. */
+/* Return true if immediate VAL can be created by a 64-bit MOVI/MOVN/MOVZ. */
bool
-aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
+aarch64_is_mov_xn_imm (unsigned HOST_WIDE_INT val)
{
- scalar_int_mode int_mode;
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- return false;
+ return aarch64_is_movz (val) || aarch64_is_movz (~val)
+ || aarch64_bitmask_imm (val);
+}
- if (aarch64_movw_imm (val, int_mode) || aarch64_movw_imm (~val, int_mode))
- return 1;
- return aarch64_bitmask_imm (val, int_mode);
+
+/* Return true if VAL is an immediate that can be created by a single
+ MOV instruction. */
+bool
+aarch64_move_imm (unsigned HOST_WIDE_INT val, machine_mode mode)
+{
+ gcc_assert (mode == SImode || mode == DImode);
+
+ if (val < 65536)
+ return true;
+
+ unsigned HOST_WIDE_INT mask =
+ (val >> 32) == 0 || mode == SImode ? 0xffffffff : HOST_WIDE_INT_M1U;
+
+ if (aarch64_is_movz (val & mask) || aarch64_is_movz (~val & mask))
+ return true;
+
+ val = (val & mask) | ((val << 32) & ~mask);
+ return aarch64_bitmask_imm (val);
}
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
- scalar_int_mode mode)
+ machine_mode mode)
{
int i;
unsigned HOST_WIDE_INT val, val2, mask;
int one_match, zero_match;
int num_insns;
+ gcc_assert (mode == SImode || mode == DImode);
+
val = INTVAL (imm);
if (aarch64_move_imm (val, mode))
@@ -5910,31 +5912,6 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
return 1;
}
- /* Check to see if the low 32 bits are either 0xffffXXXX or 0xXXXXffff
- (with XXXX non-zero). In that case check to see if the move can be done in
- a smaller mode. */
- val2 = val & 0xffffffff;
- if (mode == DImode
- && aarch64_move_imm (val2, SImode)
- && (((val >> 32) & 0xffff) == 0 || (val >> 48) == 0))
- {
- if (generate)
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
-
- /* Check if we have to emit a second instruction by checking to see
- if any of the upper 32 bits of the original DI mode value is set. */
- if (val == val2)
- return 1;
-
- i = (val >> 48) ? 48 : 32;
-
- if (generate)
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
-
- return 2;
- }
-
if ((val >> 32) == 0 || mode == SImode)
{
if (generate)
@@ -5958,24 +5935,31 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
one_match = ((~val & mask) == 0) + ((~val & (mask << 16)) == 0) +
((~val & (mask << 32)) == 0) + ((~val & (mask << 48)) == 0);
+ /* Try a bitmask immediate and a movk to generate the immediate
+ in 2 instructions. */
+
if (zero_match < 2 && one_match < 2)
{
- /* Try emitting a bitmask immediate with a movk replacing 16 bits.
- For a 64-bit bitmask try whether changing 16 bits to all ones or
- zeroes creates a valid bitmask. To check any repeated bitmask,
- try using 16 bits from the other 32-bit half of val. */
-
for (i = 0; i < 64; i += 16)
- if (aarch64_check_bitmask (val, val2, mask << i))
- {
- if (generate)
- {
- emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
- emit_insn (gen_insv_immdi (dest, GEN_INT (i),
- GEN_INT ((val >> i) & 0xffff)));
- }
- return 2;
- }
+ {
+ if (aarch64_check_bitmask (val, val2, mask << i))
+ break;
+
+ val2 = val & ~(mask << i);
+ if ((val2 >> 32) == 0 && aarch64_move_imm (val2, DImode))
+ break;
+ }
+
+ if (i != 64)
+ {
+ if (generate)
+ {
+ emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
+ emit_insn (gen_insv_immdi (dest, GEN_INT (i),
+ GEN_INT ((val >> i) & 0xffff)));
+ }
+ return 2;
+ }
}
/* Try a bitmask plus 2 movk to generate the immediate in 3 instructions. */
@@ -6044,26 +6028,24 @@ aarch64_mov128_immediate (rtx imm)
/* Return true if val can be encoded as a 12-bit unsigned immediate with
a left shift of 0 or 12 bits. */
bool
-aarch64_uimm12_shift (HOST_WIDE_INT val)
+aarch64_uimm12_shift (unsigned HOST_WIDE_INT val)
{
- return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
- || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
- );
+ return val < 4096 || (val & 0xfff000) == val;
}
/* Returns the nearest value to VAL that will fit as a 12-bit unsigned immediate
that can be created with a left shift of 0 or 12. */
static HOST_WIDE_INT
-aarch64_clamp_to_uimm12_shift (HOST_WIDE_INT val)
+aarch64_clamp_to_uimm12_shift (unsigned HOST_WIDE_INT val)
{
/* Check to see if the value fits in 24 bits, as that is the maximum we can
handle correctly. */
- gcc_assert ((val & 0xffffff) == val);
+ gcc_assert (val < 0x1000000);
- if (((val & 0xfff) << 0) == val)
+ if (val < 4096)
return val;
- return val & (0xfff << 12);
+ return val & 0xfff000;
}
@@ -7211,8 +7193,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
return;
}
- aarch64_internal_mov_immediate (dest, imm, true,
- as_a <scalar_int_mode> (mode));
+ aarch64_internal_mov_immediate (dest, imm, true, mode);
}
/* Return the MEM rtx that provides the canary value that should be used
@@ -11410,9 +11391,7 @@ aarch64_float_const_rtx_p (rtx x)
&& SCALAR_FLOAT_MODE_P (mode)
&& aarch64_reinterpret_float_as_int (x, &ival))
{
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8) ? DImode : SImode;
int num_instr = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
return num_instr < 3;
@@ -14049,10 +14028,10 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
proportionally expensive to the number of instructions
required to build that constant. This is true whether we
are compiling for SPEED or otherwise. */
- if (!is_a <scalar_int_mode> (mode, &int_mode))
- int_mode = word_mode;
+ machine_mode imode = known_le (GET_MODE_SIZE (mode), 4)
+ ? SImode : DImode;
*cost = COSTS_N_INSNS (aarch64_internal_mov_immediate
- (NULL_RTX, x, false, int_mode));
+ (NULL_RTX, x, false, imode));
}
return true;
@@ -14068,9 +14047,8 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
bool succeed = aarch64_reinterpret_float_as_int (x, &ival);
gcc_assert (succeed);
- scalar_int_mode imode = (mode == HFmode
- ? SImode
- : int_mode_for_mode (mode).require ());
+ machine_mode imode = known_eq (GET_MODE_SIZE (mode), 8)
+ ? DImode : SImode;
int ncost = aarch64_internal_mov_immediate
(NULL_RTX, gen_int_mode (ival, imode), false, imode);
*cost += COSTS_N_INSNS (ncost);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7454a5c77..ea94152bf 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1288,16 +1288,15 @@
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
mov\\t%x0, %x1
mov\\t%0, %x1
mov\\t%x0, %1
- mov\\t%x0, %1
- mov\\t%w0, %1
+ * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
#
* return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
ldr\\t%x0, %1
@@ -1319,11 +1318,11 @@
DONE;
}"
;; The "mov_imm" type for CNTD is just a placeholder.
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,mov_imm,
+ [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
+ (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
+ (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
)
(define_insn "insv_imm<mode>"
@@ -1487,7 +1486,7 @@
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
- (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,O"))]
"TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
"@
@@ -1502,7 +1501,7 @@
ldr\\t%x0, %1
str\\t%x1, %0
mov\\t%x0, %x1
- mov\\t%x0, %1"
+ * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,neon_move,\
f_loadd,f_stored,load_8,store_8,mov_reg,\
fconstd")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index ee7587cca..750a42fb1 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -106,6 +106,11 @@
(define_constraint "N"
"A constant that can be used with a 64-bit MOV immediate operation."
+ (and (match_code "const_int")
+ (match_test "aarch64_is_mov_xn_imm (ival)")))
+
+(define_constraint "O"
+ "A constant that can be used with a 32 or 64-bit MOV immediate operation."
(and (match_code "const_int")
(match_test "aarch64_move_imm (ival, DImode)")))
--
2.33.0

@@ -0,0 +1,229 @@
From 5db3e7b68d5a443e908011b8d53de625ae462f82 Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Mon, 19 Jun 2023 15:55:28 +0100
Subject: [PATCH 062/157] [Backport][SME] AArch64: convert some patterns to
compact MD syntax
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c72a7b849853716d94e8d313be5dce3c22850113
This converts some patterns in the AArch64 backend to use the new
compact syntax.
gcc/ChangeLog:
* config/aarch64/aarch64.md (arches): Add nosimd.
(*mov<mode>_aarch64, *movsi_aarch64, *movdi_aarch64): Rewrite to
compact syntax.
---
gcc/config/aarch64/aarch64.md | 161 ++++++++++++++++------------------
1 file changed, 78 insertions(+), 83 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ea94152bf..5d02da42f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -378,7 +378,7 @@
;; As a convenience, "fp_q" means "fp" + the ability to move between
;; Q registers and is equivalent to "simd".
-(define_enum "arches" [ any rcpc8_4 fp fp_q simd sve fp16])
+(define_enum "arches" [ any rcpc8_4 fp fp_q simd nosimd sve fp16])
(define_enum_attr "arch" "arches" (const_string "any"))
@@ -409,6 +409,9 @@
(and (eq_attr "arch" "fp_q, simd")
(match_test "TARGET_SIMD"))
+ (and (eq_attr "arch" "nosimd")
+ (match_test "!TARGET_SIMD"))
+
(and (eq_attr "arch" "fp16")
(match_test "TARGET_FP_F16INST"))
@@ -1194,26 +1197,27 @@
)
(define_insn "*mov<mode>_aarch64"
- [(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r, w,r ,r,w, m,m,r,w,w")
- (match_operand:SHORT 1 "aarch64_mov_operand" " r,M,D<hq>,Usv,m,m,rZ,w,w,rZ,w"))]
+ [(set (match_operand:SHORT 0 "nonimmediate_operand")
+ (match_operand:SHORT 1 "aarch64_mov_operand"))]
"(register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
- "@
- mov\t%w0, %w1
- mov\t%w0, %1
- * return aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
- ldr<size>\t%w0, %1
- ldr\t%<size>0, %1
- str<size>\t%w1, %0
- str\t%<size>1, %0
- * return TARGET_SIMD ? \"umov\t%w0, %1.<v>[0]\" : \"fmov\t%w0, %s1\";
- * return TARGET_SIMD ? \"dup\t%0.<Vallxd>, %w1\" : \"fmov\t%s0, %w1\";
- * return TARGET_SIMD ? \"dup\t%<Vetype>0, %1.<v>[0]\" : \"fmov\t%s0, %s1\";"
- ;; The "mov_imm" type for CNT is just a placeholder.
- [(set_attr "type" "mov_reg,mov_imm,neon_move,mov_imm,load_4,load_4,store_4,
- store_4,neon_to_gp<q>,neon_from_gp<q>,neon_dup")
- (set_attr "arch" "*,*,simd,sve,*,*,*,*,*,*,*")]
+ {@ [cons: =0, 1; attrs: type, arch]
+ [r, r ; mov_reg , * ] mov\t%w0, %w1
+ [r, M ; mov_imm , * ] mov\t%w0, %1
+ [w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
+ /* The "mov_imm" type for CNT is just a placeholder. */
+ [r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ [r, m ; load_4 , * ] ldr<size>\t%w0, %1
+ [w, m ; load_4 , * ] ldr\t%<size>0, %1
+ [m, r Z ; store_4 , * ] str<size>\\t%w1, %0
+ [m, w ; store_4 , * ] str\t%<size>1, %0
+ [r, w ; neon_to_gp<q> , simd ] umov\t%w0, %1.<v>[0]
+     [r, w    ; neon_to_gp<q>  , nosimd] fmov\t%w0, %s1
+ [w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1
+ [w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
+ [w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0]
+ [w, w ; neon_dup , nosimd] fmov\t%s0, %s1
+ }
)
(define_expand "mov<mode>"
@@ -1250,79 +1254,70 @@
)
(define_insn_and_split "*movsi_aarch64"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m, m, r, r, r, w,r,w, w")
- (match_operand:SI 1 "aarch64_mov_operand" " r,r,k,M,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Ds"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand")
+ (match_operand:SI 1 "aarch64_mov_operand"))]
"(register_operand (operands[0], SImode)
|| aarch64_reg_or_zero (operands[1], SImode))"
- "@
- mov\\t%w0, %w1
- mov\\t%w0, %w1
- mov\\t%w0, %w1
- mov\\t%w0, %1
- #
- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
- ldr\\t%w0, %1
- ldr\\t%s0, %1
- str\\t%w1, %0
- str\\t%s1, %0
- adrp\\t%x0, %A1\;ldr\\t%w0, [%x0, %L1]
- adr\\t%x0, %c1
- adrp\\t%x0, %A1
- fmov\\t%s0, %w1
- fmov\\t%w0, %s1
- fmov\\t%s0, %s1
- * return aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);"
+ {@ [cons: =0, 1; attrs: type, arch, length]
+ [r k, r ; mov_reg , * , 4] mov\t%w0, %w1
+ [r , k ; mov_reg , * , 4] ^
+ [r , M ; mov_imm , * , 4] mov\t%w0, %1
+ [r , n ; mov_imm , * ,16] #
+ /* The "mov_imm" type for CNT is just a placeholder. */
+ [r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ [r , m ; load_4 , * , 4] ldr\t%w0, %1
+ [w , m ; load_4 , fp , 4] ldr\t%s0, %1
+ [m , r Z; store_4 , * , 4] str\t%w1, %0
+ [m , w ; store_4 , fp , 4] str\t%s1, %0
+     [r , Usw; load_4   , *   , 8] adrp\t%x0, %A1\;ldr\t%w0, [%x0, %L1]
+ [r , Usa; adr , * , 4] adr\t%x0, %c1
+ [r , Ush; adr , * , 4] adrp\t%x0, %A1
+ [w , r Z; f_mcr , fp , 4] fmov\t%s0, %w1
+ [r , w ; f_mrc , fp , 4] fmov\t%w0, %s1
+ [w , w ; fmov , fp , 4] fmov\t%s0, %s1
+ [w , Ds ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], SImode);
+ }
"CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), SImode)
&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
- [(const_int 0)]
- "{
- aarch64_expand_mov_immediate (operands[0], operands[1]);
- DONE;
- }"
- ;; The "mov_imm" type for CNT is just a placeholder.
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,load_4,
- load_4,store_4,store_4,load_4,adr,adr,f_mcr,f_mrc,fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")
-]
+ [(const_int 0)]
+ {
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ }
)
(define_insn_and_split "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,r, r,w, m,m, r, r, r, w,r,w, w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,O,n,Usv,m,m,rZ,w,Usw,Usa,Ush,rZ,w,w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand")
+ (match_operand:DI 1 "aarch64_mov_operand"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
- "@
- mov\\t%x0, %x1
- mov\\t%0, %x1
- mov\\t%x0, %1
- * return aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? \"mov\\t%x0, %1\" : \"mov\\t%w0, %1\";
- #
- * return aarch64_output_sve_cnt_immediate (\"cnt\", \"%x0\", operands[1]);
- ldr\\t%x0, %1
- ldr\\t%d0, %1
- str\\t%x1, %0
- str\\t%d1, %0
- * return TARGET_ILP32 ? \"adrp\\t%0, %A1\;ldr\\t%w0, [%0, %L1]\" : \"adrp\\t%0, %A1\;ldr\\t%0, [%0, %L1]\";
- adr\\t%x0, %c1
- adrp\\t%x0, %A1
- fmov\\t%d0, %x1
- fmov\\t%x0, %d1
- fmov\\t%d0, %d1
- * return aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);"
- "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
- && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
- [(const_int 0)]
- "{
- aarch64_expand_mov_immediate (operands[0], operands[1]);
- DONE;
- }"
- ;; The "mov_imm" type for CNTD is just a placeholder.
- [(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,mov_imm,
- load_8,load_8,store_8,store_8,load_8,adr,adr,f_mcr,f_mrc,
- fmov,neon_move")
- (set_attr "arch" "*,*,*,*,*,sve,*,fp,*,fp,*,*,*,fp,fp,fp,simd")
- (set_attr "length" "4,4,4,4,*, 4,4, 4,4, 4,8,4,4, 4, 4, 4, 4")]
+ {@ [cons: =0, 1; attrs: type, arch, length]
+ [r, r ; mov_reg , * , 4] mov\t%x0, %x1
+ [k, r ; mov_reg , * , 4] mov\t%0, %x1
+ [r, k ; mov_reg , * , 4] mov\t%x0, %1
+ [r, O ; mov_imm , * , 4] << aarch64_is_mov_xn_imm (INTVAL (operands[1])) ? "mov\t%x0, %1" : "mov\t%w0, %1";
+ [r, n ; mov_imm , * ,16] #
+ /* The "mov_imm" type for CNT is just a placeholder. */
+ [r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ [r, m ; load_8 , * , 4] ldr\t%x0, %1
+ [w, m ; load_8 , fp , 4] ldr\t%d0, %1
+ [m, r Z; store_8 , * , 4] str\t%x1, %0
+ [m, w ; store_8 , fp , 4] str\t%d1, %0
+     [r, Usw; load_8   , *   , 8] << TARGET_ILP32 ? "adrp\t%0, %A1\;ldr\t%w0, [%0, %L1]" : "adrp\t%0, %A1\;ldr\t%0, [%0, %L1]";
+ [r, Usa; adr , * , 4] adr\t%x0, %c1
+ [r, Ush; adr , * , 4] adrp\t%x0, %A1
+ [w, r Z; f_mcr , fp , 4] fmov\t%d0, %x1
+ [r, w ; f_mrc , fp , 4] fmov\t%x0, %d1
+ [w, w ; fmov , fp , 4] fmov\t%d0, %d1
+ [w, Dd ; neon_move, simd, 4] << aarch64_output_scalar_simd_mov_immediate (operands[1], DImode);
+ }
+ "CONST_INT_P (operands[1]) && !aarch64_move_imm (INTVAL (operands[1]), DImode)
+ && REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
+ [(const_int 0)]
+ {
+ aarch64_expand_mov_immediate (operands[0], operands[1]);
+ DONE;
+ }
)
(define_insn "insv_imm<mode>"
--
2.33.0

@@ -0,0 +1,792 @@
From 46310765c05cde8732e07bfb0df9f0ec25a34018 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:18 +0000
Subject: [PATCH 063/157] [Backport][SME] aarch64: Use SVE's RDVL instruction
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=80f47d7bbe38234e1530d27fe5c2f130223ca7a0
We didn't previously use SVE's RDVL instruction, since the CNT*
forms are preferred and provide most of the range. However,
there are some cases that RDVL can handle and CNT* can't,
and using RDVL-like instructions becomes important for SME.
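Both predicates below take the per-quadword factor of the poly_int value:
CNT[BHWD] with MUL covers small positive even factors whose multiplier fits
in [1, 16], while RDVL covers whole 16-byte quadwords with a signed
multiplier in [-32, 31]. A minimal sketch of the two ranges (standalone C,
not part of the patch; the example factors are chosen to match the
testsuite changes below):

    #include <stdio.h>

    /* Factor = the value contributed per 128-bit quadword of SVE vector.
       CNT[BHWD] with "mul #m": even factor in [2, 256] whose multiplier
       (factor / lowest set bit) is at most 16.  */
    static int cnt_factor_p (long long f)
    {
      return f >= 2 && f <= 16 * 16 && (f & 1) == 0 && f <= 16 * (f & -f);
    }

    /* RDVL x0, #n: factor = 16 * n for n in [-32, 31].  */
    static int rdvl_factor_p (long long f)
    {
      return f % 16 == 0 && f >= -32 * 16 && f <= 31 * 16;
    }

    int main (void)
    {
      printf ("svcntb()*17:  cnt=%d rdvl=%d\n",
              cnt_factor_p (17 * 16), rdvl_factor_p (17 * 16));  /* 0 1 */
      printf ("-svcntb():    cnt=%d rdvl=%d\n",
              cnt_factor_p (-16), rdvl_factor_p (-16));          /* 0 1 */
      printf ("svcntb()*16:  cnt=%d rdvl=%d\n",
              cnt_factor_p (16 * 16), rdvl_factor_p (16 * 16));  /* 1 1 */
      return 0;
    }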
gcc/
* config/aarch64/aarch64-protos.h (aarch64_sve_rdvl_immediate_p)
(aarch64_output_sve_rdvl): Declare.
* config/aarch64/aarch64.cc (aarch64_sve_cnt_factor_p): New
function, split out from...
(aarch64_sve_cnt_immediate_p): ...here.
(aarch64_sve_rdvl_factor_p): New function.
(aarch64_sve_rdvl_immediate_p): Likewise.
(aarch64_output_sve_rdvl): Likewise.
(aarch64_offset_temporaries): Rewrite the SVE handling to use RDVL
for some cases.
(aarch64_expand_mov_immediate): Handle RDVL immediates.
(aarch64_mov_operand_p): Likewise.
* config/aarch64/constraints.md (Usr): New constraint.
* config/aarch64/aarch64.md (*mov<SHORT:mode>_aarch64): Add an RDVL
alternative.
(*movsi_aarch64, *movdi_aarch64): Likewise.
gcc/testsuite/
* gcc.target/aarch64/sve/acle/asm/cntb.c: Tweak expected output.
* gcc.target/aarch64/sve/acle/asm/cnth.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cntw.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/cntd.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/prfb.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/prfh.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/prfw.c: Likewise.
* gcc.target/aarch64/sve/acle/asm/prfd.c: Likewise.
* gcc.target/aarch64/sve/loop_add_4.c: Expect RDVL to be used
to calculate the -17 and 17 factors.
* gcc.target/aarch64/sve/pcs/stack_clash_1.c: Likewise the 18 factor.
---
gcc/config/aarch64/aarch64-protos.h | 2 +
gcc/config/aarch64/aarch64.cc | 191 ++++++++++++------
gcc/config/aarch64/aarch64.md | 3 +
gcc/config/aarch64/constraints.md | 6 +
.../gcc.target/aarch64/sve/acle/asm/cntb.c | 71 +++++--
.../gcc.target/aarch64/sve/acle/asm/cntd.c | 12 +-
.../gcc.target/aarch64/sve/acle/asm/cnth.c | 20 +-
.../gcc.target/aarch64/sve/acle/asm/cntw.c | 16 +-
.../gcc.target/aarch64/sve/acle/asm/prfb.c | 6 +-
.../gcc.target/aarch64/sve/acle/asm/prfd.c | 4 +-
.../gcc.target/aarch64/sve/acle/asm/prfh.c | 4 +-
.../gcc.target/aarch64/sve/acle/asm/prfw.c | 4 +-
.../gcc.target/aarch64/sve/loop_add_4.c | 6 +-
.../aarch64/sve/pcs/stack_clash_1.c | 3 +-
14 files changed, 225 insertions(+), 123 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3ff1a0163..14a568140 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -802,6 +802,7 @@ bool aarch64_sve_mode_p (machine_mode);
HOST_WIDE_INT aarch64_fold_sve_cnt_pat (aarch64_svpattern, unsigned int);
bool aarch64_sve_cnt_immediate_p (rtx);
bool aarch64_sve_scalar_inc_dec_immediate_p (rtx);
+bool aarch64_sve_rdvl_immediate_p (rtx);
bool aarch64_sve_addvl_addpl_immediate_p (rtx);
bool aarch64_sve_vector_inc_dec_immediate_p (rtx);
int aarch64_add_offset_temporaries (rtx);
@@ -814,6 +815,7 @@ char *aarch64_output_sve_prefetch (const char *, rtx, const char *);
char *aarch64_output_sve_cnt_immediate (const char *, const char *, rtx);
char *aarch64_output_sve_cnt_pat_immediate (const char *, const char *, rtx *);
char *aarch64_output_sve_scalar_inc_dec (rtx);
+char *aarch64_output_sve_rdvl (rtx);
char *aarch64_output_sve_addvl_addpl (rtx);
char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index acb659f53..4194dfc70 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -5520,6 +5520,18 @@ aarch64_fold_sve_cnt_pat (aarch64_svpattern pattern, unsigned int nelts_per_vq)
return -1;
}
+/* Return true if a single CNT[BHWD] instruction can multiply FACTOR
+ by the number of 128-bit quadwords in an SVE vector. */
+
+static bool
+aarch64_sve_cnt_factor_p (HOST_WIDE_INT factor)
+{
+ /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
+ return (IN_RANGE (factor, 2, 16 * 16)
+ && (factor & 1) == 0
+ && factor <= 16 * (factor & -factor));
+}
+
/* Return true if we can move VALUE into a register using a single
CNT[BHWD] instruction. */
@@ -5527,11 +5539,7 @@ static bool
aarch64_sve_cnt_immediate_p (poly_int64 value)
{
HOST_WIDE_INT factor = value.coeffs[0];
- /* The coefficient must be [1, 16] * {2, 4, 8, 16}. */
- return (value.coeffs[1] == factor
- && IN_RANGE (factor, 2, 16 * 16)
- && (factor & 1) == 0
- && factor <= 16 * (factor & -factor));
+ return value.coeffs[1] == factor && aarch64_sve_cnt_factor_p (factor);
}
/* Likewise for rtx X. */
@@ -5647,6 +5655,50 @@ aarch64_output_sve_scalar_inc_dec (rtx offset)
-offset_value.coeffs[1], 0);
}
+/* Return true if a single RDVL instruction can multiply FACTOR by the
+ number of 128-bit quadwords in an SVE vector. */
+
+static bool
+aarch64_sve_rdvl_factor_p (HOST_WIDE_INT factor)
+{
+ return (multiple_p (factor, 16)
+ && IN_RANGE (factor, -32 * 16, 31 * 16));
+}
+
+/* Return true if we can move VALUE into a register using a single
+ RDVL instruction. */
+
+static bool
+aarch64_sve_rdvl_immediate_p (poly_int64 value)
+{
+ HOST_WIDE_INT factor = value.coeffs[0];
+ return value.coeffs[1] == factor && aarch64_sve_rdvl_factor_p (factor);
+}
+
+/* Likewise for rtx X. */
+
+bool
+aarch64_sve_rdvl_immediate_p (rtx x)
+{
+ poly_int64 value;
+ return poly_int_rtx_p (x, &value) && aarch64_sve_rdvl_immediate_p (value);
+}
+
+/* Return the asm string for moving RDVL immediate OFFSET into register
+ operand 0. */
+
+char *
+aarch64_output_sve_rdvl (rtx offset)
+{
+ static char buffer[sizeof ("rdvl\t%x0, #-") + 3 * sizeof (int)];
+ poly_int64 offset_value = rtx_to_poly_int64 (offset);
+ gcc_assert (aarch64_sve_rdvl_immediate_p (offset_value));
+
+ int factor = offset_value.coeffs[1];
+ snprintf (buffer, sizeof (buffer), "rdvl\t%%x0, #%d", factor / 16);
+ return buffer;
+}
+
/* Return true if we can add VALUE to a register using a single ADDVL
or ADDPL instruction. */
@@ -6227,13 +6279,13 @@ aarch64_offset_temporaries (bool add_p, poly_int64 offset)
count += 1;
else if (factor != 0)
{
- factor = abs (factor);
- if (factor > 16 * (factor & -factor))
- /* Need one register for the CNT result and one for the multiplication
- factor. If necessary, the second temporary can be reused for the
- constant part of the offset. */
+ factor /= (HOST_WIDE_INT) least_bit_hwi (factor);
+ if (!IN_RANGE (factor, -32, 31))
+ /* Need one register for the CNT or RDVL result and one for the
+ multiplication factor. If necessary, the second temporary
+ can be reused for the constant part of the offset. */
return 2;
- /* Need one register for the CNT result (which might then
+ /* Need one register for the CNT or RDVL result (which might then
be shifted). */
count += 1;
}
@@ -6322,85 +6374,100 @@ aarch64_add_offset (scalar_int_mode mode, rtx dest, rtx src,
/* Otherwise use a CNT-based sequence. */
else if (factor != 0)
{
- /* Use a subtraction if we have a negative factor. */
- rtx_code code = PLUS;
- if (factor < 0)
- {
- factor = -factor;
- code = MINUS;
- }
+ /* Calculate CNTB * FACTOR / 16 as CNTB * REL_FACTOR * 2**SHIFT,
+ with negative shifts indicating a shift right. */
+ HOST_WIDE_INT low_bit = least_bit_hwi (factor);
+ HOST_WIDE_INT rel_factor = factor / low_bit;
+ int shift = exact_log2 (low_bit) - 4;
+ gcc_assert (shift >= -4 && (rel_factor & 1) != 0);
+
+ /* Set CODE, VAL and SHIFT so that [+-] VAL * 2**SHIFT is
+ equal to CNTB * FACTOR / 16, with CODE being the [+-].
- /* Calculate CNTD * FACTOR / 2. First try to fold the division
- into the multiplication. */
+ We can avoid a multiplication if REL_FACTOR is in the range
+ of RDVL, although there are then various optimizations that
+ we can try on top. */
+ rtx_code code = PLUS;
rtx val;
- int shift = 0;
- if (factor & 1)
- /* Use a right shift by 1. */
- shift = -1;
- else
- factor /= 2;
- HOST_WIDE_INT low_bit = factor & -factor;
- if (factor <= 16 * low_bit)
+ if (IN_RANGE (rel_factor, -32, 31))
{
- if (factor > 16 * 8)
+ /* Try to use an unshifted CNT[BHWD] or RDVL. */
+ if (aarch64_sve_cnt_factor_p (factor)
+ || aarch64_sve_rdvl_factor_p (factor))
+ {
+ val = gen_int_mode (poly_int64 (factor, factor), mode);
+ shift = 0;
+ }
+ /* Try to subtract an unshifted CNT[BHWD]. */
+ else if (aarch64_sve_cnt_factor_p (-factor))
{
- /* "CNTB Xn, ALL, MUL #FACTOR" is out of range, so calculate
- the value with the minimum multiplier and shift it into
- position. */
- int extra_shift = exact_log2 (low_bit);
- shift += extra_shift;
- factor >>= extra_shift;
+ code = MINUS;
+ val = gen_int_mode (poly_int64 (-factor, -factor), mode);
+ shift = 0;
}
- val = gen_int_mode (poly_int64 (factor * 2, factor * 2), mode);
+ /* If subtraction is free, prefer to load a positive constant.
+ In the best case this will fit a shifted CNTB. */
+ else if (src != const0_rtx && rel_factor < 0)
+ {
+ code = MINUS;
+ val = gen_int_mode (-rel_factor * BYTES_PER_SVE_VECTOR, mode);
+ }
+ /* Otherwise use a shifted RDVL or CNT[BHWD]. */
+ else
+ val = gen_int_mode (rel_factor * BYTES_PER_SVE_VECTOR, mode);
}
else
{
- /* Base the factor on LOW_BIT if we can calculate LOW_BIT
- directly, since that should increase the chances of being
- able to use a shift and add sequence. If LOW_BIT itself
- is out of range, just use CNTD. */
- if (low_bit <= 16 * 8)
- factor /= low_bit;
+ /* If we can calculate CNTB << SHIFT directly, prefer to do that,
+ since it should increase the chances of being able to use
+ a shift and add sequence for the multiplication.
+ If CNTB << SHIFT is out of range, stick with the current
+ shift factor. */
+ if (IN_RANGE (low_bit, 2, 16 * 16))
+ {
+ val = gen_int_mode (poly_int64 (low_bit, low_bit), mode);
+ shift = 0;
+ }
else
- low_bit = 1;
+ val = gen_int_mode (BYTES_PER_SVE_VECTOR, mode);
- val = gen_int_mode (poly_int64 (low_bit * 2, low_bit * 2), mode);
val = aarch64_force_temporary (mode, temp1, val);
+ /* Prefer to multiply by a positive factor and subtract rather
+ than multiply by a negative factor and add, since positive
+ values are usually easier to move. */
+ if (rel_factor < 0 && src != const0_rtx)
+ {
+ rel_factor = -rel_factor;
+ code = MINUS;
+ }
+
if (can_create_pseudo_p ())
{
- rtx coeff1 = gen_int_mode (factor, mode);
+ rtx coeff1 = gen_int_mode (rel_factor, mode);
val = expand_mult (mode, val, coeff1, NULL_RTX, true, true);
}
else
{
- /* Go back to using a negative multiplication factor if we have
- no register from which to subtract. */
- if (code == MINUS && src == const0_rtx)
- {
- factor = -factor;
- code = PLUS;
- }
- rtx coeff1 = gen_int_mode (factor, mode);
+ rtx coeff1 = gen_int_mode (rel_factor, mode);
coeff1 = aarch64_force_temporary (mode, temp2, coeff1);
val = gen_rtx_MULT (mode, val, coeff1);
}
}
+ /* Multiply by 2 ** SHIFT. */
if (shift > 0)
{
- /* Multiply by 1 << SHIFT. */
val = aarch64_force_temporary (mode, temp1, val);
val = gen_rtx_ASHIFT (mode, val, GEN_INT (shift));
}
- else if (shift == -1)
+ else if (shift < 0)
{
- /* Divide by 2. */
val = aarch64_force_temporary (mode, temp1, val);
- val = gen_rtx_ASHIFTRT (mode, val, const1_rtx);
+ val = gen_rtx_ASHIFTRT (mode, val, GEN_INT (-shift));
}
- /* Calculate SRC +/- CNTD * FACTOR / 2. */
+ /* Add the result to SRC or subtract the result from SRC. */
if (src != const0_rtx)
{
val = aarch64_force_temporary (mode, temp1, val);
@@ -7045,7 +7112,9 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
aarch64_report_sve_required ();
return;
}
- if (base == const0_rtx && aarch64_sve_cnt_immediate_p (offset))
+ if (base == const0_rtx
+ && (aarch64_sve_cnt_immediate_p (offset)
+ || aarch64_sve_rdvl_immediate_p (offset)))
emit_insn (gen_rtx_SET (dest, imm));
else
{
@@ -21751,7 +21820,9 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
if (SYMBOL_REF_P (x) && mode == DImode && CONSTANT_ADDRESS_P (x))
return true;
- if (TARGET_SVE && aarch64_sve_cnt_immediate_p (x))
+ if (TARGET_SVE
+ && (aarch64_sve_cnt_immediate_p (x)
+ || aarch64_sve_rdvl_immediate_p (x)))
return true;
return aarch64_classify_symbolic_expression (x)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 5d02da42f..c0977a3da 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1207,6 +1207,7 @@
[w, D<hq>; neon_move , simd ] << aarch64_output_scalar_simd_mov_immediate (operands[1], <MODE>mode);
/* The "mov_imm" type for CNT is just a placeholder. */
[r, Usv ; mov_imm , sve ] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ [r, Usr ; mov_imm , sve ] << aarch64_output_sve_rdvl (operands[1]);
[r, m ; load_4 , * ] ldr<size>\t%w0, %1
[w, m ; load_4 , * ] ldr\t%<size>0, %1
[m, r Z ; store_4 , * ] str<size>\\t%w1, %0
@@ -1265,6 +1266,7 @@
[r , n ; mov_imm , * ,16] #
/* The "mov_imm" type for CNT is just a placeholder. */
[r , Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ [r , Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
[r , m ; load_4 , * , 4] ldr\t%w0, %1
[w , m ; load_4 , fp , 4] ldr\t%s0, %1
[m , r Z; store_4 , * , 4] str\t%w1, %0
@@ -1299,6 +1301,7 @@
[r, n ; mov_imm , * ,16] #
/* The "mov_imm" type for CNT is just a placeholder. */
[r, Usv; mov_imm , sve , 4] << aarch64_output_sve_cnt_immediate ("cnt", "%x0", operands[1]);
+ [r, Usr; mov_imm , sve, 4] << aarch64_output_sve_rdvl (operands[1]);
[r, m ; load_8 , * , 4] ldr\t%x0, %1
[w, m ; load_8 , fp , 4] ldr\t%d0, %1
[m, r Z; store_8 , * , 4] str\t%x1, %0
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 750a42fb1..212a73416 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -214,6 +214,12 @@
(and (match_code "const_int")
(match_test "aarch64_high_bits_all_ones_p (ival)")))
+(define_constraint "Usr"
+ "@internal
+ A constraint that matches a value produced by RDVL."
+ (and (match_code "const_poly_int")
+ (match_test "aarch64_sve_rdvl_immediate_p (op)")))
+
(define_constraint "Usv"
"@internal
A constraint that matches a VG-based constant that can be loaded by
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
index 8b8fe8e4f..a22d8a28d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntb.c
@@ -51,19 +51,24 @@ PROTO (cntb_15, uint64_t, ()) { return svcntb () * 15; }
*/
PROTO (cntb_16, uint64_t, ()) { return svcntb () * 16; }
-/* Other sequences would be OK. */
/*
** cntb_17:
-** cntb x0, all, mul #16
-** incb x0
+** rdvl x0, #17
** ret
*/
PROTO (cntb_17, uint64_t, ()) { return svcntb () * 17; }
+/*
+** cntb_31:
+** rdvl x0, #31
+** ret
+*/
+PROTO (cntb_31, uint64_t, ()) { return svcntb () * 31; }
+
/*
** cntb_32:
-** cntd (x[0-9]+)
-** lsl x0, \1, 8
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
** ret
*/
PROTO (cntb_32, uint64_t, ()) { return svcntb () * 32; }
@@ -80,16 +85,16 @@ PROTO (cntb_33, uint64_t, ()) { return svcntb () * 33; }
/*
** cntb_64:
-** cntd (x[0-9]+)
-** lsl x0, \1, 9
+** cntb (x[0-9]+)
+** lsl x0, \1, 6
** ret
*/
PROTO (cntb_64, uint64_t, ()) { return svcntb () * 64; }
/*
** cntb_128:
-** cntd (x[0-9]+)
-** lsl x0, \1, 10
+** cntb (x[0-9]+)
+** lsl x0, \1, 7
** ret
*/
PROTO (cntb_128, uint64_t, ()) { return svcntb () * 128; }
@@ -106,46 +111,70 @@ PROTO (cntb_129, uint64_t, ()) { return svcntb () * 129; }
/*
** cntb_m1:
-** cntb (x[0-9]+)
-** neg x0, \1
+** rdvl x0, #-1
** ret
*/
PROTO (cntb_m1, uint64_t, ()) { return -svcntb (); }
/*
** cntb_m13:
-** cntb (x[0-9]+), all, mul #13
-** neg x0, \1
+** rdvl x0, #-13
** ret
*/
PROTO (cntb_m13, uint64_t, ()) { return -svcntb () * 13; }
/*
** cntb_m15:
-** cntb (x[0-9]+), all, mul #15
-** neg x0, \1
+** rdvl x0, #-15
** ret
*/
PROTO (cntb_m15, uint64_t, ()) { return -svcntb () * 15; }
/*
** cntb_m16:
-** cntb (x[0-9]+), all, mul #16
-** neg x0, \1
+** rdvl x0, #-16
** ret
*/
PROTO (cntb_m16, uint64_t, ()) { return -svcntb () * 16; }
-/* Other sequences would be OK. */
/*
** cntb_m17:
-** cntb x0, all, mul #16
-** incb x0
-** neg x0, x0
+** rdvl x0, #-17
** ret
*/
PROTO (cntb_m17, uint64_t, ()) { return -svcntb () * 17; }
+/*
+** cntb_m32:
+** rdvl x0, #-32
+** ret
+*/
+PROTO (cntb_m32, uint64_t, ()) { return -svcntb () * 32; }
+
+/*
+** cntb_m33:
+** rdvl x0, #-32
+** decb x0
+** ret
+*/
+PROTO (cntb_m33, uint64_t, ()) { return -svcntb () * 33; }
+
+/*
+** cntb_m34:
+** rdvl (x[0-9]+), #-17
+** lsl x0, \1, #?1
+** ret
+*/
+PROTO (cntb_m34, uint64_t, ()) { return -svcntb () * 34; }
+
+/*
+** cntb_m64:
+** rdvl (x[0-9]+), #-1
+** lsl x0, \1, #?6
+** ret
+*/
+PROTO (cntb_m64, uint64_t, ()) { return -svcntb () * 64; }
+
/*
** incb_1:
** incb x0
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
index 0d0ed4849..090a643b4 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntd.c
@@ -54,8 +54,8 @@ PROTO (cntd_16, uint64_t, ()) { return svcntd () * 16; }
/* Other sequences would be OK. */
/*
** cntd_17:
-** cntb x0, all, mul #2
-** incd x0
+** rdvl (x[0-9]+), #17
+** asr x0, \1, 3
** ret
*/
PROTO (cntd_17, uint64_t, ()) { return svcntd () * 17; }
@@ -107,8 +107,7 @@ PROTO (cntd_m15, uint64_t, ()) { return -svcntd () * 15; }
/*
** cntd_m16:
-** cntb (x[0-9]+), all, mul #2
-** neg x0, \1
+** rdvl x0, #-2
** ret
*/
PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
@@ -116,9 +115,8 @@ PROTO (cntd_m16, uint64_t, ()) { return -svcntd () * 16; }
/* Other sequences would be OK. */
/*
** cntd_m17:
-** cntb x0, all, mul #2
-** incd x0
-** neg x0, x0
+** rdvl (x[0-9]+), #-17
+** asr x0, \1, 3
** ret
*/
PROTO (cntd_m17, uint64_t, ()) { return -svcntd () * 17; }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
index c29930f15..1a4e7dc0e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cnth.c
@@ -54,8 +54,8 @@ PROTO (cnth_16, uint64_t, ()) { return svcnth () * 16; }
/* Other sequences would be OK. */
/*
** cnth_17:
-** cntb x0, all, mul #8
-** inch x0
+** rdvl (x[0-9]+), #17
+** asr x0, \1, 1
** ret
*/
PROTO (cnth_17, uint64_t, ()) { return svcnth () * 17; }
@@ -69,16 +69,16 @@ PROTO (cnth_32, uint64_t, ()) { return svcnth () * 32; }
/*
** cnth_64:
-** cntd (x[0-9]+)
-** lsl x0, \1, 8
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
** ret
*/
PROTO (cnth_64, uint64_t, ()) { return svcnth () * 64; }
/*
** cnth_128:
-** cntd (x[0-9]+)
-** lsl x0, \1, 9
+** cntb (x[0-9]+)
+** lsl x0, \1, 6
** ret
*/
PROTO (cnth_128, uint64_t, ()) { return svcnth () * 128; }
@@ -109,8 +109,7 @@ PROTO (cnth_m15, uint64_t, ()) { return -svcnth () * 15; }
/*
** cnth_m16:
-** cntb (x[0-9]+), all, mul #8
-** neg x0, \1
+** rdvl x0, #-8
** ret
*/
PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
@@ -118,9 +117,8 @@ PROTO (cnth_m16, uint64_t, ()) { return -svcnth () * 16; }
/* Other sequences would be OK. */
/*
** cnth_m17:
-** cntb x0, all, mul #8
-** inch x0
-** neg x0, x0
+** rdvl (x[0-9]+), #-17
+** asr x0, \1, 1
** ret
*/
PROTO (cnth_m17, uint64_t, ()) { return -svcnth () * 17; }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
index e26cc67a4..9d1697690 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/cntw.c
@@ -54,8 +54,8 @@ PROTO (cntw_16, uint64_t, ()) { return svcntw () * 16; }
/* Other sequences would be OK. */
/*
** cntw_17:
-** cntb x0, all, mul #4
-** incw x0
+** rdvl (x[0-9]+), #17
+** asr x0, \1, 2
** ret
*/
PROTO (cntw_17, uint64_t, ()) { return svcntw () * 17; }
@@ -76,8 +76,8 @@ PROTO (cntw_64, uint64_t, ()) { return svcntw () * 64; }
/*
** cntw_128:
-** cntd (x[0-9]+)
-** lsl x0, \1, 8
+** cntb (x[0-9]+)
+** lsl x0, \1, 5
** ret
*/
PROTO (cntw_128, uint64_t, ()) { return svcntw () * 128; }
@@ -108,8 +108,7 @@ PROTO (cntw_m15, uint64_t, ()) { return -svcntw () * 15; }
/*
** cntw_m16:
-** cntb (x[0-9]+), all, mul #4
-** neg x0, \1
+** rdvl (x[0-9]+), #-4
** ret
*/
PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
@@ -117,9 +116,8 @@ PROTO (cntw_m16, uint64_t, ()) { return -svcntw () * 16; }
/* Other sequences would be OK. */
/*
** cntw_m17:
-** cntb x0, all, mul #4
-** incw x0
-** neg x0, x0
+** rdvl (x[0-9]+), #-17
+** asr x0, \1, 2
** ret
*/
PROTO (cntw_m17, uint64_t, ()) { return -svcntw () * 17; }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
index c90730a03..94cd3a066 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfb.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfb_vnum_31, uint16_t,
/*
** prfb_vnum_32:
-** cntd (x[0-9]+)
-** lsl (x[0-9]+), \1, #?8
+** cntb (x[0-9]+)
+** lsl (x[0-9]+), \1, #?5
** add (x[0-9]+), (\2, x0|x0, \2)
** prfb pldl1keep, p0, \[\3\]
** ret
@@ -240,7 +240,7 @@ TEST_PREFETCH (prfb_vnum_m32, uint16_t,
/*
** prfb_vnum_m33:
** ...
-** prfb pldl1keep, p0, \[x[0-9]+\]
+** prfb pldl1keep, p0, \[x[0-9]+(, x[0-9]+)?\]
** ret
*/
TEST_PREFETCH (prfb_vnum_m33, uint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
index 869ef3d3e..b7a116cf0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfd.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfd_vnum_31, uint16_t,
/*
** prfd_vnum_32:
-** cntd (x[0-9]+)
-** lsl (x[0-9]+), \1, #?8
+** cntb (x[0-9]+)
+** lsl (x[0-9]+), \1, #?5
** add (x[0-9]+), (\2, x0|x0, \2)
** prfd pldl1keep, p0, \[\3\]
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
index 45a735eae..9d3df6bd3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfh.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfh_vnum_31, uint16_t,
/*
** prfh_vnum_32:
-** cntd (x[0-9]+)
-** lsl (x[0-9]+), \1, #?8
+** cntb (x[0-9]+)
+** lsl (x[0-9]+), \1, #?5
** add (x[0-9]+), (\2, x0|x0, \2)
** prfh pldl1keep, p0, \[\3\]
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
index 444187f45..6962abab6 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/prfw.c
@@ -218,8 +218,8 @@ TEST_PREFETCH (prfw_vnum_31, uint16_t,
/*
** prfw_vnum_32:
-** cntd (x[0-9]+)
-** lsl (x[0-9]+), \1, #?8
+** cntb (x[0-9]+)
+** lsl (x[0-9]+), \1, #?5
** add (x[0-9]+), (\2, x0|x0, \2)
** prfw pldl1keep, p0, \[\3\]
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
index 9ead9c21b..7f02497e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/loop_add_4.c
@@ -68,8 +68,7 @@ TEST_ALL (LOOP)
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.s, w[0-9]+, w[0-9]+\n} 3 } } */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 2\]} 8 } } */
-/* 2 for the calculations of -17 and 17. */
-/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tincw\tx[0-9]+\n} 8 } } */
/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #16\n} 1 } } */
/* { dg-final { scan-assembler-times {\tdecw\tz[0-9]+\.s, all, mul #15\n} 1 } } */
@@ -86,8 +85,7 @@ TEST_ALL (LOOP)
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.d, x[0-9]+, x[0-9]+\n} 3 } } */
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]+/z, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7]+, \[x[0-9]+, x[0-9]+, lsl 3\]} 8 } } */
-/* 2 for the calculations of -17 and 17. */
-/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 10 } } */
+/* { dg-final { scan-assembler-times {\tincd\tx[0-9]+\n} 8 } } */
/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #16\n} 1 } } */
/* { dg-final { scan-assembler-times {\tdecd\tz[0-9]+\.d, all, mul #15\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
index 110947a6c..5de34fc61 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_1.c
@@ -6,8 +6,7 @@
/*
** test_1:
-** cntd x12, all, mul #9
-** lsl x12, x12, #?4
+** rdvl x12, #18
** mov x11, sp
** ...
** sub sp, sp, x12
--
2.33.0

@@ -0,0 +1,137 @@
From c0badff223a1f5ea5a0f75df72f5d0138d94d8e6 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:19 +0000
Subject: [PATCH 064/157] [Backport][SME] aarch64: Make AARCH64_FL_SVE
requirements explicit
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd7aaef62a43efe52eece525eea4d7d252b0c148
So far, all intrinsics covered by the aarch64-sve-builtins*
framework have (naturally enough) required at least SVE.
However, arm_sme.h defines a couple of intrinsics that can
be called by any code. It's therefore necessary to make
the implicit SVE requirement explicit.
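REQUIRED_EXTENSIONS is a bitmask, and registration previously OR-ed
AARCH64_FL_SVE into every entry; once that implicit OR is dropped, each
.def block must name SVE itself. A minimal sketch of the availability
check (standalone C with made-up flag values; the real AARCH64_FL_*
constants and lookup logic live in the GCC sources):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative flag bits only.  */
    enum { FL_SVE = 1 << 0, FL_SVE2 = 1 << 1, FL_SVE2_AES = 1 << 2 };

    /* A builtin is usable when every bit it requires is enabled.  */
    static int builtin_available_p (uint64_t required, uint64_t enabled)
    {
      return (required & ~enabled) == 0;
    }

    int main (void)
    {
      uint64_t svaesd_req = FL_SVE | FL_SVE2 | FL_SVE2_AES;
      printf ("%d\n", builtin_available_p (svaesd_req, FL_SVE | FL_SVE2));                /* 0 */
      printf ("%d\n", builtin_available_p (svaesd_req, FL_SVE | FL_SVE2 | FL_SVE2_AES));  /* 1 */
      return 0;
    }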
gcc/
* config/aarch64/aarch64-sve-builtins.cc (function_groups): Remove
implied requirement on SVE.
* config/aarch64/aarch64-sve-builtins-base.def: Explicitly require SVE.
* config/aarch64/aarch64-sve-builtins-sve2.def: Likewise.
---
.../aarch64/aarch64-sve-builtins-base.def | 10 +++++-----
.../aarch64/aarch64-sve-builtins-sve2.def | 18 +++++++++++++-----
gcc/config/aarch64/aarch64-sve-builtins.cc | 2 +-
3 files changed, 19 insertions(+), 11 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index ffdf7cb4c..3a58f76c3 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define REQUIRED_EXTENSIONS 0
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE
DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz)
DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz)
DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit)
@@ -318,7 +318,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_BF16
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16
DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
@@ -330,7 +330,7 @@ DEF_SVE_FUNCTION (svcvt, unary_convert, cvt_bfloat, mxz)
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_I8MM
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM
DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none)
@@ -339,11 +339,11 @@ DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none)
DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_F32MM
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F32MM
DEF_SVE_FUNCTION (svmmla, mmla, s_float, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_F64MM
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM
DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 635089ffc..d5f23a887 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE2
+#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2
DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none)
DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none)
DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none)
@@ -189,7 +189,9 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_AES)
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+ | AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_AES)
DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none)
DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none)
DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none)
@@ -198,17 +200,23 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none)
DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM)
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+ | AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_BITPERM)
DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none)
DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none)
DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SHA3)
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+ | AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_SHA3)
DEF_SVE_FUNCTION (svrax1, binary, d_integer, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE2 | AARCH64_FL_SVE2_SM4)
+#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
+ | AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_SM4)
DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 7924cdf0f..dde01f676 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -525,7 +525,7 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
static CONSTEXPR const function_group_info function_groups[] = {
#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
{ #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
- REQUIRED_EXTENSIONS | AARCH64_FL_SVE },
+ REQUIRED_EXTENSIONS },
#include "aarch64-sve-builtins.def"
};
--
2.33.0

@@ -0,0 +1,562 @@
From e99332e15895156632949f3b6c3080fc9d994b13 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:19 +0000
Subject: [PATCH 065/157] [Backport][SME] aarch64: Add group suffixes to SVE
intrinsics
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b607f197967e052d7d7e29f6b41eded18f8c65d
The SME2 ACLE adds a new "group" suffix component to the naming
convention for SVE intrinsics. This is also used in the new tuple
forms of the svreinterpret intrinsics.
This patch adds support for group suffixes and defines the
x2, x3 and x4 suffixes that are needed for the svreinterprets.
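With the new component, a full intrinsic name concatenates the base name,
mode and type suffixes, group suffix and predication suffix. A rough sketch
of the composition (standalone C; the suffix strings are illustrative
stand-ins for the mode_suffixes, type_suffixes, group_suffixes and
pred_suffixes tables):

    #include <stdio.h>

    static void print_name (const char *base, const char *types,
                            const char *group, const char *pred)
    {
      /* Mirrors the ordering used by function_builder::get_name:
         base, then type suffixes, then group, then predication.  */
      printf ("%s%s%s%s\n", base, types, group, pred);
    }

    int main (void)
    {
      print_name ("svreinterpret", "_s8_f32", "_x2", "");  /* svreinterpret_s8_f32_x2 */
      print_name ("svadd", "_s32", "", "_m");              /* svadd_s32_m */
      return 0;
    }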
gcc/
* config/aarch64/aarch64-sve-builtins-shapes.cc (build_one): Take
a group suffix index parameter.
(build_32_64, build_all): Update accordingly. Iterate over all
group suffixes.
* config/aarch64/aarch64-sve-builtins-sve2.cc (svqrshl_impl::fold)
(svqshl_impl::fold, svrshl_impl::fold): Update function_instance
constructors.
* config/aarch64/aarch64-sve-builtins.cc (group_suffixes): New array.
(groups_none): New constant.
(function_groups): Initialize the groups field.
(function_instance::hash): Hash the group index.
(function_builder::get_name): Add the group suffix.
(function_builder::add_overloaded_functions): Iterate over all
group suffixes.
(function_resolver::lookup_form): Take a group suffix parameter.
(function_resolver::resolve_to): Likewise.
* config/aarch64/aarch64-sve-builtins.def (DEF_SVE_GROUP_SUFFIX): New
macro.
(x2, x3, x4): New group suffixes.
* config/aarch64/aarch64-sve-builtins.h (group_suffix_index): New enum.
(group_suffix_info): New structure.
(function_group_info::groups): New member variable.
(function_instance::group_suffix_id): Likewise.
(group_suffixes): New array.
(function_instance::operator==): Compare the group suffixes.
(function_instance::group_suffix): New function.
---
.../aarch64/aarch64-sve-builtins-shapes.cc | 53 ++++++------
.../aarch64/aarch64-sve-builtins-sve2.cc | 10 +--
gcc/config/aarch64/aarch64-sve-builtins.cc | 84 +++++++++++++------
gcc/config/aarch64/aarch64-sve-builtins.def | 9 ++
gcc/config/aarch64/aarch64-sve-builtins.h | 81 ++++++++++++++----
5 files changed, 165 insertions(+), 72 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 4fa4181b9..3ecef026c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -275,18 +275,20 @@ parse_signature (const function_instance &instance, const char *format,
}
/* Add one function instance for GROUP, using mode suffix MODE_SUFFIX_ID,
- the type suffixes at index TI and the predication suffix at index PI.
- The other arguments are as for build_all. */
+ the type suffixes at index TI, the group suffixes at index GI, and the
+ predication suffix at index PI. The other arguments are as for
+ build_all. */
static void
build_one (function_builder &b, const char *signature,
const function_group_info &group, mode_suffix_index mode_suffix_id,
- unsigned int ti, unsigned int pi, bool force_direct_overloads)
+ unsigned int ti, unsigned int gi, unsigned int pi,
+ bool force_direct_overloads)
{
/* Byte forms of svdupq take 16 arguments. */
auto_vec<tree, 16> argument_types;
function_instance instance (group.base_name, *group.base, *group.shape,
mode_suffix_id, group.types[ti],
- group.preds[pi]);
+ group.groups[gi], group.preds[pi]);
tree return_type = parse_signature (instance, signature, argument_types);
apply_predication (instance, return_type, argument_types);
b.add_unique_function (instance, return_type, argument_types,
@@ -312,24 +314,26 @@ build_32_64 (function_builder &b, const char *signature,
mode_suffix_index mode64, bool force_direct_overloads = false)
{
for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
- if (group.types[0][0] == NUM_TYPE_SUFFIXES)
- {
- gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
- build_one (b, signature, group, mode32, 0, pi,
- force_direct_overloads);
- build_one (b, signature, group, mode64, 0, pi,
- force_direct_overloads);
- }
- else
- for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
+ if (group.types[0][0] == NUM_TYPE_SUFFIXES)
{
- unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
- gcc_assert (bits == 32 || bits == 64);
- mode_suffix_index mode = bits == 32 ? mode32 : mode64;
- if (mode != MODE_none)
- build_one (b, signature, group, mode, ti, pi,
- force_direct_overloads);
+ gcc_assert (mode32 != MODE_none && mode64 != MODE_none);
+ build_one (b, signature, group, mode32, 0, gi, pi,
+ force_direct_overloads);
+ build_one (b, signature, group, mode64, 0, gi, pi,
+ force_direct_overloads);
}
+ else
+ for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
+ ++ti)
+ {
+ unsigned int bits = type_suffixes[group.types[ti][0]].element_bits;
+ gcc_assert (bits == 32 || bits == 64);
+ mode_suffix_index mode = bits == 32 ? mode32 : mode64;
+ if (mode != MODE_none)
+ build_one (b, signature, group, mode, ti, gi, pi,
+ force_direct_overloads);
+ }
}
/* For every type and predicate combination in GROUP, add one function
@@ -423,10 +427,11 @@ build_all (function_builder &b, const char *signature,
bool force_direct_overloads = false)
{
for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
- for (unsigned int ti = 0;
- ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
- build_one (b, signature, group, mode_suffix_id, ti, pi,
- force_direct_overloads);
+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
+ for (unsigned int ti = 0;
+ ti == 0 || group.types[ti][0] != NUM_TYPE_SUFFIXES; ++ti)
+ build_one (b, signature, group, mode_suffix_id, ti, gi, pi,
+ force_direct_overloads);
}
/* TYPE is the largest type suffix associated with the arguments of R,
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index e066f096d..a94e5e269 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -252,7 +252,7 @@ public:
that we can use for sensible shift amounts. */
function_instance instance ("svqshl", functions::svqshl,
shapes::binary_int_opt_n, MODE_n,
- f.type_suffix_ids, f.pred);
+ f.type_suffix_ids, GROUP_none, f.pred);
return f.redirect_call (instance);
}
else
@@ -261,7 +261,7 @@ public:
that we can use for sensible shift amounts. */
function_instance instance ("svrshl", functions::svrshl,
shapes::binary_int_opt_n, MODE_n,
- f.type_suffix_ids, f.pred);
+ f.type_suffix_ids, GROUP_none, f.pred);
return f.redirect_call (instance);
}
}
@@ -290,7 +290,7 @@ public:
-wi::to_wide (amount));
function_instance instance ("svasr", functions::svasr,
shapes::binary_uint_opt_n, MODE_n,
- f.type_suffix_ids, f.pred);
+ f.type_suffix_ids, GROUP_none, f.pred);
if (f.type_suffix (0).unsigned_p)
{
instance.base_name = "svlsr";
@@ -322,7 +322,7 @@ public:
that we can use for sensible shift amounts. */
function_instance instance ("svlsl", functions::svlsl,
shapes::binary_uint_opt_n, MODE_n,
- f.type_suffix_ids, f.pred);
+ f.type_suffix_ids, GROUP_none, f.pred);
gcall *call = as_a <gcall *> (f.redirect_call (instance));
gimple_call_set_arg (call, 2, amount);
return call;
@@ -335,7 +335,7 @@ public:
-wi::to_wide (amount));
function_instance instance ("svrshr", functions::svrshr,
shapes::shift_right_imm, MODE_n,
- f.type_suffix_ids, f.pred);
+ f.type_suffix_ids, GROUP_none, f.pred);
gcall *call = as_a <gcall *> (f.redirect_call (instance));
gimple_call_set_arg (call, 2, amount);
return call;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index dde01f676..dc3fd80da 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -144,6 +144,13 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
0, VOIDmode }
};
+CONSTEXPR const group_suffix_info group_suffixes[] = {
+#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) \
+ { "_" #NAME, VG, VECTORS_PER_TUPLE },
+#include "aarch64-sve-builtins.def"
+ { "", 0, 1 }
+};
+
/* Define a TYPES_<combination> macro for each combination of type
suffixes that an ACLE function can have, where <combination> is the
name used in DEF_SVE_FUNCTION entries.
@@ -483,6 +490,10 @@ DEF_SVE_TYPES_ARRAY (inc_dec_n);
DEF_SVE_TYPES_ARRAY (reinterpret);
DEF_SVE_TYPES_ARRAY (while);
+static const group_suffix_index groups_none[] = {
+ GROUP_none, NUM_GROUP_SUFFIXES
+};
+
/* Used by functions that have no governing predicate. */
static const predication_index preds_none[] = { PRED_none, NUM_PREDS };
@@ -524,8 +535,8 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
/* A list of all SVE ACLE functions. */
static CONSTEXPR const function_group_info function_groups[] = {
#define DEF_SVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
- { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, preds_##PREDS, \
- REQUIRED_EXTENSIONS },
+ { #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_none, \
+ preds_##PREDS, REQUIRED_EXTENSIONS },
#include "aarch64-sve-builtins.def"
};
@@ -788,6 +799,7 @@ function_instance::hash () const
h.add_int (mode_suffix_id);
h.add_int (type_suffix_ids[0]);
h.add_int (type_suffix_ids[1]);
+ h.add_int (group_suffix_id);
h.add_int (pred);
return h.end ();
}
@@ -957,6 +969,8 @@ function_builder::get_name (const function_instance &instance,
for (unsigned int i = 0; i < 2; ++i)
if (!overloaded_p || instance.shape->explicit_type_suffix_p (i))
append_name (instance.type_suffix (i).string);
+ if (!overloaded_p || instance.shape->explicit_group_suffix_p ())
+ append_name (instance.group_suffix ().string);
append_name (pred_suffixes[instance.pred]);
return finish_name ();
}
@@ -1113,19 +1127,26 @@ void
function_builder::add_overloaded_functions (const function_group_info &group,
mode_suffix_index mode)
{
- unsigned int explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
- unsigned int explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
- for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+ bool explicit_type0 = (*group.shape)->explicit_type_suffix_p (0);
+ bool explicit_type1 = (*group.shape)->explicit_type_suffix_p (1);
+ bool explicit_group = (*group.shape)->explicit_group_suffix_p ();
+ auto add_function = [&](const type_suffix_pair &types,
+ group_suffix_index group_suffix_id,
+ unsigned int pi)
+ {
+ function_instance instance (group.base_name, *group.base,
+ *group.shape, mode, types,
+ group_suffix_id, group.preds[pi]);
+ add_overloaded_function (instance, group.required_extensions);
+ };
+
+ auto add_group_suffix = [&](group_suffix_index group_suffix_id,
+ unsigned int pi)
{
if (!explicit_type0 && !explicit_type1)
- {
- /* Deal with the common case in which there is one overloaded
- function for all type combinations. */
- function_instance instance (group.base_name, *group.base,
- *group.shape, mode, types_none[0],
- group.preds[pi]);
- add_overloaded_function (instance, group.required_extensions);
- }
+ /* Deal with the common case in which there is one overloaded
+ function for all type combinations. */
+ add_function (types_none[0], group_suffix_id, pi);
else
for (unsigned int ti = 0; group.types[ti][0] != NUM_TYPE_SUFFIXES;
++ti)
@@ -1136,12 +1157,16 @@ function_builder::add_overloaded_functions (const function_group_info &group,
explicit_type0 ? group.types[ti][0] : NUM_TYPE_SUFFIXES,
explicit_type1 ? group.types[ti][1] : NUM_TYPE_SUFFIXES
};
- function_instance instance (group.base_name, *group.base,
- *group.shape, mode, types,
- group.preds[pi]);
- add_overloaded_function (instance, group.required_extensions);
+ add_function (types, group_suffix_id, pi);
}
- }
+ };
+
+ for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
+ if (explicit_group)
+ for (unsigned int gi = 0; group.groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
+ add_group_suffix (group.groups[gi], pi);
+ else
+ add_group_suffix (GROUP_none, pi);
}
/* Register all the functions in GROUP. */
@@ -1213,29 +1238,34 @@ function_resolver::report_no_such_form (type_suffix_index type)
}
/* Silently check whether there is an instance of the function with the
- mode suffix given by MODE and the type suffixes given by TYPE0 and TYPE1.
- Return its function decl if so, otherwise return null. */
+ mode suffix given by MODE, the type suffixes given by TYPE0 and TYPE1,
+ and the group suffix given by GROUP. Return its function decl if so,
+ otherwise return null. */
tree
function_resolver::lookup_form (mode_suffix_index mode,
type_suffix_index type0,
- type_suffix_index type1)
+ type_suffix_index type1,
+ group_suffix_index group)
{
type_suffix_pair types = { type0, type1 };
- function_instance instance (base_name, base, shape, mode, types, pred);
+ function_instance instance (base_name, base, shape, mode, types,
+ group, pred);
registered_function *rfn
= function_table->find_with_hash (instance, instance.hash ());
return rfn ? rfn->decl : NULL_TREE;
}
-/* Resolve the function to one with the mode suffix given by MODE and the
- type suffixes given by TYPE0 and TYPE1. Return its function decl on
- success, otherwise report an error and return error_mark_node. */
+/* Resolve the function to one with the mode suffix given by MODE, the
+ type suffixes given by TYPE0 and TYPE1, and group suffix given by
+ GROUP. Return its function decl on success, otherwise report an
+ error and return error_mark_node. */
tree
function_resolver::resolve_to (mode_suffix_index mode,
type_suffix_index type0,
- type_suffix_index type1)
+ type_suffix_index type1,
+ group_suffix_index group)
{
- tree res = lookup_form (mode, type0, type1);
+ tree res = lookup_form (mode, type0, type1, group);
if (!res)
{
if (type1 == NUM_TYPE_SUFFIXES)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.def b/gcc/config/aarch64/aarch64-sve-builtins.def
index 6e4dcdbc9..d9bf9c350 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins.def
@@ -29,6 +29,10 @@
#define DEF_SVE_TYPE_SUFFIX(A, B, C, D, E)
#endif
+#ifndef DEF_SVE_GROUP_SUFFIX
+#define DEF_SVE_GROUP_SUFFIX(A, B, C)
+#endif
+
#ifndef DEF_SVE_FUNCTION
#define DEF_SVE_FUNCTION(A, B, C, D)
#endif
@@ -95,10 +99,15 @@ DEF_SVE_TYPE_SUFFIX (u16, svuint16_t, unsigned, 16, VNx8HImode)
DEF_SVE_TYPE_SUFFIX (u32, svuint32_t, unsigned, 32, VNx4SImode)
DEF_SVE_TYPE_SUFFIX (u64, svuint64_t, unsigned, 64, VNx2DImode)
+DEF_SVE_GROUP_SUFFIX (x2, 0, 2)
+DEF_SVE_GROUP_SUFFIX (x3, 0, 3)
+DEF_SVE_GROUP_SUFFIX (x4, 0, 4)
+
#include "aarch64-sve-builtins-base.def"
#include "aarch64-sve-builtins-sve2.def"
#undef DEF_SVE_FUNCTION
+#undef DEF_SVE_GROUP_SUFFIX
#undef DEF_SVE_TYPE_SUFFIX
#undef DEF_SVE_TYPE
#undef DEF_SVE_MODE
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 824c31cd7..374c57e93 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -180,6 +180,17 @@ enum type_suffix_index
NUM_TYPE_SUFFIXES
};
+/* Enumerates the possible group suffixes. Each suffix combines two
+ optional pieces of information: the vector group size in a ZA index,
+ and the number of vectors in the largest tuple argument. */
+enum group_suffix_index
+{
+#define DEF_SVE_GROUP_SUFFIX(NAME, VG, VECTORS_PER_TUPLE) GROUP_##NAME,
+#include "aarch64-sve-builtins.def"
+ GROUP_none,
+ NUM_GROUP_SUFFIXES
+};
+
/* Combines two type suffixes. */
typedef enum type_suffix_index type_suffix_pair[2];
@@ -237,6 +248,21 @@ struct type_suffix_info
machine_mode vector_mode : 16;
};
+/* Static information about a group suffix. */
+struct group_suffix_info
+{
+ /* The suffix string itself. */
+ const char *string;
+
+ /* If the suffix describes a vector group in a ZA index, this is the
+ size of that group, otherwise it is zero. */
+ unsigned int vg;
+
+ /* The number of vectors in the largest (or only) tuple argument,
+ or 1 if the suffix does not convey this information. */
+ unsigned int vectors_per_tuple;
+};
+
/* Static information about a set of functions. */
struct function_group_info
{
@@ -251,14 +277,16 @@ struct function_group_info
shapes. */
const function_shape *const *shape;
- /* A list of the available type suffixes, and of the available predication
- types. The function supports every combination of the two.
+ /* A list of the available type suffixes, group suffixes, and predication
+ types. The function supports every combination of the three.
+
+ The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES.
+ It is lexicographically ordered based on the index value.
- The list of type suffixes is terminated by two NUM_TYPE_SUFFIXES
- while the list of predication types is terminated by NUM_PREDS.
- The list of type suffixes is lexicographically ordered based
- on the index value. */
+ The list of group suffixes is terminated by NUM_GROUP_SUFFIXES
+ and the list of predication types is terminated by NUM_PREDS. */
const type_suffix_pair *types;
+ const group_suffix_index *groups;
const predication_index *preds;
/* The architecture extensions that the functions require, as a set of
@@ -273,7 +301,8 @@ class GTY((user)) function_instance
public:
function_instance (const char *, const function_base *,
const function_shape *, mode_suffix_index,
- const type_suffix_pair &, predication_index);
+ const type_suffix_pair &, group_suffix_index,
+ predication_index);
bool operator== (const function_instance &) const;
bool operator!= (const function_instance &) const;
@@ -294,6 +323,8 @@ public:
units_index displacement_units () const;
const type_suffix_info &type_suffix (unsigned int) const;
+ const group_suffix_info &group_suffix () const;
+
tree scalar_type (unsigned int) const;
tree vector_type (unsigned int) const;
tree tuple_type (unsigned int) const;
@@ -301,14 +332,14 @@ public:
machine_mode vector_mode (unsigned int) const;
machine_mode gp_mode (unsigned int) const;
- /* The properties of the function. (The explicit "enum"s are required
- for gengtype.) */
+ /* The properties of the function. */
const char *base_name;
const function_base *base;
const function_shape *shape;
- enum mode_suffix_index mode_suffix_id;
+ mode_suffix_index mode_suffix_id;
type_suffix_pair type_suffix_ids;
- enum predication_index pred;
+ group_suffix_index group_suffix_id;
+ predication_index pred;
};
class registered_function;
@@ -390,10 +421,12 @@ public:
tree report_no_such_form (type_suffix_index);
tree lookup_form (mode_suffix_index,
type_suffix_index = NUM_TYPE_SUFFIXES,
- type_suffix_index = NUM_TYPE_SUFFIXES);
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+ group_suffix_index = GROUP_none);
tree resolve_to (mode_suffix_index,
type_suffix_index = NUM_TYPE_SUFFIXES,
- type_suffix_index = NUM_TYPE_SUFFIXES);
+ type_suffix_index = NUM_TYPE_SUFFIXES,
+ group_suffix_index = GROUP_none);
type_suffix_index infer_integer_scalar_type (unsigned int);
type_suffix_index infer_pointer_type (unsigned int, bool = false);
@@ -641,6 +674,11 @@ class function_shape
public:
virtual bool explicit_type_suffix_p (unsigned int) const = 0;
+ /* True if the group suffix is present in overloaded names.
+ This isn't meaningful for pre-SME intrinsics, and true is
+ more common than false, so provide a default definition. */
+ virtual bool explicit_group_suffix_p () const { return true; }
+
/* Define all functions associated with the given group. */
virtual void build (function_builder &,
const function_group_info &) const = 0;
@@ -669,6 +707,7 @@ private:
extern const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1];
extern const mode_suffix_info mode_suffixes[MODE_none + 1];
+extern const group_suffix_info group_suffixes[NUM_GROUP_SUFFIXES];
extern tree scalar_types[NUM_VECTOR_TYPES];
extern tree acle_vector_types[MAX_TUPLE_SIZE][NUM_VECTOR_TYPES + 1];
@@ -728,9 +767,11 @@ function_instance (const char *base_name_in,
const function_shape *shape_in,
mode_suffix_index mode_suffix_id_in,
const type_suffix_pair &type_suffix_ids_in,
+ group_suffix_index group_suffix_id_in,
predication_index pred_in)
: base_name (base_name_in), base (base_in), shape (shape_in),
- mode_suffix_id (mode_suffix_id_in), pred (pred_in)
+ mode_suffix_id (mode_suffix_id_in), group_suffix_id (group_suffix_id_in),
+ pred (pred_in)
{
memcpy (type_suffix_ids, type_suffix_ids_in, sizeof (type_suffix_ids));
}
@@ -741,9 +782,10 @@ function_instance::operator== (const function_instance &other) const
return (base == other.base
&& shape == other.shape
&& mode_suffix_id == other.mode_suffix_id
- && pred == other.pred
&& type_suffix_ids[0] == other.type_suffix_ids[0]
- && type_suffix_ids[1] == other.type_suffix_ids[1]);
+ && type_suffix_ids[1] == other.type_suffix_ids[1]
+ && group_suffix_id == other.group_suffix_id
+ && pred == other.pred);
}
inline bool
@@ -815,6 +857,13 @@ function_instance::type_suffix (unsigned int i) const
return type_suffixes[type_suffix_ids[i]];
}
+/* Return information about the function's group suffix. */
+inline const group_suffix_info &
+function_instance::group_suffix () const
+{
+ return group_suffixes[group_suffix_id];
+}
+
/* Return the scalar type associated with type suffix I. */
inline tree
function_instance::scalar_type (unsigned int i) const
--
2.33.0
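The expansion order this patch introduces can be sketched in isolation. The nested loop below mirrors the predication x group x type iteration that build_all now performs, and the name assembly follows get_name (type suffix, then group suffix, then predication suffix). Everything else here — the svfoo base name, group_strings, the printf driver — is invented scaffolding for illustration, not the real builtin machinery.

#include <cstdio>

enum group_suffix_index { GROUP_x2, GROUP_x3, GROUP_x4, GROUP_none,
                          NUM_GROUP_SUFFIXES };

// Suffix strings, indexed by group_suffix_index; GROUP_none adds nothing.
static const char *group_strings[] = { "_x2", "_x3", "_x4", "" };

int main ()
{
  // A group list as function_group_info::groups would hold it:
  // terminated by NUM_GROUP_SUFFIXES.
  const group_suffix_index groups[] = { GROUP_x2, GROUP_x4,
                                        NUM_GROUP_SUFFIXES };
  const char *types[] = { "_s32", "_u32", nullptr };
  const char *preds[] = { "_m", "_z", nullptr };

  for (unsigned int pi = 0; preds[pi]; ++pi)
    for (unsigned int gi = 0; groups[gi] != NUM_GROUP_SUFFIXES; ++gi)
      for (unsigned int ti = 0; types[ti]; ++ti)
	// The real code calls build_one for each combination;
	// here we just print the function name it would register.
	printf ("svfoo%s%s%s\n", types[ti], group_strings[groups[gi]],
		preds[pi]);
  return 0;
}

Running this prints svfoo_s32_x2_m, svfoo_u32_x2_m, svfoo_s32_x4_m, and so on — one registered function per combination of the three suffix lists.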


@@ -0,0 +1,230 @@
From a32a9321b3336907fe2d17148cb9e4652642a3e6 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:20 +0000
Subject: [PATCH 066/157] [Backport][SME] aarch64: Add sve_type to SVE builtins
code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7f6de9861e5d7745a0af5174582519a39d545a92
Until now, the SVE ACLE code had mostly been able to represent
individual SVE arguments with just an element type suffix (s32, u32,
etc.). However, the SME2 ACLE provides many overloaded intrinsics
that operate on tuples rather than single vectors. This patch
therefore adds a new type (sve_type) that combines an element
type suffix with a vector count. This is enough to uniquely
represent all SVE ACLE types.
gcc/
* config/aarch64/aarch64-sve-builtins.h (sve_type): New struct.
(sve_type::operator==): New function.
(function_resolver::get_vector_type): Delete.
(function_resolver::report_no_such_form): Take an sve_type rather
than a type_suffix_index.
* config/aarch64/aarch64-sve-builtins.cc (get_vector_type): New
function.
(function_resolver::get_vector_type): Delete.
(function_resolver::report_no_such_form): Take an sve_type rather
than a type_suffix_index.
(find_sve_type): New function, split out from...
(function_resolver::infer_vector_or_tuple_type): ...here.
---
gcc/config/aarch64/aarch64-sve-builtins.cc | 93 ++++++++++++----------
gcc/config/aarch64/aarch64-sve-builtins.h | 37 ++++++++-
2 files changed, 88 insertions(+), 42 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index dc3fd80da..cc676bfe1 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -659,6 +659,14 @@ find_type_suffix_for_scalar_type (const_tree type)
return NUM_TYPE_SUFFIXES;
}
+/* Return the vector type associated with TYPE. */
+static tree
+get_vector_type (sve_type type)
+{
+ auto vector_type = type_suffixes[type.type].vector_type;
+ return acle_vector_types[type.num_vectors - 1][vector_type];
+}
+
/* Report an error against LOCATION that the user has tried to use
function FNDECL when extension EXTENSION is disabled. */
static void
@@ -1190,13 +1198,6 @@ function_resolver::function_resolver (location_t location,
{
}
-/* Return the vector type associated with type suffix TYPE. */
-tree
-function_resolver::get_vector_type (type_suffix_index type)
-{
- return acle_vector_types[0][type_suffixes[type].vector_type];
-}
-
/* Return the <stdint.h> name associated with TYPE. Using the <stdint.h>
name should be more user-friendly than the underlying canonical type,
since it makes the signedness and bitwidth explicit. */
@@ -1227,10 +1228,10 @@ function_resolver::scalar_argument_p (unsigned int i)
|| SCALAR_FLOAT_TYPE_P (type));
}
-/* Report that the function has no form that takes type suffix TYPE.
+/* Report that the function has no form that takes type TYPE.
Return error_mark_node. */
tree
-function_resolver::report_no_such_form (type_suffix_index type)
+function_resolver::report_no_such_form (sve_type type)
{
error_at (location, "%qE has no form that takes %qT arguments",
fndecl, get_vector_type (type));
@@ -1352,6 +1353,25 @@ function_resolver::infer_pointer_type (unsigned int argno,
return type;
}
+/* If TYPE is an SVE predicate or vector type, or a tuple of such a type,
+ return the associated sve_type, otherwise return an invalid sve_type. */
+static sve_type
+find_sve_type (const_tree type)
+{
+ /* A linear search should be OK here, since the code isn't hot and
+ the number of types is only small. */
+ for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i)
+ for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
+ {
+ vector_type_index type_i = type_suffixes[suffix_i].vector_type;
+ tree this_type = acle_vector_types[size_i][type_i];
+ if (this_type && matches_type_p (this_type, type))
+ return { type_suffix_index (suffix_i), size_i + 1 };
+ }
+
+ return {};
+}
+
/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
vectors; NUM_VECTORS is 1 for the former. Return the associated type
suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
@@ -1364,37 +1384,30 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno,
if (actual == error_mark_node)
return NUM_TYPE_SUFFIXES;
- /* A linear search should be OK here, since the code isn't hot and
- the number of types is only small. */
- for (unsigned int size_i = 0; size_i < MAX_TUPLE_SIZE; ++size_i)
- for (unsigned int suffix_i = 0; suffix_i < NUM_TYPE_SUFFIXES; ++suffix_i)
- {
- vector_type_index type_i = type_suffixes[suffix_i].vector_type;
- tree type = acle_vector_types[size_i][type_i];
- if (type && matches_type_p (type, actual))
- {
- if (size_i + 1 == num_vectors)
- return type_suffix_index (suffix_i);
-
- if (num_vectors == 1)
- error_at (location, "passing %qT to argument %d of %qE, which"
- " expects a single SVE vector rather than a tuple",
- actual, argno + 1, fndecl);
- else if (size_i == 0 && type_i != VECTOR_TYPE_svbool_t)
- /* num_vectors is always != 1, so the singular isn't needed. */
- error_n (location, num_vectors, "%qT%d%qE%d",
- "passing single vector %qT to argument %d"
- " of %qE, which expects a tuple of %d vectors",
- actual, argno + 1, fndecl, num_vectors);
- else
- /* num_vectors is always != 1, so the singular isn't needed. */
- error_n (location, num_vectors, "%qT%d%qE%d",
- "passing %qT to argument %d of %qE, which"
- " expects a tuple of %d vectors", actual, argno + 1,
- fndecl, num_vectors);
- return NUM_TYPE_SUFFIXES;
- }
- }
+ if (auto sve_type = find_sve_type (actual))
+ {
+ if (sve_type.num_vectors == num_vectors)
+ return sve_type.type;
+
+ if (num_vectors == 1)
+ error_at (location, "passing %qT to argument %d of %qE, which"
+ " expects a single SVE vector rather than a tuple",
+ actual, argno + 1, fndecl);
+ else if (sve_type.num_vectors == 1
+ && sve_type.type != TYPE_SUFFIX_b)
+ /* num_vectors is always != 1, so the singular isn't needed. */
+ error_n (location, num_vectors, "%qT%d%qE%d",
+ "passing single vector %qT to argument %d"
+ " of %qE, which expects a tuple of %d vectors",
+ actual, argno + 1, fndecl, num_vectors);
+ else
+ /* num_vectors is always != 1, so the singular isn't needed. */
+ error_n (location, num_vectors, "%qT%d%qE%d",
+ "passing %qT to argument %d of %qE, which"
+ " expects a tuple of %d vectors", actual, argno + 1,
+ fndecl, num_vectors);
+ return NUM_TYPE_SUFFIXES;
+ }
if (num_vectors == 1)
error_at (location, "passing %qT to argument %d of %qE, which"
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 374c57e93..f4f2c415f 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -263,6 +263,40 @@ struct group_suffix_info
unsigned int vectors_per_tuple;
};
+/* Represents an SVE vector, predicate, tuple of vectors, or tuple of
+ predicates. There is also a representation of "no type"/"invalid type". */
+struct sve_type
+{
+ sve_type () = default;
+ sve_type (type_suffix_index type) : type (type), num_vectors (1) {}
+ sve_type (type_suffix_index type, unsigned int num_vectors)
+ : type (type), num_vectors (num_vectors) {}
+
+ /* Return true if the type is valid. */
+ explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; }
+
+ bool operator== (const sve_type &) const;
+ bool operator!= (const sve_type &x) const { return !operator== (x); }
+
+ /* This is one of:
+
+ - TYPE_SUFFIX_b for svbool_t-based types
+ - TYPE_SUFFIX_c for svcount_t-based types
+ - the type suffix of a data element for SVE data vectors and tuples
+ - NUM_TYPE_SUFFIXES for invalid types. */
+ type_suffix_index type = NUM_TYPE_SUFFIXES;
+
+ /* If the type is a tuple, this is the number of vectors in the tuple,
+ otherwise it is 1. */
+ unsigned int num_vectors = 1;
+};
+
+inline bool
+sve_type::operator== (const sve_type &other) const
+{
+ return type == other.type && num_vectors == other.num_vectors;
+}
+
/* Static information about a set of functions. */
struct function_group_info
{
@@ -413,12 +447,11 @@ public:
function_resolver (location_t, const function_instance &, tree,
vec<tree, va_gc> &);
- tree get_vector_type (type_suffix_index);
const char *get_scalar_type_name (type_suffix_index);
tree get_argument_type (unsigned int);
bool scalar_argument_p (unsigned int);
- tree report_no_such_form (type_suffix_index);
+ tree report_no_such_form (sve_type);
tree lookup_form (mode_suffix_index,
type_suffix_index = NUM_TYPE_SUFFIXES,
type_suffix_index = NUM_TYPE_SUFFIXES,
--
2.33.0
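A minimal sketch may help fix the sve_type idea before the follow-up patches build on it. The struct body below is copied from the patch above; the enum values and the printf driver are stand-ins invented for illustration, assuming hypothetical suffixes TYPE_SUFFIX_s32 and TYPE_SUFFIX_u32.

#include <cstdio>

enum type_suffix_index { TYPE_SUFFIX_s32, TYPE_SUFFIX_u32,
                         NUM_TYPE_SUFFIXES };

struct sve_type
{
  sve_type () = default;
  sve_type (type_suffix_index type) : type (type), num_vectors (1) {}
  sve_type (type_suffix_index type, unsigned int num_vectors)
    : type (type), num_vectors (num_vectors) {}

  // Valid iff the element suffix is a real one.
  explicit operator bool () const { return type != NUM_TYPE_SUFFIXES; }

  type_suffix_index type = NUM_TYPE_SUFFIXES;
  unsigned int num_vectors = 1;
};

int main ()
{
  sve_type single (TYPE_SUFFIX_s32);    // models svint32_t
  sve_type tuple (TYPE_SUFFIX_s32, 4);  // models svint32x4_t
  sve_type invalid;                     // "no such SVE type"

  // The explicit bool conversion is what lets resolution code write
  // "if (auto type = find_sve_type (actual)) ..." as in the patch.
  printf ("%d %d %d\n", bool (single), bool (tuple), bool (invalid));
  return 0;
}

The key point is that a single value now carries both the element suffix and the vector count, so tuple arguments no longer need to be flattened back to a bare type_suffix_index during overload resolution.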

File diff suppressed because it is too large.


@@ -0,0 +1,698 @@
From 6a7cb5074824416ae562de0589550a930e9dbcaf Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:21 +0000
Subject: [PATCH 068/157] [Backport][SME] aarch64: Replace vague "previous
arguments" message
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1b52d4b66e8b91ec1e3de9c0b79aaf258824b875
If an SVE ACLE intrinsic requires two arguments to have the
same type, the C resolver would report mismatches as "argument N
has type T2, but previous arguments had type T1". This patch makes
the message say which argument had type T1.
This is needed to give decent error messages for some SME cases.
gcc/
* config/aarch64/aarch64-sve-builtins.h
(function_resolver::require_matching_vector_type): Add a parameter
that specifies the number of the earlier argument that is being
matched against.
* config/aarch64/aarch64-sve-builtins.cc
(function_resolver::require_matching_vector_type): Likewise.
(require_derived_vector_type): Update calls accordingly.
(function_resolver::resolve_unary): Likewise.
(function_resolver::resolve_uniform): Likewise.
(function_resolver::resolve_uniform_opt_n): Likewise.
* config/aarch64/aarch64-sve-builtins-shapes.cc
(binary_long_lane_def::resolve): Likewise.
(clast_def::resolve, ternary_uint_def::resolve): Likewise.
gcc/testsuite/
* gcc.target/aarch64/sve/acle/general-c/*: Replace "but previous
arguments had" with "but argument N had".
---
.../aarch64/aarch64-sve-builtins-shapes.cc | 6 ++--
gcc/config/aarch64/aarch64-sve-builtins.cc | 17 +++++------
gcc/config/aarch64/aarch64-sve-builtins.h | 3 +-
.../aarch64/sve/acle/general-c/binary_1.c | 6 ++--
.../sve/acle/general-c/binary_lane_1.c | 2 +-
.../sve/acle/general-c/binary_long_lane_1.c | 2 +-
.../sve/acle/general-c/binary_long_opt_n_1.c | 8 +++---
.../acle/general-c/binary_narrowb_opt_n_1.c | 8 +++---
.../acle/general-c/binary_narrowt_opt_n_1.c | 8 +++---
.../sve/acle/general-c/binary_opt_n_2.c | 14 +++++-----
.../sve/acle/general-c/binary_opt_n_3.c | 16 +++++------
.../sve/acle/general-c/binary_rotate_1.c | 2 +-
.../sve/acle/general-c/binary_to_uint_1.c | 4 +--
.../aarch64/sve/acle/general-c/clast_1.c | 2 +-
.../aarch64/sve/acle/general-c/compare_1.c | 14 +++++-----
.../sve/acle/general-c/compare_opt_n_1.c | 14 +++++-----
.../aarch64/sve/acle/general-c/create_1.c | 6 ++--
.../aarch64/sve/acle/general-c/create_3.c | 6 ++--
.../aarch64/sve/acle/general-c/create_5.c | 6 ++--
.../aarch64/sve/acle/general-c/mmla_1.c | 14 +++++-----
.../sve/acle/general-c/ternary_lane_1.c | 4 +--
.../acle/general-c/ternary_lane_rotate_1.c | 4 +--
.../sve/acle/general-c/ternary_opt_n_1.c | 28 +++++++++----------
.../sve/acle/general-c/ternary_rotate_1.c | 4 +--
.../general-c/ternary_shift_right_imm_1.c | 6 ++--
.../sve/acle/general-c/ternary_uint_1.c | 6 ++--
.../aarch64/sve/acle/general-c/tmad_1.c | 2 +-
.../aarch64/sve/acle/general-c/unary_1.c | 8 +++---
.../aarch64/sve/acle/general-c/undeclared_2.c | 2 +-
29 files changed, 112 insertions(+), 110 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 3ecef026c..40aa418e0 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -1153,7 +1153,7 @@ struct binary_long_lane_def : public overloaded_base<0>
type_suffix_index type, result_type;
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
- || !r.require_matching_vector_type (i + 1, type)
+ || !r.require_matching_vector_type (i + 1, i, type)
|| !r.require_integer_immediate (i + 2)
|| (result_type = long_type_suffix (r, type)) == NUM_TYPE_SUFFIXES)
return error_mark_node;
@@ -1608,7 +1608,7 @@ struct clast_def : public overloaded_base<0>
{
type_suffix_index type;
if ((type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
- || !r.require_matching_vector_type (i + 1, type))
+ || !r.require_matching_vector_type (i + 1, i, type))
return error_mark_node;
return r.resolve_to (MODE_none, type);
}
@@ -3108,7 +3108,7 @@ struct ternary_uint_def : public overloaded_base<0>
type_suffix_index type;
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
- || !r.require_matching_vector_type (i + 1, type)
+ || !r.require_matching_vector_type (i + 1, i, type)
|| !r.require_derived_vector_type (i + 2, i, type, TYPE_unsigned))
return error_mark_node;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 4e94e3633..1545fd78d 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1561,11 +1561,12 @@ function_resolver::require_vector_type (unsigned int argno,
return true;
}
-/* Like require_vector_type, but TYPE is inferred from previous arguments
+/* Like require_vector_type, but TYPE is inferred from argument FIRST_ARGNO
rather than being a fixed part of the function signature. This changes
the nature of the error messages. */
bool
function_resolver::require_matching_vector_type (unsigned int argno,
+ unsigned int first_argno,
type_suffix_index type)
{
type_suffix_index new_type = infer_vector_type (argno);
@@ -1575,9 +1576,9 @@ function_resolver::require_matching_vector_type (unsigned int argno,
if (type != new_type)
{
error_at (location, "passing %qT to argument %d of %qE, but"
- " previous arguments had type %qT",
+ " argument %d had type %qT",
get_vector_type (new_type), argno + 1, fndecl,
- get_vector_type (type));
+ first_argno + 1, get_vector_type (type));
return false;
}
return true;
@@ -1626,7 +1627,7 @@ require_derived_vector_type (unsigned int argno,
{
/* There's no need to resolve this case out of order. */
gcc_assert (argno > first_argno);
- return require_matching_vector_type (argno, first_type);
+ return require_matching_vector_type (argno, first_argno, first_type);
}
/* Use FIRST_TYPE to get the expected type class and element size. */
@@ -2314,7 +2315,7 @@ function_resolver::resolve_unary (type_class_index merge_tclass,
so we can use normal left-to-right resolution. */
if ((type = infer_vector_type (0)) == NUM_TYPE_SUFFIXES
|| !require_vector_type (1, VECTOR_TYPE_svbool_t)
- || !require_matching_vector_type (2, type))
+ || !require_matching_vector_type (2, 0, type))
return error_mark_node;
}
else
@@ -2359,9 +2360,9 @@ function_resolver::resolve_uniform (unsigned int nops, unsigned int nimm)
|| (type = infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
return error_mark_node;
- i += 1;
+ unsigned int first_arg = i++;
for (; i < nargs - nimm; ++i)
- if (!require_matching_vector_type (i, type))
+ if (!require_matching_vector_type (i, first_arg, type))
return error_mark_node;
for (; i < nargs; ++i)
@@ -2390,7 +2391,7 @@ function_resolver::resolve_uniform_opt_n (unsigned int nops)
unsigned int first_arg = i++;
for (; i < nargs - 1; ++i)
- if (!require_matching_vector_type (i, type))
+ if (!require_matching_vector_type (i, first_arg, type))
return error_mark_node;
return finish_opt_n_resolution (i, first_arg, type);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 5a4f35123..f7d6cc084 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -476,7 +476,8 @@ public:
bool require_vector_or_scalar_type (unsigned int);
bool require_vector_type (unsigned int, vector_type_index);
- bool require_matching_vector_type (unsigned int, type_suffix_index);
+ bool require_matching_vector_type (unsigned int, unsigned int,
+ type_suffix_index);
bool require_derived_vector_type (unsigned int, unsigned int,
type_suffix_index,
type_class_index = SAME_TYPE_CLASS,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c
index 4343146de..2e919d287 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_1.c
@@ -7,8 +7,8 @@ f1 (svbool_t pg, svuint8_t u8, svint16_t s16)
{
svzip1 (pg); /* { dg-error {too few arguments to function 'svzip1'} } */
svzip1 (pg, u8, u8); /* { dg-error {too many arguments to function 'svzip1'} } */
- svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but previous arguments had type 'svbool_t'} } */
- svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */
- svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but previous arguments had type 'svuint8_t'} } */
+ svzip1 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svzip1', but argument 1 had type 'svbool_t'} } */
+ svzip1 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */
+ svzip1 (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svzip1', but argument 1 had type 'svuint8_t'} } */
svzip1 (u8, 0); /* { dg-error {passing 'int' to argument 2 of 'svzip1', which expects an SVE type rather than a scalar} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
index 10b6b7e81..81533b25d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_lane_1.c
@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
svmul_lane (s32, s32, 0); /* { dg-error {ACLE function 'svmul_lane_s32' requires ISA extension 'sve2'} "" { xfail aarch64_sve2 } } */
svmul_lane (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmul_lane', which expects an SVE type rather than a scalar} } */
svmul_lane (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmul_lane', which expects an SVE type rather than a scalar} } */
- svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but previous arguments had type 'svfloat32_t'} } */
+ svmul_lane (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmul_lane', but argument 1 had type 'svfloat32_t'} } */
svmul_lane (f32, f32, s32); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */
svmul_lane (f32, f32, i); /* { dg-error {argument 3 of 'svmul_lane' must be an integer constant expression} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c
index 805863f76..25b620877 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_lane_1.c
@@ -21,7 +21,7 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8, svint16_t s16, svuint16_t u16,
svmullb_lane (f64, f64, 0); /* { dg-error {'svmullb_lane' has no form that takes 'svfloat64_t' arguments} } */
svmullb_lane (1, u32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */
svmullb_lane (u32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svmullb_lane', which expects an SVE type rather than a scalar} } */
- svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but previous arguments had type 'svuint32_t'} } */
+ svmullb_lane (u32, s32, 0); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmullb_lane', but argument 1 had type 'svuint32_t'} } */
svmullb_lane (u32, u32, s32); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */
svmullb_lane (u32, u32, i); /* { dg-error {argument 3 of 'svmullb_lane' must be an integer constant expression} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c
index ee704eeae..1f513dde9 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_long_opt_n_1.c
@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svaddlb (s64, s64); /* { dg-error {'svaddlb' has no form that takes 'svint64_t' arguments} } */
svaddlb (f16, f16); /* { dg-error {'svaddlb' has no form that takes 'svfloat16_t' arguments} } */
svaddlb (1, u8); /* { dg-error {passing 'int' to argument 1 of 'svaddlb', which expects an SVE type rather than a scalar} } */
- svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */
- svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */
- svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint8_t'} } */
- svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but previous arguments had type 'svuint16_t'} } */
+ svaddlb (u8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */
+ svaddlb (u8, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */
+ svaddlb (u8, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint8_t'} } */
+ svaddlb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddlb', but argument 1 had type 'svuint16_t'} } */
svaddlb (u8, 0);
svaddlb (u16, 0);
svaddlb (u32, 0);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c
index 8ca549ba9..4a29b5c43 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowb_opt_n_1.c
@@ -24,10 +24,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svaddhnb (s64, s64);
svaddhnb (f32, f32); /* { dg-error {'svaddhnb' has no form that takes 'svfloat32_t' arguments} } */
svaddhnb (1, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnb', which expects an SVE type rather than a scalar} } */
- svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
- svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
- svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
- svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but previous arguments had type 'svuint16_t'} } */
+ svaddhnb (u16, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
+ svaddhnb (u16, s16); /* { dg-error {passing 'svint16_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
+ svaddhnb (u16, u32); /* { dg-error {passing 'svuint32_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
+ svaddhnb (u16, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svaddhnb', but argument 1 had type 'svuint16_t'} } */
svaddhnb (u8, 0); /* { dg-error {'svaddhnb' has no form that takes 'svuint8_t' arguments} } */
svaddhnb (u16, 0);
svaddhnb (u32, 0);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c
index 2b537965b..4a442616e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_narrowt_opt_n_1.c
@@ -28,10 +28,10 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svaddhnt (f16, f32, f32); /* { dg-error {'svaddhnt' has no form that takes 'svfloat32_t' arguments} } */
svaddhnt (1, u16, u16); /* { dg-error {passing 'int' to argument 1 of 'svaddhnt', which expects an SVE type rather than a scalar} } */
svaddhnt (u8, 1, u16); /* { dg-error {passing 'int' to argument 2 of 'svaddhnt', which expects an SVE type rather than a scalar} } */
- svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
- svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
- svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
- svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but previous arguments had type 'svuint16_t'} } */
+ svaddhnt (u8, u16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
+ svaddhnt (u8, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
+ svaddhnt (u8, u16, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
+ svaddhnt (u8, u16, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svaddhnt', but argument 2 had type 'svuint16_t'} } */
svaddhnt (u8, u8, 0); /* { dg-error {'svaddhnt' has no form that takes 'svuint8_t' arguments} } */
svaddhnt (u16, u16, 0); /* { dg-error {passing 'svuint16_t' instead of the expected 'svuint8_t' to argument 1 of 'svaddhnt', after passing 'svuint16_t' to argument 2} } */
svaddhnt (s8, u16, 0); /* { dg-error {arguments 1 and 2 of 'svaddhnt' must have the same signedness, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c
index a151f90d1..40447cf83 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_2.c
@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svadd_x (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svadd_x', which expects 'svbool_t'} } */
svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */
svadd_x (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svadd_x', which expects an SVE type rather than a scalar} } */
- svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
+ svadd_x (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
svadd_x (pg, u8, u8);
- svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
- svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
- svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
- svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but previous arguments had type 'svuint8_t'} } */
+ svadd_x (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
+ svadd_x (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
+ svadd_x (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
+ svadd_x (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svadd_x', but argument 2 had type 'svuint8_t'} } */
svadd_x (pg, u8, 0);
- svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */
- svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but previous arguments had type 'svfloat16_t'} } */
+ svadd_x (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */
+ svadd_x (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svadd_x', but argument 2 had type 'svfloat16_t'} } */
svadd_x (pg, f16, f16);
svadd_x (pg, f16, 1);
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c
index 70ec9c585..94e20bc91 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_opt_n_3.c
@@ -11,19 +11,19 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svand_z (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svand_z', which expects 'svbool_t'} } */
svand_z (pg, pg, pg);
svand_z (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svand_z', which expects an SVE type rather than a scalar} } */
- svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
+ svand_z (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
svand_z (pg, u8, u8);
- svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
- svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
- svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
- svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but previous arguments had type 'svuint8_t'} } */
+ svand_z (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
+ svand_z (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
+ svand_z (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
+ svand_z (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svand_z', but argument 2 had type 'svuint8_t'} } */
svand_z (pg, u8, 0);
- svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but previous arguments had type 'svbool_t'} } */
+ svand_z (pg, pg, u8); /* { dg-error {passing 'svuint8_t' to argument 3 of 'svand_z', but argument 2 had type 'svbool_t'} } */
svand_z (pg, pg, 0); /* { dg-error {passing 'int' to argument 3 of 'svand_z', but its 'svbool_t' form does not accept scalars} } */
- svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */
- svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but previous arguments had type 'svfloat16_t'} } */
+ svand_z (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */
+ svand_z (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svand_z', but argument 2 had type 'svfloat16_t'} } */
svand_z (pg, f16, f16); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */
svand_z (pg, f16, 1); /* { dg-error {'svand_z' has no form that takes 'svfloat16_t' arguments} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c
index 7669e4a02..8939ce258 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_rotate_1.c
@@ -12,7 +12,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
svcadd_x (pg, s32, s32, 90); /* { dg-error {'svcadd_x' has no form that takes 'svint32_t' arguments} } */
svcadd_x (pg, 1, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcadd_x', which expects an SVE type rather than a scalar} } */
svcadd_x (pg, f32, 1, 90); /* { dg-error {passing 'int' to argument 3 of 'svcadd_x', which expects an SVE type rather than a scalar} } */
- svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but previous arguments had type 'svfloat32_t'} } */
+ svcadd_x (pg, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcadd_x', but argument 2 had type 'svfloat32_t'} } */
svcadd_x (pg, f32, f32, s32); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */
svcadd_x (pg, f32, f32, i); /* { dg-error {argument 4 of 'svcadd_x' must be an integer constant expression} } */
svcadd_x (pg, f32, f32, -90); /* { dg-error {passing -90 to argument 4 of 'svcadd_x', which expects either 90 or 270} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c
index 154662487..2c3fe5df1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/binary_to_uint_1.c
@@ -12,8 +12,8 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32)
svhistcnt_z (0, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */
svhistcnt_z (s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 1 of 'svhistcnt_z', which expects 'svbool_t'} } */
svhistcnt_z (pg, 0, s32); /* { dg-error {passing 'int' to argument 2 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */
- svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svbool_t'} } */
- svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but previous arguments had type 'svint32_t'} } */
+ svhistcnt_z (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svbool_t'} } */
+ svhistcnt_z (pg, s32, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svhistcnt_z', but argument 2 had type 'svint32_t'} } */
svhistcnt_z (pg, s32, 0); /* { dg-error {passing 'int' to argument 3 of 'svhistcnt_z', which expects an SVE type rather than a scalar} } */
svhistcnt_z (pg, pg, pg); /* { dg-error {'svhistcnt_z' has no form that takes 'svbool_t' arguments} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c
index ba1b2520f..47ce47328 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/clast_1.c
@@ -10,6 +10,6 @@ test (svbool_t pg, svint32_t s32, svint64_t s64, int i)
svclasta (pg, 1, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */
svclasta (pg, i, s32);
svclasta (pg, s32, 1); /* { dg-error {passing 'int' to argument 3 of 'svclasta', which expects an SVE type rather than a scalar} } */
- svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but previous arguments had type 'svint32_t'} } */
+ svclasta (pg, s32, s64); /* { dg-error {passing 'svint64_t' to argument 3 of 'svclasta', but argument 2 had type 'svint32_t'} } */
svclasta (pg, pg, pg); /* { dg-error {'svclasta' has no form that takes 'svbool_t' arguments} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c
index 5474124cc..0dd0ad910 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_1.c
@@ -13,15 +13,15 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svmatch (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmatch', which expects 'svbool_t'} } */
svmatch (pg, pg, pg); /* { dg-error {'svmatch' has no form that takes 'svbool_t' arguments} } */
svmatch (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svmatch', which expects an SVE type rather than a scalar} } */
- svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
+ svmatch (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
svmatch (pg, u8, u8);
- svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
- svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
- svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
- svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but previous arguments had type 'svuint8_t'} } */
+ svmatch (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
+ svmatch (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
+ svmatch (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
+ svmatch (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmatch', but argument 2 had type 'svuint8_t'} } */
svmatch (pg, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svmatch', which expects an SVE type rather than a scalar} } */
- svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */
- svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but previous arguments had type 'svfloat16_t'} } */
+ svmatch (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */
+ svmatch (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmatch', but argument 2 had type 'svfloat16_t'} } */
svmatch (pg, f16, f16); /* { dg-error {'svmatch' has no form that takes 'svfloat16_t' arguments} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c
index 6faa73972..cfa50d387 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/compare_opt_n_1.c
@@ -11,16 +11,16 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svcmpeq (u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svcmpeq', which expects 'svbool_t'} } */
svcmpeq (pg, pg, pg); /* { dg-error {'svcmpeq' has no form that takes 'svbool_t' arguments} } */
svcmpeq (pg, 1, u8); /* { dg-error {passing 'int' to argument 2 of 'svcmpeq', which expects an SVE type rather than a scalar} } */
- svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
+ svcmpeq (pg, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
svcmpeq (pg, u8, u8);
- svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
- svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
- svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
- svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svuint8_t'} } */
+ svcmpeq (pg, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
+ svcmpeq (pg, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
+ svcmpeq (pg, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
+ svcmpeq (pg, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svuint8_t'} } */
svcmpeq (pg, u8, 0);
- svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */
- svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but previous arguments had type 'svfloat16_t'} } */
+ svcmpeq (pg, f16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */
+ svcmpeq (pg, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svcmpeq', but argument 2 had type 'svfloat16_t'} } */
svcmpeq (pg, f16, f16);
svcmpeq (pg, f16, 1);
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
index 83e4a5600..7a617aa15 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_1.c
@@ -10,11 +10,11 @@ f1 (svuint8x2_t *ptr, svbool_t pg, svuint8_t u8, svfloat64_t f64,
*ptr = svcreate2 (u8); /* { dg-error {too few arguments to function 'svcreate2'} } */
*ptr = svcreate2 (u8, u8, u8); /* { dg-error {too many arguments to function 'svcreate2'} } */
*ptr = svcreate2 (u8x2, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 1 of 'svcreate2', which expects a single SVE vector rather than a tuple} } */
- *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */
- *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but previous arguments had type 'svuint8_t'} } */
+ *ptr = svcreate2 (u8, f64); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */
+ *ptr = svcreate2 (u8, pg); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate2', but argument 1 had type 'svuint8_t'} } */
*ptr = svcreate2 (u8, x); /* { dg-error {passing 'int' to argument 2 of 'svcreate2', which expects an SVE type rather than a scalar} } */
*ptr = svcreate2 (x, u8); /* { dg-error {passing 'int' to argument 1 of 'svcreate2', which expects an SVE type rather than a scalar} } */
- *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but previous arguments had type 'svbool_t'} } */
+ *ptr = svcreate2 (pg, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svcreate2', but argument 1 had type 'svbool_t'} } */
*ptr = svcreate2 (pg, pg); /* { dg-error {'svcreate2' has no form that takes 'svbool_t' arguments} } */
*ptr = svcreate2 (u8, u8);
*ptr = svcreate2 (f64, f64); /* { dg-error {incompatible types when assigning to type 'svuint8x2_t' from type 'svfloat64x2_t'} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c
index e3302f7e7..40f3a1fed 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_3.c
@@ -11,11 +11,11 @@ f1 (svfloat16x3_t *ptr, svbool_t pg, svfloat16_t f16, svfloat64_t f64,
*ptr = svcreate3 (f16, f16); /* { dg-error {too few arguments to function 'svcreate3'} } */
*ptr = svcreate3 (f16, f16, f16, f16); /* { dg-error {too many arguments to function 'svcreate3'} } */
*ptr = svcreate3 (f16x3, f16x3, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 1 of 'svcreate3', which expects a single SVE vector rather than a tuple} } */
- *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */
- *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but previous arguments had type 'svfloat16_t'} } */
+ *ptr = svcreate3 (f16, f16, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */
+ *ptr = svcreate3 (f16, pg, f16); /* { dg-error {passing 'svbool_t' to argument 2 of 'svcreate3', but argument 1 had type 'svfloat16_t'} } */
*ptr = svcreate3 (f16, x, f16); /* { dg-error {passing 'int' to argument 2 of 'svcreate3', which expects an SVE type rather than a scalar} } */
*ptr = svcreate3 (x, f16, f16); /* { dg-error {passing 'int' to argument 1 of 'svcreate3', which expects an SVE type rather than a scalar} } */
- *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but previous arguments had type 'svbool_t'} } */
+ *ptr = svcreate3 (pg, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svcreate3', but argument 1 had type 'svbool_t'} } */
*ptr = svcreate3 (pg, pg, pg); /* { dg-error {'svcreate3' has no form that takes 'svbool_t' arguments} } */
*ptr = svcreate3 (f16, f16, f16);
*ptr = svcreate3 (f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svfloat16x3_t' from type 'svfloat64x3_t'} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
index c850c94f0..bf3dd5d75 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
@@ -12,11 +12,11 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64,
*ptr = svcreate4 (s32, s32, s32); /* { dg-error {too few arguments to function 'svcreate4'} } */
*ptr = svcreate4 (s32, s32, s32, s32, s32); /* { dg-error {too many arguments to function 'svcreate4'} } */
*ptr = svcreate4 (s32x4, s32x4, s32x4, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 1 of 'svcreate4', which expects a single SVE vector rather than a tuple} } */
- *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but previous arguments had type 'svint32_t'} } */
- *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but previous arguments had type 'svint32_t'} } */
+ *ptr = svcreate4 (s32, s32, s32, f64); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcreate4', but argument 1 had type 'svint32_t'} } */
+ *ptr = svcreate4 (s32, s32, pg, s32); /* { dg-error {passing 'svbool_t' to argument 3 of 'svcreate4', but argument 1 had type 'svint32_t'} } */
*ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */
*ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */
- *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but previous arguments had type 'svbool_t'} } */
+ *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but argument 1 had type 'svbool_t'} } */
*ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */
*ptr = svcreate4 (s32, s32, s32, s32);
*ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
index 7fc7bb67b..ca2ab8a6f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/mmla_1.c
@@ -44,13 +44,13 @@ f2 (svbool_t pg, svint8_t s8, svuint8_t u8, svuint32_t u32, svint32_t s32,
svmmla (u32, u32, u32); /* { dg-error {passing 'svuint32_t' instead of the expected 'svuint8_t' to argument 2 of 'svmmla', after passing 'svuint32_t' to argument 1} } */
svmmla (f16, s8, s8); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */
- svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
- svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
- svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
- svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
- svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat32_t'} } */
- svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
- svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but previous arguments had type 'svfloat64_t'} } */
+ svmmla (f32, s8, s8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
+ svmmla (f32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
+ svmmla (f32, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
+ svmmla (f64, f16, f16); /* { dg-error {passing 'svfloat16_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */
+ svmmla (f32, f32, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat32_t'} } */
+ svmmla (f64, f32, f16); /* { dg-error {passing 'svfloat32_t' to argument 2 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */
+ svmmla (f64, f64, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmmla', but argument 1 had type 'svfloat64_t'} } */
svmmla (f16, f16, f16); /* { dg-error {'svmmla' has no form that takes 'svfloat16_t' arguments} } */
svmmla (f32, f32, f32);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
index 520c11f79..0a67f82bf 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_1.c
@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
svmla_lane (1, f32, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
svmla_lane (f32, 1, f32, 0); /* { dg-error {passing 'int' to argument 2 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
svmla_lane (f32, f32, 1, 0); /* { dg-error {passing 'int' to argument 3 of 'svmla_lane', which expects an SVE type rather than a scalar} } */
- svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */
- svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but previous arguments had type 'svfloat32_t'} } */
+ svmla_lane (f32, f64, f32, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */
+ svmla_lane (f32, f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svmla_lane', but argument 1 had type 'svfloat32_t'} } */
svmla_lane (f32, f32, f32, s32); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */
svmla_lane (f32, f32, f32, i); /* { dg-error {argument 4 of 'svmla_lane' must be an integer constant expression} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
index 3163d130c..60c9c466e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_lane_rotate_1.c
@@ -14,8 +14,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat32_t f32, svfloat64_t f64,
svcmla_lane (1, f32, f32, 0, 90); /* { dg-error {passing 'int' to argument 1 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
svcmla_lane (f32, 1, f32, 0, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
svcmla_lane (f32, f32, 1, 0, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_lane', which expects an SVE type rather than a scalar} } */
- svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */
- svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but previous arguments had type 'svfloat32_t'} } */
+ svcmla_lane (f32, f64, f32, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */
+ svcmla_lane (f32, f32, f64, 0, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_lane', but argument 1 had type 'svfloat32_t'} } */
svcmla_lane (f32, f32, f32, s32, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */
svcmla_lane (f32, f32, f32, i, 0); /* { dg-error {argument 4 of 'svcmla_lane' must be an integer constant expression} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
index ac789c2be..6ca223475 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_opt_n_1.c
@@ -11,24 +11,24 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svmla_x (u8, u8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 1 of 'svmla_x', which expects 'svbool_t'} } */
svmla_x (pg, pg, pg, pg); /* { dg-error {'svmla_x' has no form that takes 'svbool_t' arguments} } */
svmla_x (pg, 1, u8, u8); /* { dg-error {passing 'int' to argument 2 of 'svmla_x', which expects an SVE type rather than a scalar} } */
- svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
svmla_x (pg, u8, u8, u8);
- svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, s16, u8); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u16, u8); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, f16, u8); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, pg, u8); /* { dg-error {passing 'svbool_t' to argument 3 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
svmla_x (pg, u8, 0, u8); /* { dg-error {passing 'int' to argument 3 of 'svmla_x', which expects an SVE type rather than a scalar} } */
- svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
- svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but previous arguments had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, f16); /* { dg-error {passing 'svfloat16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
+ svmla_x (pg, u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 4 of 'svmla_x', but argument 2 had type 'svuint8_t'} } */
svmla_x (pg, u8, u8, 0);
- svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
- svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
- svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
- svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but previous arguments had type 'svfloat16_t'} } */
+ svmla_x (pg, f16, s16, f16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
+ svmla_x (pg, f16, u16, f16); /* { dg-error {passing 'svuint16_t' to argument 3 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
+ svmla_x (pg, f16, f16, s16); /* { dg-error {passing 'svint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
+ svmla_x (pg, f16, f16, u16); /* { dg-error {passing 'svuint16_t' to argument 4 of 'svmla_x', but argument 2 had type 'svfloat16_t'} } */
svmla_x (pg, f16, f16, f16);
svmla_x (pg, f16, f16, 1);
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
index bb6740289..68b2cfc1d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_rotate_1.c
@@ -13,8 +13,8 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
svcmla_x (pg, 1, f32, f32, 90); /* { dg-error {passing 'int' to argument 2 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
svcmla_x (pg, f32, 1, f32, 90); /* { dg-error {passing 'int' to argument 3 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
svcmla_x (pg, f32, f32, 1, 90); /* { dg-error {passing 'int' to argument 4 of 'svcmla_x', which expects an SVE type rather than a scalar} } */
- svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */
- svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but previous arguments had type 'svfloat32_t'} } */
+ svcmla_x (pg, f32, f64, f32, 90); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */
+ svcmla_x (pg, f32, f32, f64, 90); /* { dg-error {passing 'svfloat64_t' to argument 4 of 'svcmla_x', but argument 2 had type 'svfloat32_t'} } */
svcmla_x (pg, f32, f32, f32, s32); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */
svcmla_x (pg, f32, f32, f32, i); /* { dg-error {argument 5 of 'svcmla_x' must be an integer constant expression} } */
svcmla_x (pg, f32, f32, f32, -90); /* { dg-error {passing -90 to argument 5 of 'svcmla_x', which expects 0, 90, 180 or 270} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
index cfe601631..134cf98fd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_shift_right_imm_1.c
@@ -11,10 +11,10 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svint16_t s16,
{
const int one = 1;
pg = svsra (pg, pg, 1); /* { dg-error {'svsra' has no form that takes 'svbool_t' arguments} } */
- pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but previous arguments had type 'svbool_t'} } */
+ pg = svsra (pg, s8, 1); /* { dg-error {passing 'svint8_t' to argument 2 of 'svsra', but argument 1 had type 'svbool_t'} } */
s8 = svsra (1, s8, 1); /* { dg-error {passing 'int' to argument 1 of 'svsra', which expects an SVE type rather than a scalar} } */
- s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */
- s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but previous arguments had type 'svint8_t'} } */
+ s8 = svsra (s8, u8, 1); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */
+ s8 = svsra (s8, pg, 1); /* { dg-error {passing 'svbool_t' to argument 2 of 'svsra', but argument 1 had type 'svint8_t'} } */
s8 = svsra (s8, 1, 1); /* { dg-error {passing 'int' to argument 2 of 'svsra', which expects an SVE type rather than a scalar} } */
s8 = svsra (s8, s8, x); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */
s8 = svsra (s8, s8, one); /* { dg-error {argument 3 of 'svsra' must be an integer constant expression} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
index 5fb497701..a639562b1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/ternary_uint_1.c
@@ -15,14 +15,14 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16,
svtbx (u8, 0, u8); /* { dg-error {passing 'int' to argument 2 of 'svtbx', which expects an SVE type rather than a scalar} } */
svtbx (u8, u8, 0); /* { dg-error {passing 'int' to argument 3 of 'svtbx', which expects an SVE type rather than a scalar} } */
- svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svuint8_t'} } */
+ svtbx (u8, s8, u8); /* { dg-error {passing 'svint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svuint8_t'} } */
svtbx (u8, u8, u8);
svtbx (u8, u8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
svtbx (u8, u8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svuint8_t' and 'svuint16_t' respectively} } */
svtbx (u8, u8, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
svtbx (u8, u8, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
- svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but previous arguments had type 'svint8_t'} } */
+ svtbx (s8, u8, u8); /* { dg-error {passing 'svuint8_t' to argument 2 of 'svtbx', but argument 1 had type 'svint8_t'} } */
svtbx (s8, s8, u8);
svtbx (s8, s8, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
svtbx (s8, s8, u16); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint8_t' and 'svuint16_t' respectively} } */
@@ -36,7 +36,7 @@ f1 (svbool_t pg, svuint8_t u8, svint8_t s8, svuint16_t u16, svint16_t s16,
svtbx (u16, u16, s16); /* { dg-error {passing 'svint16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
svtbx (u16, u16, f16); /* { dg-error {passing 'svfloat16_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
- svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but previous arguments had type 'svint16_t'} } */
+ svtbx (s16, u16, u16); /* { dg-error {passing 'svuint16_t' to argument 2 of 'svtbx', but argument 1 had type 'svint16_t'} } */
svtbx (s16, s16, u8); /* { dg-error {arguments 1 and 3 of 'svtbx' must have the same element size, but the values passed here have type 'svint16_t' and 'svuint8_t' respectively} } */
svtbx (s16, s16, s8); /* { dg-error {passing 'svint8_t' to argument 3 of 'svtbx', which expects a vector of unsigned integers} } */
svtbx (s16, s16, u16);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
index c2eda93e3..992b50199 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/tmad_1.c
@@ -11,7 +11,7 @@ f1 (svbool_t pg, svfloat32_t f32, svfloat64_t f64, svint32_t s32, int i)
svtmad (s32, s32, 0); /* { dg-error {'svtmad' has no form that takes 'svint32_t' arguments} } */
svtmad (1, f32, 0); /* { dg-error {passing 'int' to argument 1 of 'svtmad', which expects an SVE type rather than a scalar} } */
svtmad (f32, 1, 0); /* { dg-error {passing 'int' to argument 2 of 'svtmad', which expects an SVE type rather than a scalar} } */
- svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but previous arguments had type 'svfloat32_t'} } */
+ svtmad (f32, f64, 0); /* { dg-error {passing 'svfloat64_t' to argument 2 of 'svtmad', but argument 1 had type 'svfloat32_t'} } */
svtmad (f32, f32, s32); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
svtmad (f32, f32, i); /* { dg-error {argument 3 of 'svtmad' must be an integer constant expression} } */
svtmad (f32, f32, -1); /* { dg-error {passing -1 to argument 3 of 'svtmad', which expects a value in the range \[0, 7\]} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
index 8c865a0e6..9c9c383dd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/unary_1.c
@@ -13,9 +13,9 @@ f1 (svbool_t pg, svint32_t s32, svuint32_t u32, svfloat32_t f32)
svabs_m (s32, pg, s32);
svabs_m (u32, pg, u32); /* { dg-error {'svabs_m' has no form that takes 'svuint32_t' arguments} } */
svabs_m (f32, pg, f32);
- svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
- svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
- svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but previous arguments had type 'svint32_t'} } */
- svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but previous arguments had type 'svbool_t'} } */
+ svabs_m (s32, pg, u32); /* { dg-error {passing 'svuint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
+ svabs_m (s32, pg, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
+ svabs_m (s32, pg, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svabs_m', but argument 1 had type 'svint32_t'} } */
+ svabs_m (pg, pg, s32); /* { dg-error {passing 'svint32_t' to argument 3 of 'svabs_m', but argument 1 had type 'svbool_t'} } */
svabs_m (pg, pg, pg); /* { dg-error {'svabs_m' has no form that takes 'svbool_t' arguments} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
index 7e869bda8..6ffd3d9e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/undeclared_2.c
@@ -9,7 +9,7 @@ f (svint8_t s8, svuint16_t u16, svfloat32_t f32,
u16 = svneg_x (pg, u16); /* { dg-error {'svneg_x' has no form that takes 'svuint16_t' arguments} } */
f32 = svclz_x (pg, f32); /* { dg-error {'svclz_x' has no form that takes 'svfloat32_t' arguments} } */
s16x2 = svcreate2 (s8); /* { dg-error {too few arguments to function 'svcreate2'} } */
- u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but previous arguments had type 'svuint16_t'} } */
+ u32x3 = svcreate3 (u16, u16, f32); /* { dg-error {passing 'svfloat32_t' to argument 3 of 'svcreate3', but argument 1 had type 'svuint16_t'} } */
f64x4 = svcreate4 (f32, f32, f32, f32, f32); /* { dg-error {too many arguments to function 'svcreate4'} } */
pg = svadd_x (pg, pg, pg); /* { dg-error {'svadd_x' has no form that takes 'svbool_t' arguments} } */
}
--
2.33.0


@ -0,0 +1,368 @@
From 05dee9ad331c27345b014fe9aec0067a6f3b07d9 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:21 +0000
Subject: [PATCH 069/157] [Backport][SME] aarch64: Make more use of sve_type in
ACLE code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1f7f076ad6293cad19d35efdf726eb48cf78e3dd
This patch makes some functions operate on sve_type, rather than just
on type suffixes. It also allows an overload to be resolved based on
a mode and sve_type. In this case the sve_type is used to derive the
group size as well as a type suffix.
This is needed for the SME2 intrinsics and the new tuple forms of
svreinterpret. No functional change intended on its own.
gcc/
* config/aarch64/aarch64-sve-builtins.h
(function_resolver::lookup_form): Add an overload that takes
an sve_type rather than type and group suffixes.
(function_resolver::resolve_to): Likewise.
(function_resolver::infer_vector_or_tuple_type): Return an sve_type.
(function_resolver::infer_tuple_type): Likewise.
(function_resolver::require_matching_vector_type): Take an sve_type
rather than a type_suffix_index.
(function_resolver::require_derived_vector_type): Likewise.
* config/aarch64/aarch64-sve-builtins.cc (num_vectors_to_group):
New function.
(function_resolver::lookup_form): Add an overload that takes
an sve_type rather than type and group suffixes.
(function_resolver::resolve_to): Likewise.
(function_resolver::infer_vector_or_tuple_type): Return an sve_type.
(function_resolver::infer_tuple_type): Likewise.
(function_resolver::infer_vector_type): Update accordingly.
(function_resolver::require_matching_vector_type): Take an sve_type
rather than a type_suffix_index.
(function_resolver::require_derived_vector_type): Likewise.
* config/aarch64/aarch64-sve-builtins-shapes.cc (get_def::resolve)
(set_def::resolve, store_def::resolve, tbl_tuple_def::resolve): Update
calls accordingly.
---
.../aarch64/aarch64-sve-builtins-shapes.cc | 16 +--
gcc/config/aarch64/aarch64-sve-builtins.cc | 111 +++++++++++++-----
gcc/config/aarch64/aarch64-sve-builtins.h | 12 +-
3 files changed, 95 insertions(+), 44 deletions(-)
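As an aside on the hunks below: they replace checks such as
"(type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES" with
"!(type = r.infer_tuple_type (i))", which relies on sve_type being
contextually convertible to bool, with "no type inferred" encoded as
zero vectors.  A minimal standalone sketch of that pattern (simplified
stand-in types, not the real GCC declarations):

struct sve_type
{
  int type;			/* stand-in for type_suffix_index */
  unsigned int num_vectors;	/* 0 means "no type inferred" */
  explicit operator bool () const { return num_vectors != 0; }
};

/* Model of infer_tuple_type: return an empty sve_type on failure.  */
static sve_type
infer (bool ok)
{
  return ok ? sve_type { 1, 2 } : sve_type {};
}

static bool
resolve ()
{
  sve_type type;
  if (!(type = infer (true)))	/* failure check reads naturally */
    return false;		/* mirrors "return error_mark_node" */
  return true;
}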
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
index 40aa418e0..f187b4cb2 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-shapes.cc
@@ -1904,9 +1904,9 @@ struct get_def : public overloaded_base<0>
resolve (function_resolver &r) const OVERRIDE
{
unsigned int i, nargs;
- type_suffix_index type;
+ sve_type type;
if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
+ || !(type = r.infer_tuple_type (i))
|| !r.require_integer_immediate (i + 1))
return error_mark_node;
@@ -2417,9 +2417,9 @@ struct set_def : public overloaded_base<0>
resolve (function_resolver &r) const OVERRIDE
{
unsigned int i, nargs;
- type_suffix_index type;
+ sve_type type;
if (!r.check_gp_argument (3, i, nargs)
- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
+ || !(type = r.infer_tuple_type (i))
|| !r.require_integer_immediate (i + 1)
|| !r.require_derived_vector_type (i + 2, i, type))
return error_mark_node;
@@ -2592,11 +2592,11 @@ struct store_def : public overloaded_base<0>
gcc_assert (r.mode_suffix_id == MODE_none || vnum_p);
unsigned int i, nargs;
- type_suffix_index type;
+ sve_type type;
if (!r.check_gp_argument (vnum_p ? 3 : 2, i, nargs)
|| !r.require_pointer_type (i)
|| (vnum_p && !r.require_scalar_type (i + 1, "int64_t"))
- || ((type = r.infer_tuple_type (nargs - 1)) == NUM_TYPE_SUFFIXES))
+ || !(type = r.infer_tuple_type (nargs - 1)))
return error_mark_node;
return r.resolve_to (r.mode_suffix_id, type);
@@ -2713,9 +2713,9 @@ struct tbl_tuple_def : public overloaded_base<0>
resolve (function_resolver &r) const OVERRIDE
{
unsigned int i, nargs;
- type_suffix_index type;
+ sve_type type;
if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_tuple_type (i)) == NUM_TYPE_SUFFIXES
+ || !(type = r.infer_tuple_type (i))
|| !r.require_derived_vector_type (i + 1, i, type, TYPE_unsigned))
return error_mark_node;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 1545fd78d..e98274f8a 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -659,6 +659,21 @@ find_type_suffix_for_scalar_type (const_tree type)
return NUM_TYPE_SUFFIXES;
}
+/* Return the implicit group suffix for intrinsics that operate on NVECTORS
+ vectors. */
+static group_suffix_index
+num_vectors_to_group (unsigned int nvectors)
+{
+ switch (nvectors)
+ {
+ case 1: return GROUP_none;
+ case 2: return GROUP_x2;
+ case 3: return GROUP_x3;
+ case 4: return GROUP_x4;
+ }
+ gcc_unreachable ();
+}
+
/* Return the vector type associated with TYPE. */
static tree
get_vector_type (sve_type type)
@@ -1282,6 +1297,27 @@ function_resolver::lookup_form (mode_suffix_index mode,
return rfn ? rfn->decl : NULL_TREE;
}
+/* Silently check whether there is an instance of the function that has the
+ mode suffix given by MODE and the type and group suffixes implied by TYPE.
+ If the overloaded function has an explicit first type suffix (like
+ conversions do), TYPE describes the implicit second type suffix.
+ Otherwise, TYPE describes the only type suffix.
+
+ Return the decl of the function if it exists, otherwise return null. */
+tree
+function_resolver::lookup_form (mode_suffix_index mode, sve_type type)
+{
+ type_suffix_index type0 = type_suffix_ids[0];
+ type_suffix_index type1 = type_suffix_ids[1];
+ (type0 == NUM_TYPE_SUFFIXES ? type0 : type1) = type.type;
+
+ group_suffix_index group = group_suffix_id;
+ if (group == GROUP_none && type.num_vectors != vectors_per_tuple ())
+ group = num_vectors_to_group (type.num_vectors);
+
+ return lookup_form (mode, type0, type1, group);
+}
+
/* Resolve the function to one with the mode suffix given by MODE, the
type suffixes given by TYPE0 and TYPE1, and group suffix given by
GROUP. Return its function decl on success, otherwise report an
@@ -1305,6 +1341,19 @@ function_resolver::resolve_to (mode_suffix_index mode,
return res;
}
+/* Resolve the function to one that has the suffixes associated with MODE
+ and TYPE; see lookup_form for how TYPE is interpreted. Return the
+ function decl on success, otherwise report an error and return
+ error_mark_node. */
+tree
+function_resolver::resolve_to (mode_suffix_index mode, sve_type type)
+{
+ if (tree res = lookup_form (mode, type))
+ return res;
+
+ return report_no_such_form (type);
+}
+
/* Require argument ARGNO to be a 32-bit or 64-bit scalar integer type.
Return the associated type suffix on success, otherwise report an
error and return NUM_TYPE_SUFFIXES. */
@@ -1424,21 +1473,20 @@ function_resolver::infer_sve_type (unsigned int argno)
/* Require argument ARGNO to be a single vector or a tuple of NUM_VECTORS
vectors; NUM_VECTORS is 1 for the former. Return the associated type
- suffix on success, using TYPE_SUFFIX_b for predicates. Report an error
- and return NUM_TYPE_SUFFIXES on failure. */
-type_suffix_index
+ on success. Report an error on failure. */
+sve_type
function_resolver::infer_vector_or_tuple_type (unsigned int argno,
unsigned int num_vectors)
{
auto type = infer_sve_type (argno);
if (!type)
- return NUM_TYPE_SUFFIXES;
+ return type;
if (type.num_vectors == num_vectors)
- return type.type;
+ return type;
report_incorrect_num_vectors (argno, type, num_vectors);
- return NUM_TYPE_SUFFIXES;
+ return {};
}
/* Require argument ARGNO to have some form of vector type. Return the
@@ -1447,7 +1495,9 @@ function_resolver::infer_vector_or_tuple_type (unsigned int argno,
type_suffix_index
function_resolver::infer_vector_type (unsigned int argno)
{
- return infer_vector_or_tuple_type (argno, 1);
+ if (auto type = infer_vector_or_tuple_type (argno, 1))
+ return type.type;
+ return NUM_TYPE_SUFFIXES;
}
/* Like infer_vector_type, but also require the type to be integral. */
@@ -1512,10 +1562,9 @@ function_resolver::infer_sd_vector_type (unsigned int argno)
/* If the function operates on tuples of vectors, require argument ARGNO to be
a tuple with the appropriate number of vectors, otherwise require it to be
- a single vector. Return the associated type suffix on success, using
- TYPE_SUFFIX_b for predicates. Report an error and return NUM_TYPE_SUFFIXES
+ a single vector. Return the associated type on success. Report an error
on failure. */
-type_suffix_index
+sve_type
function_resolver::infer_tuple_type (unsigned int argno)
{
return infer_vector_or_tuple_type (argno, vectors_per_tuple ());
@@ -1567,10 +1616,10 @@ function_resolver::require_vector_type (unsigned int argno,
bool
function_resolver::require_matching_vector_type (unsigned int argno,
unsigned int first_argno,
- type_suffix_index type)
+ sve_type type)
{
- type_suffix_index new_type = infer_vector_type (argno);
- if (new_type == NUM_TYPE_SUFFIXES)
+ sve_type new_type = infer_sve_type (argno);
+ if (!new_type)
return false;
if (type != new_type)
@@ -1613,15 +1662,13 @@ function_resolver::require_matching_vector_type (unsigned int argno,
bool function_resolver::
require_derived_vector_type (unsigned int argno,
unsigned int first_argno,
- type_suffix_index first_type,
+ sve_type first_type,
type_class_index expected_tclass,
unsigned int expected_bits)
{
/* If the type needs to match FIRST_ARGNO exactly, use the preferred
- error message for that case. The VECTOR_TYPE_P test excludes tuple
- types, which we handle below instead. */
- bool both_vectors_p = VECTOR_TYPE_P (get_argument_type (first_argno));
- if (both_vectors_p
+ error message for that case. */
+ if (first_type.num_vectors == 1
&& expected_tclass == SAME_TYPE_CLASS
&& expected_bits == SAME_SIZE)
{
@@ -1631,17 +1678,18 @@ require_derived_vector_type (unsigned int argno,
}
/* Use FIRST_TYPE to get the expected type class and element size. */
+ auto &first_type_suffix = type_suffixes[first_type.type];
type_class_index orig_expected_tclass = expected_tclass;
if (expected_tclass == NUM_TYPE_CLASSES)
- expected_tclass = type_suffixes[first_type].tclass;
+ expected_tclass = first_type_suffix.tclass;
unsigned int orig_expected_bits = expected_bits;
if (expected_bits == SAME_SIZE)
- expected_bits = type_suffixes[first_type].element_bits;
+ expected_bits = first_type_suffix.element_bits;
else if (expected_bits == HALF_SIZE)
- expected_bits = type_suffixes[first_type].element_bits / 2;
+ expected_bits = first_type_suffix.element_bits / 2;
else if (expected_bits == QUARTER_SIZE)
- expected_bits = type_suffixes[first_type].element_bits / 4;
+ expected_bits = first_type_suffix.element_bits / 4;
/* If the expected type doesn't depend on FIRST_TYPE at all,
just check for the fixed choice of vector type. */
@@ -1655,13 +1703,14 @@ require_derived_vector_type (unsigned int argno,
/* Require the argument to be some form of SVE vector type,
without being specific about the type of vector we want. */
- type_suffix_index actual_type = infer_vector_type (argno);
- if (actual_type == NUM_TYPE_SUFFIXES)
+ sve_type actual_type = infer_vector_type (argno);
+ if (!actual_type)
return false;
/* Exit now if we got the right type. */
- bool tclass_ok_p = (type_suffixes[actual_type].tclass == expected_tclass);
- bool size_ok_p = (type_suffixes[actual_type].element_bits == expected_bits);
+ auto &actual_type_suffix = type_suffixes[actual_type.type];
+ bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass);
+ bool size_ok_p = (actual_type_suffix.element_bits == expected_bits);
if (tclass_ok_p && size_ok_p)
return true;
@@ -1701,7 +1750,9 @@ require_derived_vector_type (unsigned int argno,
/* If the arguments have consistent type classes, but a link between
the sizes has been broken, try to describe the error in those terms. */
- if (both_vectors_p && tclass_ok_p && orig_expected_bits == SAME_SIZE)
+ if (first_type.num_vectors == 1
+ && tclass_ok_p
+ && orig_expected_bits == SAME_SIZE)
{
if (argno < first_argno)
{
@@ -1718,11 +1769,11 @@ require_derived_vector_type (unsigned int argno,
/* Likewise in reverse: look for cases in which the sizes are consistent
but a link between the type classes has been broken. */
- if (both_vectors_p
+ if (first_type.num_vectors == 1
&& size_ok_p
&& orig_expected_tclass == SAME_TYPE_CLASS
- && type_suffixes[first_type].integer_p
- && type_suffixes[actual_type].integer_p)
+ && first_type_suffix.integer_p
+ && actual_type_suffix.integer_p)
{
if (argno < first_argno)
{
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index f7d6cc084..a7cfff7c1 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -458,28 +458,28 @@ public:
type_suffix_index = NUM_TYPE_SUFFIXES,
type_suffix_index = NUM_TYPE_SUFFIXES,
group_suffix_index = GROUP_none);
+ tree lookup_form (mode_suffix_index, sve_type);
tree resolve_to (mode_suffix_index,
type_suffix_index = NUM_TYPE_SUFFIXES,
type_suffix_index = NUM_TYPE_SUFFIXES,
group_suffix_index = GROUP_none);
+ tree resolve_to (mode_suffix_index, sve_type);
type_suffix_index infer_integer_scalar_type (unsigned int);
type_suffix_index infer_pointer_type (unsigned int, bool = false);
sve_type infer_sve_type (unsigned int);
- type_suffix_index infer_vector_or_tuple_type (unsigned int, unsigned int);
+ sve_type infer_vector_or_tuple_type (unsigned int, unsigned int);
type_suffix_index infer_vector_type (unsigned int);
type_suffix_index infer_integer_vector_type (unsigned int);
type_suffix_index infer_unsigned_vector_type (unsigned int);
type_suffix_index infer_sd_vector_type (unsigned int);
- type_suffix_index infer_tuple_type (unsigned int);
+ sve_type infer_tuple_type (unsigned int);
bool require_vector_or_scalar_type (unsigned int);
bool require_vector_type (unsigned int, vector_type_index);
- bool require_matching_vector_type (unsigned int, unsigned int,
- type_suffix_index);
- bool require_derived_vector_type (unsigned int, unsigned int,
- type_suffix_index,
+ bool require_matching_vector_type (unsigned int, unsigned int, sve_type);
+ bool require_derived_vector_type (unsigned int, unsigned int, sve_type,
type_class_index = SAME_TYPE_CLASS,
unsigned int = SAME_SIZE);
--
2.33.0


@ -0,0 +1,106 @@
From 1abb02c636eef4f9a5f55f243bc0c4d38ee1f849 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:22 +0000
Subject: [PATCH 070/157] [Backport][SME] aarch64: Tweak error message for
(tuple,vector) pairs
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce2e22b7e02c7fbd1ab8145b632559b67ae9958
SME2 adds more intrinsics that take a tuple of vectors followed
by a single vector, with the two arguments expected to have the
same element type. Unlike with the existing svset* intrinsics,
the size of the tuple is not fixed by the overloaded function name.
This patch adds an error message that (hopefully) copes better
with that combination.
gcc/
* config/aarch64/aarch64-sve-builtins.cc
(function_resolver::require_derived_vector_type): Add a specific
error message for the case in which the caller wants a single
vector whose element type matches a previous tuple argument.
gcc/testsuite/
* gcc.target/aarch64/sve/acle/general-c/set_1.c: Tweak expected
error message.
* gcc.target/aarch64/sve/acle/general-c/set_3.c: Likewise.
* gcc.target/aarch64/sve/acle/general-c/set_5.c: Likewise.
---
gcc/config/aarch64/aarch64-sve-builtins.cc | 13 +++++++++++++
.../gcc.target/aarch64/sve/acle/general-c/set_1.c | 4 ++--
.../gcc.target/aarch64/sve/acle/general-c/set_3.c | 4 ++--
.../gcc.target/aarch64/sve/acle/general-c/set_5.c | 4 ++--
4 files changed, 19 insertions(+), 6 deletions(-)
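For illustration, the kind of user code affected, mirroring the set_1.c
test below (assumes arm_sve.h and SVE enabled):

#include <arm_sve.h>

svuint8x2_t
f (svuint8x2_t u8x2, svfloat64_t f64)
{
  /* Old diagnostic: "passing 'svfloat64_t' instead of the expected
     'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t'
     to argument 1".
     New diagnostic: "passing 'svfloat64_t' to argument 3 of 'svset2',
     but argument 1 was a tuple of 'svuint8_t'".  */
  return svset2 (u8x2, 0, f64);
}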
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index e98274f8a..9224916a7 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -1707,6 +1707,19 @@ require_derived_vector_type (unsigned int argno,
if (!actual_type)
return false;
+ if (orig_expected_tclass == SAME_TYPE_CLASS
+ && orig_expected_bits == SAME_SIZE)
+ {
+ if (actual_type.type == first_type.type)
+ return true;
+
+ error_at (location, "passing %qT to argument %d of %qE, but"
+ " argument %d was a tuple of %qT",
+ get_vector_type (actual_type), argno + 1, fndecl,
+ first_argno + 1, get_vector_type (first_type.type));
+ return false;
+ }
+
/* Exit now if we got the right type. */
auto &actual_type_suffix = type_suffixes[actual_type.type];
bool tclass_ok_p = (actual_type_suffix.tclass == expected_tclass);
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
index f07c76102..f2a6da536 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_1.c
@@ -16,8 +16,8 @@ f1 (svbool_t pg, svuint8_t u8, svuint8x2_t u8x2, svuint8x3_t u8x3, int x)
u8x2 = svset2 (u8x3, 0, u8); /* { dg-error {passing 'svuint8x3_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */
u8x2 = svset2 (pg, 0, u8); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset2', which expects a tuple of 2 vectors} } */
u8x2 = svset2 (u8x2, 0, u8x2); /* { dg-error {passing 'svuint8x2_t' to argument 3 of 'svset2', which expects a single SVE vector rather than a tuple} } */
- u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */
- u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svuint8_t' to argument 3 of 'svset2', after passing 'svuint8x2_t' to argument 1} } */
+ u8x2 = svset2 (u8x2, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */
+ u8x2 = svset2 (u8x2, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset2', but argument 1 was a tuple of 'svuint8_t'} } */
u8x2 = svset2 (u8x2, x, u8); /* { dg-error {argument 2 of 'svset2' must be an integer constant expression} } */
u8x2 = svset2 (u8x2, 0, u8);
f64 = svset2 (u8x2, 0, u8); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svuint8x2_t'} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
index 543a1bea8..92b955f83 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_3.c
@@ -17,8 +17,8 @@ f1 (svbool_t pg, svfloat16_t f16, svfloat16x3_t f16x3, svfloat16x4_t f16x4,
f16x3 = svset3 (f16x4, 0, f16); /* { dg-error {passing 'svfloat16x4_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */
f16x3 = svset3 (pg, 0, f16); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset3', which expects a tuple of 3 vectors} } */
f16x3 = svset3 (f16x3, 0, f16x3); /* { dg-error {passing 'svfloat16x3_t' to argument 3 of 'svset3', which expects a single SVE vector rather than a tuple} } */
- f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */
- f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svfloat16_t' to argument 3 of 'svset3', after passing 'svfloat16x3_t' to argument 1} } */
+ f16x3 = svset3 (f16x3, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */
+ f16x3 = svset3 (f16x3, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset3', but argument 1 was a tuple of 'svfloat16_t'} } */
f16x3 = svset3 (f16x3, x, f16); /* { dg-error {argument 2 of 'svset3' must be an integer constant expression} } */
f16x3 = svset3 (f16x3, 0, f16);
f64 = svset3 (f16x3, 0, f16); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svfloat16x3_t'} } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c
index be911a731..f0696fb07 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/set_5.c
@@ -16,8 +16,8 @@ f1 (svbool_t pg, svint32_t s32, svint32x4_t s32x4, svint32x2_t s32x2, int x)
s32x4 = svset4 (s32x2, 0, s32); /* { dg-error {passing 'svint32x2_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */
s32x4 = svset4 (pg, 0, s32); /* { dg-error {passing 'svbool_t' to argument 1 of 'svset4', which expects a tuple of 4 vectors} } */
s32x4 = svset4 (s32x4, 0, s32x4); /* { dg-error {passing 'svint32x4_t' to argument 3 of 'svset4', which expects a single SVE vector rather than a tuple} } */
- s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */
- s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' instead of the expected 'svint32_t' to argument 3 of 'svset4', after passing 'svint32x4_t' to argument 1} } */
+ s32x4 = svset4 (s32x4, 0, f64); /* { dg-error {passing 'svfloat64_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */
+ s32x4 = svset4 (s32x4, 0, pg); /* { dg-error {passing 'svbool_t' to argument 3 of 'svset4', but argument 1 was a tuple of 'svint32_t'} } */
s32x4 = svset4 (s32x4, x, s32); /* { dg-error {argument 2 of 'svset4' must be an integer constant expression} } */
s32x4 = svset4 (s32x4, 0, s32);
f64 = svset4 (s32x4, 0, s32); /* { dg-error {incompatible types when assigning to type 'svfloat64_t' from type 'svint32x4_t'} } */
--
2.33.0

File diff suppressed because it is too large


@ -0,0 +1,90 @@
From 11f813112629dbad432134f7b4c7c9a93551eb3c Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Mon, 27 Nov 2023 13:38:16 +0000
Subject: [PATCH 072/157] [Backport][SME] attribs: Use existing traits for
excl_hash_traits
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b33cf3a3a2025a4856f90fea8bd04884c2f6b31
excl_hash_traits can be defined more simply by reusing existing traits.
gcc/
* attribs.cc (excl_hash_traits): Delete.
(test_attribute_exclusions): Use pair_hash and nofree_string_hash
instead.
---
gcc/attribs.cc | 45 +++------------------------------------------
1 file changed, 3 insertions(+), 42 deletions(-)
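For context, pair_hash composes the traits of its two components and
nofree_string_hash hashes borrowed C strings without freeing them, so
the replacement one-liner covers everything the deleted traits did.
A rough usage sketch (GCC-internal API, simplified):

/* Hash set keyed on pairs of borrowed C strings, as used for the
   exclusion set below.  */
typedef pair_hash<nofree_string_hash, nofree_string_hash> string_pair_hash;

hash_set<string_pair_hash> seen;
seen.add (std::make_pair ("hot", "cold"));
if (seen.contains (std::make_pair ("hot", "cold")))
  { /* pair already recorded */ }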
diff --git a/gcc/attribs.cc b/gcc/attribs.cc
index b219f8780..16d05b1da 100644
--- a/gcc/attribs.cc
+++ b/gcc/attribs.cc
@@ -2555,47 +2555,6 @@ namespace selftest
typedef std::pair<const char *, const char *> excl_pair;
-struct excl_hash_traits: typed_noop_remove<excl_pair>
-{
- typedef excl_pair value_type;
- typedef value_type compare_type;
-
- static hashval_t hash (const value_type &x)
- {
- hashval_t h1 = htab_hash_string (x.first);
- hashval_t h2 = htab_hash_string (x.second);
- return h1 ^ h2;
- }
-
- static bool equal (const value_type &x, const value_type &y)
- {
- return !strcmp (x.first, y.first) && !strcmp (x.second, y.second);
- }
-
- static void mark_deleted (value_type &x)
- {
- x = value_type (NULL, NULL);
- }
-
- static const bool empty_zero_p = false;
-
- static void mark_empty (value_type &x)
- {
- x = value_type ("", "");
- }
-
- static bool is_deleted (const value_type &x)
- {
- return !x.first && !x.second;
- }
-
- static bool is_empty (const value_type &x)
- {
- return !*x.first && !*x.second;
- }
-};
-
-
/* Self-test to verify that each attribute exclusion is symmetric,
meaning that if attribute A is encoded as incompatible with
attribute B then the opposite relationship is also encoded.
@@ -2605,13 +2564,15 @@ struct excl_hash_traits: typed_noop_remove<excl_pair>
static void
test_attribute_exclusions ()
{
+ using excl_hash_traits = pair_hash<nofree_string_hash, nofree_string_hash>;
+
/* Iterate over the array of attribute tables first (with TI0 as
the index) and over the array of attribute_spec in each table
(with SI0 as the index). */
const size_t ntables = ARRAY_SIZE (attribute_tables);
/* Set of pairs of mutually exclusive attributes. */
- typedef hash_set<excl_pair, false, excl_hash_traits> exclusion_set;
+ typedef hash_set<excl_hash_traits> exclusion_set;
exclusion_set excl_set;
for (size_t ti0 = 0; ti0 != ntables; ++ti0)
--
2.33.0

File diff suppressed because it is too large


@ -0,0 +1,64 @@
From b1025ef48bff0622e54822dc0974f38748e9109f Mon Sep 17 00:00:00 2001
From: Jakub Jelinek <jakub@redhat.com>
Date: Thu, 22 Dec 2022 11:15:47 +0100
Subject: [PATCH 074/157] [Backport][SME] aarch64: Fix plugin header install
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5b30e9bc211fede06cf85b54e466012540bef14d
The r13-2943-g11a113d501ff64 made aarch64.h include
aarch64-option-extensions.def, but that file isn't installed
for building plugins.
On Wed, Dec 21, 2022 at 09:56:33AM +0000, Richard Sandiford wrote:
> Should this (and aarch64-fusion-pairs.def and aarch64-tuning-flags.def)
> be in TM_H instead? The first two OPTIONS_H_EXTRA entries seem to be
> for aarch64-opt.h (included via aarch64.opt).
>
> I guess TM_H should also have aarch64-arches.def, since it's included
> for aarch64_feature.
gcc/Makefile.in has
TM_H = $(GTM_H) insn-flags.h $(OPTIONS_H)
and
OPTIONS_H = options.h flag-types.h $(OPTIONS_H_EXTRA)
which means that adding something into TM_H when it is already in
OPTIONS_H_EXTRA is a unnecessary.
It is true that aarch64-fusion-pairs.def (included by aarch64-protos.h)
and aarch64-tuning-flags.def (ditto) and aarch64-option-extensions.def
(included by aarch64.h) aren't needed for options.h, so I think the
right patch would be the following.
2022-12-22 Jakub Jelinek <jakub@redhat.com>
* config/aarch64/t-aarch64 (TM_H): Don't add aarch64-cores.def,
add aarch64-fusion-pairs.def, aarch64-tuning-flags.def and
aarch64-option-extensions.def.
(OPTIONS_H_EXTRA): Don't add aarch64-fusion-pairs.def nor
aarch64-tuning-flags.def.
---
gcc/config/aarch64/t-aarch64 | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64
index ba74abc0a..6a21a248f 100644
--- a/gcc/config/aarch64/t-aarch64
+++ b/gcc/config/aarch64/t-aarch64
@@ -18,11 +18,11 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-TM_H += $(srcdir)/config/aarch64/aarch64-cores.def
+TM_H += $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \
+ $(srcdir)/config/aarch64/aarch64-tuning-flags.def \
+ $(srcdir)/config/aarch64/aarch64-option-extensions.def
OPTIONS_H_EXTRA += $(srcdir)/config/aarch64/aarch64-cores.def \
- $(srcdir)/config/aarch64/aarch64-arches.def \
- $(srcdir)/config/aarch64/aarch64-fusion-pairs.def \
- $(srcdir)/config/aarch64/aarch64-tuning-flags.def
+ $(srcdir)/config/aarch64/aarch64-arches.def
$(srcdir)/config/aarch64/aarch64-tune.md: s-aarch64-tune-md; @true
s-aarch64-tune-md: $(srcdir)/config/aarch64/gentune.sh \
--
2.33.0

File diff suppressed because it is too large.


@@ -0,0 +1,330 @@
From c097d9ffc7dd8f90f78eb3b994f3691f4c8f812d Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 5 Dec 2023 10:11:23 +0000
Subject: [PATCH 076/157] [Backport][SME] aarch64: Add +sme
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7e04bd1fadf3410c3d24b56f650a52ff53d01a3c
This patch adds the +sme ISA feature and requires it to be present
when compiling arm_streaming code. (arm_streaming_compatible code
does not necessarily assume the presence of SME. It just has to
work when SME is present and streaming mode is enabled.)
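As a minimal sketch of the user-visible behaviour (adapted from the new
streaming_mode_3.c test below; the pragmas are only used here to toggle SME):

#pragma GCC target "+nosme"

void sc_ok () [[arm::streaming_compatible]] {} // accepted: only has to tolerate SME
void s_bad () [[arm::streaming]] {} // error: streaming functions require
                                    // the ISA extension 'sme'

#pragma GCC target "+sme"

void s_ok () [[arm::streaming]] {} // accepted once +sme is enabled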
gcc/
* doc/invoke.texi: Document SME.
* doc/sourcebuild.texi: Document aarch64_sme.
* config/aarch64/aarch64-option-extensions.def (sme): Define.
* config/aarch64/aarch64.h (AARCH64_ISA_SME): New macro.
(TARGET_SME): Likewise.
* config/aarch64/aarch64.cc (aarch64_override_options_internal):
Ensure that SME is present when compiling streaming code.
gcc/testsuite/
* lib/target-supports.exp (check_effective_target_aarch64_sme): New
target test.
* gcc.target/aarch64/sme/aarch64-sme.exp: Force SME to be enabled
if it isn't by default.
* g++.target/aarch64/sme/aarch64-sme.exp: Likewise.
* gcc.target/aarch64/sme/streaming_mode_3.c: New test.
---
.../aarch64/aarch64-option-extensions.def | 2 +
gcc/config/aarch64/aarch64.cc | 33 ++++++++++
gcc/config/aarch64/aarch64.h | 5 ++
gcc/doc/invoke.texi | 2 +
gcc/doc/sourcebuild.texi | 2 +
.../g++.target/aarch64/sme/aarch64-sme.exp | 10 ++-
.../gcc.target/aarch64/sme/aarch64-sme.exp | 10 ++-
.../gcc.target/aarch64/sme/streaming_mode_3.c | 63 +++++++++++++++++++
.../gcc.target/aarch64/sme/streaming_mode_4.c | 22 +++++++
gcc/testsuite/lib/target-supports.exp | 12 ++++
10 files changed, 157 insertions(+), 4 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
create mode 100644 gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index bdf4baf30..faee64a79 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -149,4 +149,6 @@ AARCH64_OPT_EXTENSION("ls64", LS64, (), (), (), "")
AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
+AARCH64_OPT_EXTENSION("sme", SME, (BF16, SVE2), (), (), "sme")
+
#undef AARCH64_OPT_EXTENSION
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 904166b21..8f8395201 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -11648,6 +11648,23 @@ aarch64_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
return true;
}
+/* Implement TARGET_START_CALL_ARGS. */
+
+static void
+aarch64_start_call_args (cumulative_args_t ca_v)
+{
+ CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
+
+ if (!TARGET_SME && (ca->isa_mode & AARCH64_FL_SM_ON))
+ {
+ error ("calling a streaming function requires the ISA extension %qs",
+ "sme");
+ inform (input_location, "you can enable %qs using the command-line"
+ " option %<-march%>, or by using the %<target%>"
+ " attribute or pragma", "sme");
+ }
+}
+
/* This function is used by the call expanders of the machine description.
RESULT is the register in which the result is returned. It's NULL for
"call" and "sibcall".
@@ -18194,6 +18211,19 @@ aarch64_override_options_internal (struct gcc_options *opts)
&& !fixed_regs[R18_REGNUM])
error ("%<-fsanitize=shadow-call-stack%> requires %<-ffixed-x18%>");
+ if ((opts->x_aarch64_isa_flags & AARCH64_FL_SM_ON)
+ && !(opts->x_aarch64_isa_flags & AARCH64_FL_SME))
+ {
+ error ("streaming functions require the ISA extension %qs", "sme");
+ inform (input_location, "you can enable %qs using the command-line"
+ " option %<-march%>, or by using the %<target%>"
+ " attribute or pragma", "sme");
+ opts->x_target_flags &= ~MASK_GENERAL_REGS_ONLY;
+ auto new_flags = (opts->x_aarch64_asm_isa_flags
+ | feature_deps::SME ().enable);
+ aarch64_set_asm_isa_flags (opts, new_flags);
+ }
+
initialize_aarch64_code_model (opts);
initialize_aarch64_tls_size (opts);
@@ -28159,6 +28189,9 @@ aarch64_get_v16qi_mode ()
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P aarch64_function_value_regno_p
+#undef TARGET_START_CALL_ARGS
+#define TARGET_START_CALL_ARGS aarch64_start_call_args
+
#undef TARGET_GIMPLE_FOLD_BUILTIN
#define TARGET_GIMPLE_FOLD_BUILTIN aarch64_gimple_fold_builtin
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 84215c8c3..dd2de4e88 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -214,6 +214,7 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
#define AARCH64_ISA_SVE2_BITPERM (aarch64_isa_flags & AARCH64_FL_SVE2_BITPERM)
#define AARCH64_ISA_SVE2_SHA3 (aarch64_isa_flags & AARCH64_FL_SVE2_SHA3)
#define AARCH64_ISA_SVE2_SM4 (aarch64_isa_flags & AARCH64_FL_SVE2_SM4)
+#define AARCH64_ISA_SME (aarch64_isa_flags & AARCH64_FL_SME)
#define AARCH64_ISA_V8_3A (aarch64_isa_flags & AARCH64_FL_V8_3A)
#define AARCH64_ISA_DOTPROD (aarch64_isa_flags & AARCH64_FL_DOTPROD)
#define AARCH64_ISA_AES (aarch64_isa_flags & AARCH64_FL_AES)
@@ -292,6 +293,10 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE = AARCH64_FL_SM_OFF;
/* SVE2 SM4 instructions, enabled through +sve2-sm4. */
#define TARGET_SVE2_SM4 (AARCH64_ISA_SVE2_SM4)
+/* SME instructions, enabled through +sme. Note that this does not
+ imply anything about the state of PSTATE.SM. */
+#define TARGET_SME (AARCH64_ISA_SME)
+
/* ARMv8.3-A features. */
#define TARGET_ARMV8_3 (AARCH64_ISA_V8_3A)
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 53709b246..2420b05d9 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19478,6 +19478,8 @@ Enable the instructions to accelerate memory operations like @code{memcpy},
Enable the Flag Manipulation instructions Extension.
@item pauth
Enable the Pointer Authentication Extension.
+@item sme
+Enable the Scalable Matrix Extension.
@end table
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index 454fae11a..80936a0eb 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2277,6 +2277,8 @@ AArch64 target which generates instruction sequences for big endian.
@item aarch64_small_fpic
Binutils installed on test system supports relocation types required by -fpic
for AArch64 small memory model.
+@item aarch64_sme
+AArch64 target that generates instructions for SME.
@item aarch64_sve_hw
AArch64 target that is able to generate and execute SVE code (regardless of
whether it does so by default).
diff --git a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
index 72fcd0bd9..1c3e69cde 100644
--- a/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
+++ b/gcc/testsuite/g++.target/aarch64/sme/aarch64-sme.exp
@@ -30,10 +30,16 @@ load_lib g++-dg.exp
# Initialize `dg'.
dg-init
-aarch64-with-arch-dg-options "" {
+if { [check_effective_target_aarch64_sme] } {
+ set sme_flags ""
+} else {
+ set sme_flags "-march=armv9-a+sme"
+}
+
+aarch64-with-arch-dg-options $sme_flags {
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
- "" ""
+ "" $sme_flags
}
# All done.
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
index c990e5924..011310e80 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
+++ b/gcc/testsuite/gcc.target/aarch64/sme/aarch64-sme.exp
@@ -30,10 +30,16 @@ load_lib gcc-dg.exp
# Initialize `dg'.
dg-init
-aarch64-with-arch-dg-options "" {
+if { [check_effective_target_aarch64_sme] } {
+ set sme_flags ""
+} else {
+ set sme_flags "-march=armv9-a+sme"
+}
+
+aarch64-with-arch-dg-options $sme_flags {
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cCS\]]] \
- "" ""
+ "" $sme_flags
}
# All done.
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
new file mode 100644
index 000000000..45ec92321
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_3.c
@@ -0,0 +1,63 @@
+// { dg-options "" }
+
+#pragma GCC target "+nosme"
+
+void sc_a () [[arm::streaming_compatible]] {}
+void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+void ns_a () {}
+
+void sc_b () [[arm::streaming_compatible]] {}
+void ns_b () {}
+void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+
+void sc_c () [[arm::streaming_compatible]] {}
+void sc_d () [[arm::streaming_compatible]] {}
+
+void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+
+void ns_c () {}
+void ns_d () {}
+
+void sc_e () [[arm::streaming_compatible]];
+void s_e () [[arm::streaming]];
+void ns_e ();
+
+#pragma GCC target "+sme"
+
+void sc_f () [[arm::streaming_compatible]] {}
+void s_f () [[arm::streaming]] {}
+void ns_f () {}
+
+void sc_g () [[arm::streaming_compatible]] {}
+void ns_g () {}
+void s_g () [[arm::streaming]] {}
+
+void sc_h () [[arm::streaming_compatible]] {}
+void sc_i () [[arm::streaming_compatible]] {}
+
+void s_h () [[arm::streaming]] {}
+void s_i () [[arm::streaming]] {}
+
+void ns_h () {}
+void ns_i () {}
+
+void sc_j () [[arm::streaming_compatible]];
+void s_j () [[arm::streaming]];
+void ns_j ();
+
+#pragma GCC target "+sme"
+
+void sc_k () [[arm::streaming_compatible]] {}
+
+#pragma GCC target "+nosme"
+#pragma GCC target "+sme"
+
+void s_k () [[arm::streaming]] {}
+
+#pragma GCC target "+nosme"
+#pragma GCC target "+sme"
+
+void ns_k () {}
+
+#pragma GCC target "+nosme"
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
new file mode 100644
index 000000000..50e92f2e1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme/streaming_mode_4.c
@@ -0,0 +1,22 @@
+// { dg-options "-mgeneral-regs-only" }
+
+void sc_a () [[arm::streaming_compatible]] {}
+void s_a () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+void ns_a () {}
+
+void sc_b () [[arm::streaming_compatible]] {}
+void ns_b () {}
+void s_b () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+
+void sc_c () [[arm::streaming_compatible]] {}
+void sc_d () [[arm::streaming_compatible]] {}
+
+void s_c () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+void s_d () [[arm::streaming]] {} // { dg-error "streaming functions require the ISA extension 'sme'" }
+
+void ns_c () {}
+void ns_d () {}
+
+void sc_e () [[arm::streaming_compatible]];
+void s_e () [[arm::streaming]];
+void ns_e ();
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index bd89d4f52..e2a9ef5fa 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3887,6 +3887,18 @@ proc aarch64_sve_bits { } {
}]
}
+# Return 1 if this is an AArch64 target that generates instructions for SME.
+proc check_effective_target_aarch64_sme { } {
+ if { ![istarget aarch64*-*-*] } {
+ return 0
+ }
+ return [check_no_compiler_messages aarch64_sme assembly {
+ #if !defined (__ARM_FEATURE_SME)
+ #error FOO
+ #endif
+ }]
+}
+
# Return 1 if this is a compiler supporting ARC atomic operations
proc check_effective_target_arc_atomic { } {
return [check_no_compiler_messages arc_atomic assembly {
--
2.33.0


@@ -0,0 +1,168 @@
From d8233e19aae2272c4863de5e8d61d49d3147e807 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Thu, 1 Jun 2023 09:37:06 +0100
Subject: [PATCH 077/157] [Backport][SME] aarch64: Add =r,m and =m,r
alternatives to 64-bit vector move patterns
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
We can use the X registers to load and store 64-bit vector modes; we just need to add the alternatives
to the mov patterns. This straightforward patch does that, and for the pair variants too.
For the testcase in the patch we now generate the optimal assembly without any superfluous
GP<->SIMD moves.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
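As a hedged sketch of the kind of code that benefits (adapted from the new
xreg-vec-modes_1.c test below; the empty asm is only there to pin the value
in a general-purpose register):

typedef unsigned int v2si __attribute__((vector_size (8)));

void
copy (v2si *a, v2si *b)
{
  v2si tmp = *b;
  __asm__ volatile ("" : "+r" (tmp)); /* keep tmp in an X register */
  *a = tmp; /* now a plain X-register ldr/str pair, no GP<->SIMD moves */
}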
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
Add =r,m and =m,r alternatives.
(load_pair<DREG:mode><DREG2:mode>): Likewise.
(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/xreg-vec-modes_1.c: New test.
---
gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++--------
.../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++
2 files changed, 65 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 2d688edf5..b5c52ba16 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -116,26 +116,28 @@
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, m, m, w, ?r, ?w, ?r, w, w")
+ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
(match_operand:VDMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
"@
ldr\t%d0, %1
+ ldr\t%x0, %1
str\txzr, %0
str\t%d1, %0
+ str\t%x1, %0
* return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
* return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
fmov\t%d0, %1
mov\t%0, %1
* return aarch64_output_simd_mov_immediate (operands[1], 64);
fmov\t%d0, xzr"
- [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
- neon_logic<q>, neon_to_gp<q>, f_mcr,\
+ [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
+ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
mov_reg, neon_move<q>, f_mcr")
- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
+ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
)
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
@@ -177,31 +179,35 @@
)
(define_insn "load_pair<DREG:mode><DREG2:mode>"
- [(set (match_operand:DREG 0 "register_operand" "=w")
- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
- (set (match_operand:DREG2 2 "register_operand" "=w")
- (match_operand:DREG2 3 "memory_operand" "m"))]
+ [(set (match_operand:DREG 0 "register_operand" "=w,r")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+ (set (match_operand:DREG2 2 "register_operand" "=w,r")
+ (match_operand:DREG2 3 "memory_operand" "m,m"))]
"TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (<DREG:MODE>mode)))"
- "ldp\\t%d0, %d2, %z1"
- [(set_attr "type" "neon_ldp")]
+ "@
+ ldp\t%d0, %d2, %z1
+ ldp\t%x0, %x2, %z1"
+ [(set_attr "type" "neon_ldp,load_16")]
)
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
- (match_operand:DREG 1 "register_operand" "w"))
- (set (match_operand:DREG2 2 "memory_operand" "=m")
- (match_operand:DREG2 3 "register_operand" "w"))]
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+ (match_operand:DREG 1 "register_operand" "w,r"))
+ (set (match_operand:DREG2 2 "memory_operand" "=m,m")
+ (match_operand:DREG2 3 "register_operand" "w,r"))]
"TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (<DREG:MODE>mode)))"
- "stp\\t%d1, %d3, %z0"
- [(set_attr "type" "neon_stp")]
+ "@
+ stp\t%d1, %d3, %z0
+ stp\t%x1, %x3, %z0"
+ [(set_attr "type" "neon_stp,store_16")]
)
(define_insn "load_pair<VQ:mode><VQ2:mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
new file mode 100644
index 000000000..fc4dcb1ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef unsigned int v2si __attribute__((vector_size (8)));
+
+#define force_gp(V1) asm volatile ("" \
+ : "=r"(V1) \
+ : "r"(V1) \
+ : /* No clobbers */);
+
+/*
+** foo:
+** ldr (x[0-9]+), \[x1\]
+** str \1, \[x0\]
+** ret
+*/
+
+void
+foo (v2si *a, v2si *b)
+{
+ v2si tmp = *b;
+ force_gp (tmp);
+ *a = tmp;
+}
+
+/*
+** foo2:
+** ldp (x[0-9]+), (x[0-9]+), \[x0\]
+** stp \1, \2, \[x1\]
+** ret
+*/
+void
+foo2 (v2si *a, v2si *b)
+{
+ v2si t1 = *a;
+ v2si t2 = a[1];
+ force_gp (t1);
+ force_gp (t2);
+ *b = t1;
+ b[1] = t2;
+}
--
2.33.0


@@ -0,0 +1,167 @@
From 7d40978965ff893871a79f5f624f54ae02a34a8b Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Wed, 18 Oct 2023 09:34:01 +0100
Subject: [PATCH 078/157] [Backport][SME] AArch64: Rewrite simd move immediate
patterns to new syntax
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=04227acbe9e6c60d1e314a6b4f2d949c07f30baa
This rewrites the simd MOV patterns to use the new compact syntax.
No change in semantics is expected. This will be needed in follow-on patches.
This also merges the splits into the define_insn, which will also be needed soon.
gcc/ChangeLog:
PR tree-optimization/109154
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
Rewrite to new syntax.
(*aarch64_simd_mov<VQMOV:mode>): Rewrite to new syntax and merge in
splits.
---
gcc/config/aarch64/aarch64-simd.md | 116 ++++++++++++-----------------
1 file changed, 47 insertions(+), 69 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index b5c52ba16..1f4b30642 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -115,54 +115,59 @@
)
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
- [(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
- (match_operand:VDMOV 1 "general_operand"
- "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
+ [(set (match_operand:VDMOV 0 "nonimmediate_operand")
+ (match_operand:VDMOV 1 "general_operand"))]
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
- "@
- ldr\t%d0, %1
- ldr\t%x0, %1
- str\txzr, %0
- str\t%d1, %0
- str\t%x1, %0
- * return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
- * return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
- fmov\t%d0, %1
- mov\t%0, %1
- * return aarch64_output_simd_mov_immediate (operands[1], 64);
- fmov\t%d0, xzr"
- [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
- store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
- mov_reg, neon_move<q>, f_mcr")
- (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
-)
-
-(define_insn "*aarch64_simd_mov<VQMOV:mode>"
- [(set (match_operand:VQMOV 0 "nonimmediate_operand"
- "=w, Umn, m, w, ?r, ?w, ?r, w, w")
- (match_operand:VQMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ {@ [cons: =0, 1; attrs: type, arch]
+ [w , m ; neon_load1_1reg<q> , * ] ldr\t%d0, %1
+ [r , m ; load_8 , * ] ldr\t%x0, %1
+ [m , Dz; store_8 , * ] str\txzr, %0
+ [m , w ; neon_store1_1reg<q>, * ] str\t%d1, %0
+ [m , r ; store_8 , * ] str\t%x1, %0
+ [w , w ; neon_logic<q> , simd] mov\t%0.<Vbtype>, %1.<Vbtype>
+ [w , w ; neon_logic<q> , * ] fmov\t%d0, %d1
+ [?r, w ; neon_to_gp<q> , simd] umov\t%0, %1.d[0]
+ [?r, w ; neon_to_gp<q> , * ] fmov\t%x0, %d1
+ [?w, r ; f_mcr , * ] fmov\t%d0, %1
+ [?r, r ; mov_reg , * ] mov\t%0, %1
+ [w , Dn; neon_move<q> , simd] << aarch64_output_simd_mov_immediate (operands[1], 64);
+ [w , Dz; f_mcr , * ] fmov\t%d0, xzr
+ }
+)
+
+(define_insn_and_split "*aarch64_simd_mov<VQMOV:mode>"
+ [(set (match_operand:VQMOV 0 "nonimmediate_operand")
+ (match_operand:VQMOV 1 "general_operand"))]
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
- "@
- ldr\t%q0, %1
- stp\txzr, xzr, %0
- str\t%q1, %0
- mov\t%0.<Vbtype>, %1.<Vbtype>
- #
- #
- #
- * return aarch64_output_simd_mov_immediate (operands[1], 128);
- fmov\t%d0, xzr"
- [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
- neon_logic<q>, multiple, multiple,\
- multiple, neon_move<q>, fmov")
- (set_attr "length" "4,4,4,4,8,8,8,4,4")
- (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
+ {@ [cons: =0, 1; attrs: type, arch, length]
+ [w , m ; neon_load1_1reg<q> , * , 4] ldr\t%q0, %1
+ [Umn, Dz; store_16 , * , 4] stp\txzr, xzr, %0
+ [m , w ; neon_store1_1reg<q>, * , 4] str\t%q1, %0
+ [w , w ; neon_logic<q> , simd, 4] mov\t%0.<Vbtype>, %1.<Vbtype>
+ [?r , w ; multiple , * , 8] #
+ [?w , r ; multiple , * , 8] #
+ [?r , r ; multiple , * , 8] #
+ [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
+ [w , Dz; fmov , * , 4] fmov\t%d0, xzr
+ }
+ "&& reload_completed
+ && (REG_P (operands[0])
+ && REG_P (operands[1])
+ && !(FP_REGNUM_P (REGNO (operands[0]))
+ && FP_REGNUM_P (REGNO (operands[1]))))"
+ [(const_int 0)]
+ {
+ if (GP_REGNUM_P (REGNO (operands[0]))
+ && GP_REGNUM_P (REGNO (operands[1])))
+ aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
+ else
+ aarch64_split_simd_move (operands[0], operands[1]);
+ DONE;
+ }
)
;; When storing lane zero we can use the normal STR and its more permissive
@@ -238,33 +243,6 @@
[(set_attr "type" "neon_stp_q")]
)
-
-(define_split
- [(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_FLOAT
- && reload_completed
- && GP_REGNUM_P (REGNO (operands[0]))
- && GP_REGNUM_P (REGNO (operands[1]))"
- [(const_int 0)]
-{
- aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
- DONE;
-})
-
-(define_split
- [(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_FLOAT
- && reload_completed
- && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
- || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
- [(const_int 0)]
-{
- aarch64_split_simd_move (operands[0], operands[1]);
- DONE;
-})
-
(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQMOV 0)
(match_operand:VQMOV 1))]
--
2.33.0


@@ -0,0 +1,34 @@
From 883af5a13e648e74cb8d8722be6d4980e8bc8f48 Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Tue, 20 Jun 2023 08:54:42 +0100
Subject: [PATCH 079/157] [Backport][SME] AArch64: remove test comment from
*mov<mode>_aarch64
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=36de416df8b3f109353e309011061fa66e872e3a
I accidentally left a test comment in the final version of the patch.
This removes the comment.
gcc/ChangeLog:
* config/aarch64/aarch64.md (*mov<mode>_aarch64): Drop test comment.
---
gcc/config/aarch64/aarch64.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 29a665e45..1ec23fae8 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1213,7 +1213,7 @@
[m, r Z ; store_4 , * ] str<size>\\t%w1, %0
[m, w ; store_4 , * ] str\t%<size>1, %0
[r, w ; neon_to_gp<q> , simd ] umov\t%w0, %1.<v>[0]
- [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1 /*foo */
+ [r, w ; neon_to_gp<q> , nosimd] fmov\t%w0, %s1
[w, r Z ; neon_from_gp<q>, simd ] dup\t%0.<Vallxd>, %w1
[w, r Z ; neon_from_gp<q>, nosimd] fmov\t%s0, %w1
[w, w ; neon_dup , simd ] dup\t%<Vetype>0, %1.<v>[0]
--
2.33.0

File diff suppressed because it is too large.

File diff suppressed because it is too large.


@@ -0,0 +1,250 @@
From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001
From: Tamar Christina <tamar.christina@arm.com>
Date: Mon, 12 Dec 2022 15:18:56 +0000
Subject: [PATCH 082/157] [Backport][SME] AArch64: Support new tbranch optab.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11
This implements the new tbranch optab for AArch64.
We cannot emit one big RTL for the final instruction immediately.
The reason all comparisons in the AArch64 backend expand to a separate CC
compare and a separate test of the operands is ifcvt.
The separate CC compare is needed so ifcvt can produce csel, cset etc. from the
compares. Unlike, say, combine, ifcvt cannot do recog on a parallel with a
clobber. If we emitted the instruction directly, ifcvt would not be able to,
say, make a csel, because we have no patterns which handle zero_extract and
compare (unlike combine, ifcvt cannot transform the extract into an AND).
While you could provide various patterns for this (and I did try) you end up
with broken patterns because you can't add the clobber to the CC register. If
you do, ifcvt recog fails.
i.e.
int
f1 (int x)
{
if (x & 1)
return 1;
return x;
}
We lose csel here.
Secondly, the compare with an explicit CC mode is needed so that ifcvt can
transform the operation into a version that doesn't require the flags to be
set. But it only does so if it knows the explicit usage of the CC reg.
For instance
int
foo (int a, int b)
{
return ((a & (1 << 25)) ? 5 : 4);
}
This doesn't require a comparison; the optimal form is:
foo(int, int):
ubfx x0, x0, 25, 1
add w0, w0, 4
ret
and no compare is actually needed. If you represent the instruction using an
ANDS instead of a zero_extract then you get close, but you end up with an ands
followed by an add, which is a slower operation.
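As a hedged illustration of the payoff (this mirrors the g1 case in the new
tbz_1.c test below), a single-bit test that only feeds a branch can now be
emitted as one tbnz instead of a tst/bne pair:

#include <stdbool.h>

void h (void);

void
g1 (bool x)
{
  if (__builtin_expect (x, 0)) /* expands via tbranch to: tbnz w0, #0, .L... */
    h ();
}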
gcc/ChangeLog:
* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
(tbranch_<code><mode>4): New.
* config/aarch64/iterators.md (ZEROM, zerom): New.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/tbz_1.c: New test.
---
gcc/config/aarch64/aarch64.md | 33 ++++++--
gcc/config/aarch64/iterators.md | 2 +
gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++
3 files changed, 122 insertions(+), 8 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 079c8a3f9..2becc888e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -953,12 +953,29 @@
(const_int 1)))]
)
-(define_insn "*tb<optab><mode>1"
+(define_expand "tbranch_<code><mode>3"
[(set (pc) (if_then_else
- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
- (const_int 1)
- (match_operand 1
- "aarch64_simd_shift_imm_<mode>" "n"))
+ (EQL (match_operand:ALLI 0 "register_operand")
+ (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
+ (label_ref (match_operand 2 ""))
+ (pc)))]
+ ""
+{
+ rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
+ rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
+ rtx val = GEN_INT (1UL << UINTVAL (operands[1]));
+ emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
+ operands[1] = const0_rtx;
+ operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
+ operands[1]);
+})
+
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
+ [(set (pc) (if_then_else
+ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
+ (const_int 1)
+ (match_operand 1
+ "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
(const_int 0))
(label_ref (match_operand 2 "" ""))
(pc)))
@@ -969,15 +986,15 @@
{
if (get_attr_far_branch (insn) == 1)
return aarch64_gen_far_branch (operands, 2, "Ltb",
- "<inv_tb>\\t%<w>0, %1, ");
+ "<inv_tb>\\t%<ALLI:w>0, %1, ");
else
{
operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
- return "tst\t%<w>0, %1\;<bcond>\t%l2";
+ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
}
}
else
- return "<tbz>\t%<w>0, %1, %l2";
+ return "<tbz>\t%<ALLI:w>0, %1, %l2";
}
[(set_attr "type" "branch")
(set (attr "length")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 226dea48a..b616f5c9a 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1104,6 +1104,8 @@
;; Give the number of bits in the mode
(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
;; Give the ordinal of the MSB in the mode
(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
new file mode 100644
index 000000000..39deb58e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
@@ -0,0 +1,95 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+#include <stdbool.h>
+
+void h(void);
+
+/*
+** g1:
+** tbnz w[0-9]+, #?0, .L([0-9]+)
+** ret
+** ...
+*/
+void g1(bool x)
+{
+ if (__builtin_expect (x, 0))
+ h ();
+}
+
+/*
+** g2:
+** tbz w[0-9]+, #?0, .L([0-9]+)
+** b h
+** ...
+*/
+void g2(bool x)
+{
+ if (__builtin_expect (x, 1))
+ h ();
+}
+
+/*
+** g3_ge:
+** tbnz w[0-9]+, #?31, .L[0-9]+
+** b h
+** ...
+*/
+void g3_ge(int x)
+{
+ if (__builtin_expect (x >= 0, 1))
+ h ();
+}
+
+/*
+** g3_gt:
+** cmp w[0-9]+, 0
+** ble .L[0-9]+
+** b h
+** ...
+*/
+void g3_gt(int x)
+{
+ if (__builtin_expect (x > 0, 1))
+ h ();
+}
+
+/*
+** g3_lt:
+** tbz w[0-9]+, #?31, .L[0-9]+
+** b h
+** ...
+*/
+void g3_lt(int x)
+{
+ if (__builtin_expect (x < 0, 1))
+ h ();
+}
+
+/*
+** g3_le:
+** cmp w[0-9]+, 0
+** bgt .L[0-9]+
+** b h
+** ...
+*/
+void g3_le(int x)
+{
+ if (__builtin_expect (x <= 0, 1))
+ h ();
+}
+
+/*
+** g5:
+** mov w[0-9]+, 65279
+** tst w[0-9]+, w[0-9]+
+** beq .L[0-9]+
+** b h
+** ...
+*/
+void g5(int x)
+{
+ if (__builtin_expect (x & 0xfeff, 1))
+ h ();
+}
--
2.33.0


@@ -1,7 +1,10 @@
From 62fbb215cc817e9f2c1ca80282a64f4ee30806bc Mon Sep 17 00:00:00 2001
From 755f67b1abd70b3c3ea20076fe60c1d303bf1e0c Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:48 +0100
Subject: [PATCH] aarch64: Use local frame vars in shrink-wrapping code
Date: Tue, 12 Sep 2023 16:05:04 +0100
Subject: [PATCH 083/157] [Backport][SME] aarch64: Use local frame vars in
shrink-wrapping code
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=76d89da25af3064e80c9b7b584c678ff72b1f0bd
aarch64_layout_frame uses a shorthand for referring to
cfun->machine->frame:
@@ -24,10 +27,10 @@ gcc/
1 file changed, 64 insertions(+), 59 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 226dc9dffd47..ae42ffdedbeb 100644
index 08a98f8ba..b7da1d0be 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8351,7 +8351,7 @@ aarch64_layout_frame (void)
@@ -8951,7 +8951,7 @@ aarch64_layout_frame (void)
frame.is_scs_enabled
= (!crtl->calls_eh_return
&& sanitize_flags_p (SANITIZE_SHADOW_CALL_STACK)
@@ -36,7 +39,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
/* When shadow call stack is enabled, the scs_pop in the epilogue will
restore x30, and we don't need to pop x30 again in the traditional
@@ -8763,6 +8763,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9363,6 +9363,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
unsigned start, unsigned limit, bool skip_wb,
bool hard_fp_valid_p)
{
@@ -44,7 +47,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
rtx_insn *insn;
unsigned regno;
unsigned regno2;
@@ -8777,8 +8778,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9377,8 +9378,8 @@ aarch64_save_callee_saves (poly_int64 start_offset,
bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
if (skip_wb
@@ -55,7 +58,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
continue;
if (cfun->machine->reg_is_wrapped_separately[regno])
@@ -8786,7 +8787,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9386,7 +9387,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
machine_mode mode = aarch64_reg_save_mode (regno);
reg = gen_rtx_REG (mode, regno);
@@ -64,7 +67,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
rtx base_rtx = stack_pointer_rtx;
poly_int64 sp_offset = offset;
@@ -8799,7 +8800,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9399,7 +9400,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
{
gcc_assert (known_eq (start_offset, 0));
poly_int64 fp_offset
@@ -73,7 +76,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
if (hard_fp_valid_p)
base_rtx = hard_frame_pointer_rtx;
else
@@ -8821,8 +8822,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9421,8 +9422,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
&& (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
&& !cfun->machine->reg_is_wrapped_separately[regno2]
&& known_eq (GET_MODE_SIZE (mode),
@@ -83,7 +86,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
{
rtx reg2 = gen_rtx_REG (mode, regno2);
rtx mem2;
@@ -8872,6 +8872,7 @@ static void
@@ -9472,6 +9472,7 @@ static void
aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
unsigned limit, bool skip_wb, rtx *cfi_ops)
{
@@ -91,7 +94,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
unsigned regno;
unsigned regno2;
poly_int64 offset;
@@ -8888,13 +8889,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
@@ -9488,13 +9489,13 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
rtx reg, mem;
if (skip_wb
@@ -108,7 +111,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
rtx base_rtx = stack_pointer_rtx;
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
@@ -8905,8 +8906,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
@@ -9505,8 +9506,7 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
&& (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
&& !cfun->machine->reg_is_wrapped_separately[regno2]
&& known_eq (GET_MODE_SIZE (mode),
@@ -118,7 +121,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
{
rtx reg2 = gen_rtx_REG (mode, regno2);
rtx mem2;
@@ -9011,6 +9011,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
@@ -9611,6 +9611,7 @@ offset_12bit_unsigned_scaled_p (machine_mode mode, poly_int64 offset)
static sbitmap
aarch64_get_separate_components (void)
{
@@ -126,7 +129,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
sbitmap components = sbitmap_alloc (LAST_SAVED_REGNUM + 1);
bitmap_clear (components);
@@ -9027,18 +9028,18 @@ aarch64_get_separate_components (void)
@@ -9627,18 +9628,18 @@ aarch64_get_separate_components (void)
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
continue;
@@ -148,7 +151,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
else
offset += crtl->outgoing_args_size;
@@ -9057,11 +9058,11 @@ aarch64_get_separate_components (void)
@@ -9657,11 +9658,11 @@ aarch64_get_separate_components (void)
/* If the spare predicate register used by big-endian SVE code
is call-preserved, it must be saved in the main prologue
before any saves that use it. */
@ -164,7 +167,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
/* If registers have been chosen to be stored/restored with
writeback don't interfere with them to avoid having to output explicit
stack adjustment instructions. */
@@ -9170,6 +9171,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
@@ -9770,6 +9771,7 @@ aarch64_get_next_set_bit (sbitmap bmp, unsigned int start)
static void
aarch64_process_components (sbitmap components, bool prologue_p)
{
@ -172,7 +175,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
rtx ptr_reg = gen_rtx_REG (Pmode, frame_pointer_needed
? HARD_FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM);
@@ -9184,9 +9186,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9784,9 +9786,9 @@ aarch64_process_components (sbitmap components, bool prologue_p)
machine_mode mode = aarch64_reg_save_mode (regno);
rtx reg = gen_rtx_REG (mode, regno);
@ -184,7 +187,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
else
offset += crtl->outgoing_args_size;
@@ -9211,14 +9213,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9811,14 +9813,14 @@ aarch64_process_components (sbitmap components, bool prologue_p)
break;
}
@ -201,7 +204,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
GET_MODE_SIZE (mode)))
{
insn = emit_insn (set);
@@ -9240,7 +9242,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9840,7 +9842,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
/* REGNO2 can be saved/restored in a pair with REGNO. */
rtx reg2 = gen_rtx_REG (mode, regno2);
if (frame_pointer_needed)
@ -210,7 +213,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
else
offset2 += crtl->outgoing_args_size;
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
@@ -9335,6 +9337,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -9935,6 +9937,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
bool frame_related_p,
bool final_adjustment_p)
{
@ -218,7 +221,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
HOST_WIDE_INT guard_size
= 1 << param_stack_clash_protection_guard_size;
HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
@@ -9355,25 +9358,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -9955,25 +9958,25 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
register as a probe. We can't assume that LR was saved at position 0
though, so treat any space below it as unprobed. */
if (final_adjustment_p
@ -250,7 +253,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
if (known_eq (frame_size, 0))
{
@@ -9662,17 +9665,18 @@ aarch64_epilogue_uses (int regno)
@@ -10262,17 +10265,18 @@ aarch64_epilogue_uses (int regno)
void
aarch64_expand_prologue (void)
{
@ -279,7 +282,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
rtx_insn *insn;
if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
@@ -9703,7 +9707,7 @@ aarch64_expand_prologue (void)
@@ -10303,7 +10307,7 @@ aarch64_expand_prologue (void)
}
/* Push return address to shadow call stack. */
@ -288,7 +291,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
emit_insn (gen_scs_push ());
if (flag_stack_usage_info)
@@ -9742,7 +9746,7 @@ aarch64_expand_prologue (void)
@@ -10342,7 +10346,7 @@ aarch64_expand_prologue (void)
/* The offset of the frame chain record (if any) from the current SP. */
poly_int64 chain_offset = (initial_adjust + callee_adjust
@ -297,9 +300,9 @@ index 226dc9dffd47..ae42ffdedbeb 100644
gcc_assert (known_ge (chain_offset, 0));
/* The offset of the bottom of the save area from the current SP. */
@@ -9845,16 +9849,17 @@ aarch64_use_return_insn_p (void)
@@ -10445,16 +10449,17 @@ aarch64_use_return_insn_p (void)
void
aarch64_expand_epilogue (bool for_sibcall)
aarch64_expand_epilogue (rtx_call_insn *sibcall)
{
- poly_int64 initial_adjust = cfun->machine->frame.initial_adjust;
- HOST_WIDE_INT callee_adjust = cfun->machine->frame.callee_adjust;
@ -324,7 +327,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
? R29_REGNUM : R30_REGNUM);
rtx cfi_ops = NULL;
rtx_insn *insn;
@@ -9888,7 +9893,7 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10488,7 +10493,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
/* We need to add memory barrier to prevent read from deallocated stack. */
bool need_barrier_p
= maybe_ne (get_frame_size ()
@ -333,7 +336,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
/* Emit a barrier to prevent loads from a deallocated stack. */
if (maybe_gt (final_adjust, crtl->outgoing_args_size)
@@ -9969,7 +9974,7 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10569,7 +10574,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
}
/* Pop return address from shadow call stack. */
@ -342,7 +345,7 @@ index 226dc9dffd47..ae42ffdedbeb 100644
{
machine_mode mode = aarch64_reg_save_mode (R30_REGNUM);
rtx reg = gen_rtx_REG (mode, R30_REGNUM);
@@ -12564,24 +12569,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
@@ -13023,24 +13028,24 @@ aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
poly_int64
aarch64_initial_elimination_offset (unsigned from, unsigned to)
{
@ -374,5 +377,5 @@ index 226dc9dffd47..ae42ffdedbeb 100644
--
2.43.5
2.33.0


@@ -1,7 +1,9 @@
From 12a8889de169f892d2e927584c00d20b8b7e456f Mon Sep 17 00:00:00 2001
From 54a6e52207703a8643fc406175377105f887ebef Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:49 +0100
Subject: [PATCH] aarch64: Avoid a use of callee_offset
Date: Tue, 12 Sep 2023 16:05:04 +0100
Subject: [PATCH] [Backport][SME] aarch64: Avoid a use of callee_offset
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f9ab771fa8cd747f34786c6f33deea32c2eb828b
When we emit the frame chain, i.e. when we reach Here in this statement
of aarch64_expand_prologue:
@@ -47,10 +49,17 @@ gcc/
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ae42ffdedbeb..79253322fd7c 100644
index b7da1d0be..fbd7a079a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9670,7 +9670,6 @@ aarch64_expand_prologue (void)
@@ -10263,21 +10263,20 @@ aarch64_epilogue_uses (int regno)
current FP is also set up if it is in use. */
void
aarch64_expand_prologue (void)
{
aarch64_frame &frame = cfun->machine->frame;
poly_int64 frame_size = frame.frame_size;
poly_int64 initial_adjust = frame.initial_adjust;
HOST_WIDE_INT callee_adjust = frame.callee_adjust;
poly_int64 final_adjust = frame.final_adjust;
@@ -58,7 +67,21 @@ index ae42ffdedbeb..79253322fd7c 100644
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
poly_int64 below_hard_fp_saved_regs_size
= frame.below_hard_fp_saved_regs_size;
@@ -9783,8 +9782,7 @@ aarch64_expand_prologue (void)
unsigned reg1 = frame.wb_push_candidate1;
unsigned reg2 = frame.wb_push_candidate2;
bool emit_frame_chain = frame.emit_frame_chain;
rtx_insn *insn;
if (flag_stack_clash_protection && known_eq (callee_adjust, 0))
{
@@ -10376,22 +10375,21 @@ aarch64_expand_prologue (void)
the CFA based on the frame pointer. We therefore need new
DW_CFA_expressions to re-express the save slots with addresses
based on the frame pointer. */
rtx_insn *insn = get_last_insn ();
gcc_assert (RTX_FRAME_RELATED_P (insn));
/* Add an explicit CFA definition if this was previously
implicit. */
if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX))
{
@@ -68,6 +91,13 @@ index ae42ffdedbeb..79253322fd7c 100644
add_reg_note (insn, REG_CFA_ADJUST_CFA,
gen_rtx_SET (hard_frame_pointer_rtx, src));
}
--
2.43.5
/* Change the save slot expressions for the registers that
we've already saved. */
aarch64_add_cfa_expression (insn, regno_reg_rtx[reg2],
hard_frame_pointer_rtx, UNITS_PER_WORD);
aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
hard_frame_pointer_rtx, 0);
--
2.38.1.windows.1


@@ -1,7 +1,10 @@
From 03d5e89e7f3be53fd7142556e8e0a2774c653dca Mon Sep 17 00:00:00 2001
From 82bbe6513987a7656150110164e25f44fe410796 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:49 +0100
Subject: [PATCH] aarch64: Explicitly handle frames with no saved registers
Date: Tue, 12 Sep 2023 16:05:05 +0100
Subject: [PATCH 085/157] [Backport][SME] aarch64: Explicitly handle frames
with no saved registers
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c601c918c9ac01ef8315774a642ff924f77c85e5
If a frame has no saved registers, it can be allocated in one go.
There is no need to treat the areas below and above the saved
@@ -25,10 +28,10 @@ gcc/
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 79253322fd7c..e1f21230c15e 100644
index fbd7a079a..c59af6b1c 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8378,9 +8378,11 @@ aarch64_layout_frame (void)
@@ -8978,9 +8978,11 @@ aarch64_layout_frame (void)
HOST_WIDE_INT const_size, const_outgoing_args_size, const_fp_offset;
HOST_WIDE_INT const_saved_regs_size;
@@ -44,5 +47,5 @@ index 79253322fd7c..e1f21230c15e 100644
/* Simple, small frame with no outgoing arguments:
--
2.43.5
2.33.0


@@ -1,7 +1,10 @@
From 49c2eb7616756c323b7f6b18d8616ec945eb1263 Mon Sep 17 00:00:00 2001
From bf985fe08b6298218180666a7d20f4aa0b41326f Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:49 +0100
Subject: [PATCH] aarch64: Add bytes_below_saved_regs to frame info
Date: Tue, 12 Sep 2023 16:05:05 +0100
Subject: [PATCH 086/157] [Backport][SME] aarch64: Add bytes_below_saved_regs
to frame info
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=7b792ecaa9414bc81520b3da552d40ad854be976
The frame layout code currently hard-codes the assumption that
the number of bytes below the saved registers is equal to the
@@ -22,10 +25,10 @@ gcc/
2 files changed, 41 insertions(+), 35 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e1f21230c15e..94e1b6865849 100644
index c59af6b1c..5533dd85b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8217,6 +8217,8 @@ aarch64_layout_frame (void)
@@ -8817,6 +8817,8 @@ aarch64_layout_frame (void)
gcc_assert (crtl->is_leaf
|| maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
@@ -34,7 +37,7 @@ index e1f21230c15e..94e1b6865849 100644
/* Now assign stack slots for the registers. Start with the predicate
registers, since predicate LDR and STR have a relatively small
offset range. These saves happen below the hard frame pointer. */
@@ -8321,18 +8323,18 @@ aarch64_layout_frame (void)
@@ -8921,18 +8923,18 @@ aarch64_layout_frame (void)
poly_int64 varargs_and_saved_regs_size = offset + frame.saved_varargs_size;
@@ -57,7 +60,7 @@ index e1f21230c15e..94e1b6865849 100644
frame.locals_offset = frame.saved_varargs_size;
@@ -8376,7 +8378,7 @@ aarch64_layout_frame (void)
@@ -8976,7 +8978,7 @@ aarch64_layout_frame (void)
else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
max_push_offset = 256;
@@ -66,7 +69,7 @@ index e1f21230c15e..94e1b6865849 100644
HOST_WIDE_INT const_saved_regs_size;
if (known_eq (frame.saved_regs_size, 0))
frame.initial_adjust = frame.frame_size;
@@ -8384,31 +8386,31 @@ aarch64_layout_frame (void)
@@ -8984,31 +8986,31 @@ aarch64_layout_frame (void)
&& const_size < max_push_offset
&& known_eq (frame.hard_fp_offset, const_size))
{
@@ -110,7 +113,7 @@ index e1f21230c15e..94e1b6865849 100644
}
else if (saves_below_hard_fp_p
&& known_eq (frame.saved_regs_size,
@@ -8418,30 +8420,29 @@ aarch64_layout_frame (void)
@@ -9018,30 +9020,29 @@ aarch64_layout_frame (void)
sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
save SVE registers relative to SP
@@ -148,7 +151,7 @@ index e1f21230c15e..94e1b6865849 100644
sub sp, sp, hard_fp_offset
stp x29, x30, [sp, 0]
@@ -8449,10 +8450,10 @@ aarch64_layout_frame (void)
@@ -9049,10 +9050,10 @@ aarch64_layout_frame (void)
stp reg3, reg4, [sp, 16]
[sub sp, sp, below_hard_fp_saved_regs_size]
[save SVE registers relative to SP]
@@ -161,7 +164,7 @@ index e1f21230c15e..94e1b6865849 100644
}
/* Make sure the individual adjustments add up to the full frame size. */
@@ -9043,7 +9044,7 @@ aarch64_get_separate_components (void)
@@ -9643,7 +9644,7 @@ aarch64_get_separate_components (void)
if (frame_pointer_needed)
offset -= frame.below_hard_fp_saved_regs_size;
else
@@ -170,7 +173,7 @@ index e1f21230c15e..94e1b6865849 100644
/* Check that we can access the stack slot of the register with one
direct load with no adjustments needed. */
@@ -9192,7 +9193,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9792,7 +9793,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
if (frame_pointer_needed)
offset -= frame.below_hard_fp_saved_regs_size;
else
@@ -179,7 +182,7 @@ index e1f21230c15e..94e1b6865849 100644
rtx addr = plus_constant (Pmode, ptr_reg, offset);
rtx mem = gen_frame_mem (mode, addr);
@@ -9246,7 +9247,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9846,7 +9847,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
if (frame_pointer_needed)
offset2 -= frame.below_hard_fp_saved_regs_size;
else
@@ -188,7 +191,7 @@ index e1f21230c15e..94e1b6865849 100644
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
rtx mem2 = gen_frame_mem (mode, addr2);
rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
@@ -9320,10 +9321,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
@@ -9920,10 +9921,10 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
registers. If POLY_SIZE is not large enough to require a probe this function
will only adjust the stack. When allocating the stack space
FRAME_RELATED_P is then used to indicate if the allocation is frame related.
@@ -203,7 +206,7 @@ index e1f21230c15e..94e1b6865849 100644
We emit barriers after each stack adjustment to prevent optimizations from
breaking the invariant that we never drop the stack more than a page. This
@@ -9532,7 +9533,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -10132,7 +10133,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
/* Handle any residuals. Residuals of at least MIN_PROBE_THRESHOLD have to
be probed. This maintains the requirement that each page is probed at
least once. For initial probing we probe only if the allocation is
@@ -213,10 +216,10 @@ index e1f21230c15e..94e1b6865849 100644
GUARD_SIZE. This works that for any allocation that is large enough to
trigger a probe here, we'll have at least one, and if they're not large
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 6834c3e99226..1e105e12db8d 100644
index 8f0ac2cde..9e0ca380e 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -871,6 +871,11 @@ struct GTY (()) aarch64_frame
@@ -801,6 +801,11 @@ struct GTY (()) aarch64_frame
/* The size of the callee-save registers with a slot in REG_OFFSET. */
poly_int64 saved_regs_size;
@@ -229,5 +232,5 @@ index 6834c3e99226..1e105e12db8d 100644
are saved below the hard frame pointer. */
poly_int64 below_hard_fp_saved_regs_size;
--
2.43.5
2.33.0


@@ -1,7 +1,10 @@
From 34081079ea4de0c98331843f574b5f6f94d7b234 Mon Sep 17 00:00:00 2001
From bd5299017c233bcdf0fcc3dd7217eec1641411fe Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:50 +0100
Subject: [PATCH] aarch64: Add bytes_below_hard_fp to frame info
Date: Tue, 12 Sep 2023 16:05:06 +0100
Subject: [PATCH 087/157] [Backport][SME] aarch64: Add bytes_below_hard_fp to
frame info
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=99305f306246079cc57d30dae7c32107f02ff3e8
Following on from the previous bytes_below_saved_regs patch, this one
records the number of bytes that are below the hard frame pointer.
@@ -33,10 +36,10 @@ gcc/
2 files changed, 8 insertions(+), 3 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 94e1b6865849..c7d84245fbfc 100644
index 5533dd85b..2bb49b9b0 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8269,6 +8269,7 @@ aarch64_layout_frame (void)
@@ -8869,6 +8869,7 @@ aarch64_layout_frame (void)
of the callee save area. */
bool saves_below_hard_fp_p = maybe_ne (offset, 0);
frame.below_hard_fp_saved_regs_size = offset;
@@ -44,7 +47,7 @@ index 94e1b6865849..c7d84245fbfc 100644
if (frame.emit_frame_chain)
{
/* FP and LR are placed in the linkage record. */
@@ -9856,8 +9857,7 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10456,8 +10457,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
poly_int64 final_adjust = frame.final_adjust;
poly_int64 callee_offset = frame.callee_offset;
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
@@ -54,7 +57,7 @@ index 94e1b6865849..c7d84245fbfc 100644
unsigned reg1 = frame.wb_pop_candidate1;
unsigned reg2 = frame.wb_pop_candidate2;
unsigned int last_gpr = (frame.is_scs_enabled
@@ -9915,7 +9915,7 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10515,7 +10515,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
is restored on the instruction doing the writeback. */
aarch64_add_offset (Pmode, stack_pointer_rtx,
hard_frame_pointer_rtx,
@@ -64,10 +67,10 @@ index 94e1b6865849..c7d84245fbfc 100644
else
/* The case where we need to re-use the register here is very rare, so
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 1e105e12db8d..de68ff7202fc 100644
index 9e0ca380e..dedc5b32f 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -880,6 +880,11 @@ struct GTY (()) aarch64_frame
@@ -810,6 +810,11 @@ struct GTY (()) aarch64_frame
are saved below the hard frame pointer. */
poly_int64 below_hard_fp_saved_regs_size;
@@ -80,5 +83,5 @@ index 1e105e12db8d..de68ff7202fc 100644
top of the locals area. This value is always a multiple of
STACK_BOUNDARY. */
--
2.43.5
2.33.0


@@ -0,0 +1,126 @@
From 4dc3e578d958ceb73f973483f42247c3d33210dc Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 20 Jun 2023 21:48:38 +0100
Subject: [PATCH 088/157] [Backport][SME] aarch64: Robustify stack tie handling
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=580b74a79146e51268dd11192d3870645adb0bbb
The SVE handling of stack clash protection copied the stack
pointer to X11 before the probe and set up X11 as the CFA
for unwind purposes:
/* This is done to provide unwinding information for the stack
adjustments we're about to do, however to prevent the optimizers
from removing the R11 move and leaving the CFA note (which would be
very wrong) we tie the old and new stack pointer together.
The tie will expand to nothing but the optimizers will not touch
the instruction. */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
add_reg_note (insn, REG_CFA_DEF_CFA, stack_ptr_copy);
RTX_FRAME_RELATED_P (insn) = 1;
-fcprop-registers is now smart enough to realise that X11 = SP,
replace X11 with SP in the stack tie, and delete the instruction
created above.
This patch tries to prevent that by making stack_tie fussy about
the register numbers. It fixes failures in
gcc.target/aarch64/sve/pcs/stack_clash*.c.
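For context, a hedged non-SVE analogue of the code being protected (assuming
-fstack-clash-protection and a frame larger than the guard size; the case the
patch actually fixes is the SVE variant, where the probed allocation loop's
CFA temporarily lives in X11):

void use (char *);

void
f (void)
{
  char buf[256 * 1024]; /* large enough that the allocation must be probed */
  use (buf);
}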
gcc/
* config/aarch64/aarch64.md (stack_tie): Hard-code the first
register operand to the stack pointer. Require the second register
operand to have the number specified in a separate const_int operand.
* config/aarch64/aarch64.cc (aarch64_emit_stack_tie): New function.
(aarch64_allocate_and_probe_stack_space): Use it.
(aarch64_expand_prologue, aarch64_expand_epilogue): Likewise.
(aarch64_expand_epilogue): Likewise.
---
gcc/config/aarch64/aarch64.cc | 18 ++++++++++++++----
gcc/config/aarch64/aarch64.md | 7 ++++---
2 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 2bb49b9b0..4d505c6fc 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9917,6 +9917,16 @@ aarch64_stack_clash_protection_alloca_probe_range (void)
return STACK_CLASH_CALLER_GUARD;
}
+/* Emit a stack tie that acts as a scheduling barrier for all previous and
+ subsequent memory accesses and that requires the stack pointer and REG
+ to have their current values. REG can be stack_pointer_rtx if no
+ other register's value needs to be fixed. */
+
+static void
+aarch64_emit_stack_tie (rtx reg)
+{
+ emit_insn (gen_stack_tie (reg, gen_int_mode (REGNO (reg), DImode)));
+}
/* Allocate POLY_SIZE bytes of stack space using TEMP1 and TEMP2 as scratch
registers. If POLY_SIZE is not large enough to require a probe this function
@@ -10030,7 +10040,7 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
the instruction. */
rtx stack_ptr_copy = gen_rtx_REG (Pmode, STACK_CLASH_SVE_CFA_REGNUM);
emit_move_insn (stack_ptr_copy, stack_pointer_rtx);
- emit_insn (gen_stack_tie (stack_ptr_copy, stack_pointer_rtx));
+ aarch64_emit_stack_tie (stack_ptr_copy);
/* We want the CFA independent of the stack pointer for the
duration of the loop. */
@@ -10398,7 +10408,7 @@ aarch64_expand_prologue (void)
aarch64_add_cfa_expression (insn, regno_reg_rtx[reg1],
hard_frame_pointer_rtx, 0);
}
- emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
+ aarch64_emit_stack_tie (hard_frame_pointer_rtx);
}
aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
@@ -10501,7 +10511,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
|| cfun->calls_alloca
|| crtl->calls_eh_return)
{
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+ aarch64_emit_stack_tie (stack_pointer_rtx);
need_barrier_p = false;
}
@@ -10540,7 +10550,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
callee_adjust != 0, &cfi_ops);
if (need_barrier_p)
- emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
+ aarch64_emit_stack_tie (stack_pointer_rtx);
if (callee_adjust != 0)
aarch64_pop_regs (reg1, reg2, callee_adjust, &cfi_ops);
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 2becc888e..2ce123255 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -7088,10 +7088,11 @@
(define_insn "stack_tie"
[(set (mem:BLK (scratch))
- (unspec:BLK [(match_operand:DI 0 "register_operand" "rk")
- (match_operand:DI 1 "register_operand" "rk")]
+ (unspec:BLK [(reg:DI SP_REGNUM)
+ (match_operand:DI 0 "register_operand" "rk")
+ (match_operand:DI 1 "const_int_operand")]
UNSPEC_PRLG_STK))]
- ""
+ "REGNO (operands[0]) == INTVAL (operands[1])"
""
[(set_attr "length" "0")]
)
--
2.33.0

View File

@ -1,7 +1,10 @@
From 187861af7c51db9eddc6f954b589c121b210fc74 Mon Sep 17 00:00:00 2001
From 8e010ea1a3e122a74696250d7c6ce5660a88b8f5 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:50 +0100
Subject: [PATCH] aarch64: Tweak aarch64_save/restore_callee_saves
Date: Tue, 12 Sep 2023 16:05:06 +0100
Subject: [PATCH 089/157] [Backport][SME] aarch64: Tweak
aarch64_save/restore_callee_saves
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=38698967268c44991e02aa1e5a2ce9382d6de9db
aarch64_save_callee_saves and aarch64_restore_callee_saves took
a parameter called start_offset that gives the offset of the
@ -29,10 +32,10 @@ gcc/
2 files changed, 28 insertions(+), 32 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index c7d84245fbfc..e79551af41df 100644
index 4d505c6fc..a0a4c7ac3 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8343,7 +8343,6 @@ aarch64_layout_frame (void)
@@ -8943,7 +8943,6 @@ aarch64_layout_frame (void)
frame.final_adjust = 0;
frame.callee_adjust = 0;
frame.sve_callee_adjust = 0;
@ -40,7 +43,7 @@ index c7d84245fbfc..e79551af41df 100644
frame.wb_pop_candidate1 = frame.wb_push_candidate1;
frame.wb_pop_candidate2 = frame.wb_push_candidate2;
@@ -8411,7 +8410,6 @@ aarch64_layout_frame (void)
@@ -9011,7 +9010,6 @@ aarch64_layout_frame (void)
stp reg1, reg2, [sp, bytes_below_saved_regs]
stp reg3, reg4, [sp, bytes_below_saved_regs + 16] */
frame.initial_adjust = frame.frame_size;
@ -48,7 +51,7 @@ index c7d84245fbfc..e79551af41df 100644
}
else if (saves_below_hard_fp_p
&& known_eq (frame.saved_regs_size,
@@ -8758,12 +8756,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
@@ -9358,12 +9356,13 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
}
/* Emit code to save the callee-saved registers from register number START
@ -66,7 +69,7 @@ index c7d84245fbfc..e79551af41df 100644
unsigned start, unsigned limit, bool skip_wb,
bool hard_fp_valid_p)
{
@@ -8791,7 +8790,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9391,7 +9390,9 @@ aarch64_save_callee_saves (poly_int64 start_offset,
machine_mode mode = aarch64_reg_save_mode (regno);
reg = gen_rtx_REG (mode, regno);
@ -77,7 +80,7 @@ index c7d84245fbfc..e79551af41df 100644
rtx base_rtx = stack_pointer_rtx;
poly_int64 sp_offset = offset;
@@ -8802,9 +8803,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9402,9 +9403,7 @@ aarch64_save_callee_saves (poly_int64 start_offset,
else if (GP_REGNUM_P (regno)
&& (!offset.is_constant (&const_offset) || const_offset >= 512))
{
@ -88,7 +91,7 @@ index c7d84245fbfc..e79551af41df 100644
if (hard_fp_valid_p)
base_rtx = hard_frame_pointer_rtx;
else
@@ -8868,12 +8867,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
@@ -9468,12 +9467,13 @@ aarch64_save_callee_saves (poly_int64 start_offset,
}
/* Emit code to restore the callee registers from register number START
@ -106,7 +109,7 @@ index c7d84245fbfc..e79551af41df 100644
unsigned limit, bool skip_wb, rtx *cfi_ops)
{
aarch64_frame &frame = cfun->machine->frame;
@@ -8899,7 +8899,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
@@ -9499,7 +9499,9 @@ aarch64_restore_callee_saves (poly_int64 start_offset, unsigned start,
machine_mode mode = aarch64_reg_save_mode (regno);
reg = gen_rtx_REG (mode, regno);
@ -117,7 +120,7 @@ index c7d84245fbfc..e79551af41df 100644
rtx base_rtx = stack_pointer_rtx;
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
@@ -9675,8 +9677,6 @@ aarch64_expand_prologue (void)
@@ -10285,8 +10287,6 @@ aarch64_expand_prologue (void)
HOST_WIDE_INT callee_adjust = frame.callee_adjust;
poly_int64 final_adjust = frame.final_adjust;
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
@ -126,7 +129,7 @@ index c7d84245fbfc..e79551af41df 100644
unsigned reg1 = frame.wb_push_candidate1;
unsigned reg2 = frame.wb_push_candidate2;
bool emit_frame_chain = frame.emit_frame_chain;
@@ -9752,8 +9752,8 @@ aarch64_expand_prologue (void)
@@ -10362,8 +10362,8 @@ aarch64_expand_prologue (void)
- frame.hard_fp_offset);
gcc_assert (known_ge (chain_offset, 0));
@ -137,7 +140,7 @@ index c7d84245fbfc..e79551af41df 100644
if (emit_frame_chain)
{
@@ -9761,7 +9761,7 @@ aarch64_expand_prologue (void)
@@ -10371,7 +10371,7 @@ aarch64_expand_prologue (void)
{
reg1 = R29_REGNUM;
reg2 = R30_REGNUM;
@ -146,8 +149,8 @@ index c7d84245fbfc..e79551af41df 100644
false, false);
}
else
@@ -9801,7 +9801,7 @@ aarch64_expand_prologue (void)
emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
@@ -10411,7 +10411,7 @@ aarch64_expand_prologue (void)
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
}
- aarch64_save_callee_saves (saved_regs_offset, R0_REGNUM, R30_REGNUM,
@ -155,7 +158,7 @@ index c7d84245fbfc..e79551af41df 100644
callee_adjust != 0 || emit_frame_chain,
emit_frame_chain);
if (maybe_ne (sve_callee_adjust, 0))
@@ -9811,16 +9811,17 @@ aarch64_expand_prologue (void)
@@ -10421,16 +10421,17 @@ aarch64_expand_prologue (void)
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx,
sve_callee_adjust,
!frame_pointer_needed, false);
@ -176,7 +179,7 @@ index c7d84245fbfc..e79551af41df 100644
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
!frame_pointer_needed, true);
}
@@ -9855,7 +9856,6 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10465,7 +10466,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
poly_int64 initial_adjust = frame.initial_adjust;
HOST_WIDE_INT callee_adjust = frame.callee_adjust;
poly_int64 final_adjust = frame.final_adjust;
@ -184,7 +187,7 @@ index c7d84245fbfc..e79551af41df 100644
poly_int64 sve_callee_adjust = frame.sve_callee_adjust;
poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
unsigned reg1 = frame.wb_pop_candidate1;
@@ -9925,9 +9925,9 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10535,9 +10535,9 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
/* Restore the vector registers before the predicate registers,
so that we can use P4 as a temporary for big-endian SVE frames. */
@ -196,7 +199,7 @@ index c7d84245fbfc..e79551af41df 100644
false, &cfi_ops);
if (maybe_ne (sve_callee_adjust, 0))
aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
@@ -9935,7 +9935,7 @@ aarch64_expand_epilogue (bool for_sibcall)
@@ -10545,7 +10545,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
/* When shadow call stack is enabled, the scs_pop in the epilogue will
restore x30, we don't need to restore x30 again in the traditional
way. */
@ -206,10 +209,10 @@ index c7d84245fbfc..e79551af41df 100644
callee_adjust != 0, &cfi_ops);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index de68ff7202fc..94fca4b94716 100644
index dedc5b32f..a1db4f689 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -907,10 +907,6 @@ struct GTY (()) aarch64_frame
@@ -837,10 +837,6 @@ struct GTY (()) aarch64_frame
It is zero when no push is used. */
HOST_WIDE_INT callee_adjust;
@ -221,5 +224,5 @@ index de68ff7202fc..94fca4b94716 100644
SVE registers. */
poly_int64 sve_callee_adjust;
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From 2b983f9064d808daf909bde1d4a13980934a7e6e Mon Sep 17 00:00:00 2001
From c8768dd861538817db8c1955dcce4b6d8ce17c48 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:51 +0100
Subject: [PATCH] aarch64: Only calculate chain_offset if there is a chain
Date: Tue, 12 Sep 2023 16:05:07 +0100
Subject: [PATCH 090/157] [Backport][SME] aarch64: Only calculate chain_offset
if there is a chain
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aa8b57ee0206e8e5ac7078692ee67fb6ead05645
After previous patches, it is no longer necessary to calculate
a chain_offset in cases where there is no chain record.
@ -14,10 +17,10 @@ gcc/
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e79551af41df..d71a042d6112 100644
index a0a4c7ac3..bef6a658b 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9747,16 +9747,16 @@ aarch64_expand_prologue (void)
@@ -10357,16 +10357,16 @@ aarch64_expand_prologue (void)
if (callee_adjust != 0)
aarch64_push_regs (reg1, reg2, callee_adjust);
@ -40,5 +43,5 @@ index e79551af41df..d71a042d6112 100644
{
reg1 = R29_REGNUM;
--
2.43.5
2.33.0

View File

@ -1,22 +1,25 @@
From 0a0a824808d1dec51004fb5805c1a0ae2a35433f Mon Sep 17 00:00:00 2001
From 43dc03de6d608e10d83cc7994d127e3764bfbcf7 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:51 +0100
Subject: [PATCH] aarch64: Rename locals_offset to bytes_above_locals
Date: Tue, 12 Sep 2023 16:05:07 +0100
Subject: [PATCH 091/157] [Backport][SME] aarch64: Rename locals_offset to
bytes_above_locals
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=28034dbb5e32711d5f4d655576f2499e6f57f854
locals_offset was described as:
/* Offset from the base of the frame (incomming SP) to the
top of the locals area. This value is always a multiple of
STACK_BOUNDARY. */
This is implicitly an “upside down” view of the frame: the incoming
This is implicitly an “upside down” view of the frame: the incoming
SP is at offset 0, and anything N bytes below the incoming SP is at
offset N (rather than -N).
However, reg_offset instead uses a “right way up” view; that is,
However, reg_offset instead uses a “right way up” view; that is,
it views offsets in address terms. Something above X is at a
positive offset from X and something below X is at a negative
offset from X.
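
A toy model of the two conventions, with made-up numbers rather than
anything taken from the patch:

  #include <assert.h>

  int main (void)
  {
    /* Suppose the incoming SP sits at byte 4096 and the locals area
       tops out at byte 4032.  Both fields hold the same value, 64;
       only the direction of the reading changes.  */
    long incoming_sp = 4096;
    long top_of_locals = 4032;

    long locals_offset = incoming_sp - top_of_locals;      /* "64 below entry SP" */
    long bytes_above_locals = incoming_sp - top_of_locals; /* "64 bytes above locals" */

    assert (locals_offset == 64 && bytes_above_locals == 64);
    return 0;
  }

The rename changes no values, only which way round the name asks the
reader to think.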
@ -40,10 +43,10 @@ gcc/
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d71a042d6112..d4ec352ba98a 100644
index bef6a658b..992f71bbd 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8337,7 +8337,7 @@ aarch64_layout_frame (void)
@@ -8937,7 +8937,7 @@ aarch64_layout_frame (void)
STACK_BOUNDARY / BITS_PER_UNIT));
frame.frame_size = saved_regs_and_above + frame.bytes_below_saved_regs;
@ -52,7 +55,7 @@ index d71a042d6112..d4ec352ba98a 100644
frame.initial_adjust = 0;
frame.final_adjust = 0;
@@ -12578,13 +12578,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
@@ -13047,13 +13047,13 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
return frame.hard_fp_offset;
if (from == FRAME_POINTER_REGNUM)
@ -69,10 +72,10 @@ index d71a042d6112..d4ec352ba98a 100644
return frame.frame_size;
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 94fca4b94716..bf46e6124aa9 100644
index a1db4f689..2acff9a96 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -885,10 +885,10 @@ struct GTY (()) aarch64_frame
@@ -815,10 +815,10 @@ struct GTY (()) aarch64_frame
always a multiple of STACK_BOUNDARY. */
poly_int64 bytes_below_hard_fp;
@ -87,5 +90,5 @@ index 94fca4b94716..bf46e6124aa9 100644
/* Offset from the base of the frame (incomming SP) to the
hard_frame_pointer. This value is always a multiple of
--
2.43.5
2.33.0

View File

@ -1,11 +1,14 @@
From 3fbf0789202b30a67b12e1fb785c7130f098d665 Mon Sep 17 00:00:00 2001
From e33aa6e25334fd94e1e4f2d8b6c8247029657a54 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:52 +0100
Subject: [PATCH] aarch64: Rename hard_fp_offset to bytes_above_hard_fp
Date: Tue, 12 Sep 2023 16:05:08 +0100
Subject: [PATCH 092/157] [Backport][SME] aarch64: Rename hard_fp_offset to
bytes_above_hard_fp
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ed61c87f044f5460109c197855b316641db3c6c6
Similarly to the previous locals_offset patch, hard_fp_offset
was described as:
@ -14,7 +17,7 @@ was described as:
STACK_BOUNDARY. */
poly_int64 hard_fp_offset;
which again took an “upside-down” view: higher offsets meant lower
which again took an “upside-down” view: higher offsets meant lower
addresses. This patch renames the field to bytes_above_hard_fp instead.
gcc/
@ -30,10 +33,10 @@ gcc/
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index d4ec352ba98a..3c4052740e7a 100644
index 992f71bbd..67199a026 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8329,7 +8329,7 @@ aarch64_layout_frame (void)
@@ -8929,7 +8929,7 @@ aarch64_layout_frame (void)
+ get_frame_size (),
STACK_BOUNDARY / BITS_PER_UNIT);
@ -42,7 +45,7 @@ index d4ec352ba98a..3c4052740e7a 100644
= saved_regs_and_above - frame.below_hard_fp_saved_regs_size;
/* Both these values are already aligned. */
@@ -8378,13 +8378,13 @@ aarch64_layout_frame (void)
@@ -8978,13 +8978,13 @@ aarch64_layout_frame (void)
else if (frame.wb_pop_candidate1 != INVALID_REGNUM)
max_push_offset = 256;
@ -58,7 +61,7 @@ index d4ec352ba98a..3c4052740e7a 100644
{
/* Simple, small frame with no data below the saved registers.
@@ -8401,8 +8401,8 @@ aarch64_layout_frame (void)
@@ -9001,8 +9001,8 @@ aarch64_layout_frame (void)
case that it hardly seems worth the effort though. */
&& (!saves_below_hard_fp_p || const_below_saved_regs == 0)
&& !(cfun->calls_alloca
@ -69,7 +72,7 @@ index d4ec352ba98a..3c4052740e7a 100644
{
/* Frame with small area below the saved registers:
@@ -8420,12 +8420,12 @@ aarch64_layout_frame (void)
@@ -9020,12 +9020,12 @@ aarch64_layout_frame (void)
sub sp, sp, hard_fp_offset + below_hard_fp_saved_regs_size
save SVE registers relative to SP
sub sp, sp, bytes_below_saved_regs */
@ -85,7 +88,7 @@ index d4ec352ba98a..3c4052740e7a 100644
{
/* Frame with large area below the saved registers, or with SVE saves,
but with a small area above:
@@ -8435,7 +8435,7 @@ aarch64_layout_frame (void)
@@ -9035,7 +9035,7 @@ aarch64_layout_frame (void)
[sub sp, sp, below_hard_fp_saved_regs_size]
[save SVE registers relative to SP]
sub sp, sp, bytes_below_saved_regs */
@ -94,7 +97,7 @@ index d4ec352ba98a..3c4052740e7a 100644
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
frame.final_adjust = frame.bytes_below_saved_regs;
}
@@ -8450,7 +8450,7 @@ aarch64_layout_frame (void)
@@ -9050,7 +9050,7 @@ aarch64_layout_frame (void)
[sub sp, sp, below_hard_fp_saved_regs_size]
[save SVE registers relative to SP]
sub sp, sp, bytes_below_saved_regs */
@ -103,7 +106,7 @@ index d4ec352ba98a..3c4052740e7a 100644
frame.sve_callee_adjust = frame.below_hard_fp_saved_regs_size;
frame.final_adjust = frame.bytes_below_saved_regs;
}
@@ -9754,7 +9754,7 @@ aarch64_expand_prologue (void)
@@ -10364,7 +10364,7 @@ aarch64_expand_prologue (void)
{
/* The offset of the frame chain record (if any) from the current SP. */
poly_int64 chain_offset = (initial_adjust + callee_adjust
@ -112,7 +115,7 @@ index d4ec352ba98a..3c4052740e7a 100644
gcc_assert (known_ge (chain_offset, 0));
if (callee_adjust == 0)
@@ -12575,10 +12575,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
@@ -13044,10 +13044,10 @@ aarch64_initial_elimination_offset (unsigned from, unsigned to)
if (to == HARD_FRAME_POINTER_REGNUM)
{
if (from == ARG_POINTER_REGNUM)
@ -126,10 +129,10 @@ index d4ec352ba98a..3c4052740e7a 100644
if (to == STACK_POINTER_REGNUM)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bf46e6124aa9..dd1f403f9393 100644
index 2acff9a96..0f7822c3d 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -890,10 +890,10 @@ struct GTY (()) aarch64_frame
@@ -820,10 +820,10 @@ struct GTY (()) aarch64_frame
STACK_BOUNDARY. */
poly_int64 bytes_above_locals;
@ -144,5 +147,5 @@ index bf46e6124aa9..dd1f403f9393 100644
/* The size of the frame. This value is the offset from base of the
frame (incomming SP) to the stack_pointer. This value is always
--
2.43.5
2.33.0

View File

@ -1,13 +1,15 @@
From aac8b31379ac3bbd14fc6427dce23f56e54e8485 Mon Sep 17 00:00:00 2001
From 6aa0db727b6e3a7fed95b014f25f3f022d1f46e2 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:52 +0100
Subject: [PATCH] aarch64: Tweak frame_size comment
Date: Tue, 12 Sep 2023 16:05:08 +0100
Subject: [PATCH 093/157] [Backport][SME] aarch64: Tweak frame_size comment
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=492b60670e69b0a7f11345b69a3c922c20d5d8c3
This patch fixes another case in which a value was described with
an “upside-down” view.
an “upside-down” view.
gcc/
* config/aarch64/aarch64.h (aarch64_frame::frame_size): Tweak comment.
@ -16,10 +18,10 @@ gcc/
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index dd1f403f9393..700524ae22bf 100644
index 0f7822c3d..39abca051 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -895,8 +895,8 @@ struct GTY (()) aarch64_frame
@@ -825,8 +825,8 @@ struct GTY (()) aarch64_frame
STACK_BOUNDARY. */
poly_int64 bytes_above_hard_fp;
@ -31,5 +33,5 @@ index dd1f403f9393..700524ae22bf 100644
poly_int64 frame_size;
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From 8d5506a8aeb8dd7e8b209a3663b07688478f76b9 Mon Sep 17 00:00:00 2001
From 3b10711c6a5610bf8e2287b9491557268ee148da Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:53 +0100
Subject: [PATCH] aarch64: Measure reg_offset from the bottom of the frame
Date: Tue, 12 Sep 2023 16:05:09 +0100
Subject: [PATCH 094/157] [Backport][SME] aarch64: Measure reg_offset from the
bottom of the frame
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=67a36b6f5d6be11d280081b461e72910aca2fc54
reg_offset was measured from the bottom of the saved register area.
This made perfect sense with the original layout, since the bottom
@ -31,10 +34,10 @@ gcc/
2 files changed, 27 insertions(+), 29 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 3c4052740e7a..97dd077844b4 100644
index 67199a026..df8a83b04 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8139,7 +8139,6 @@ aarch64_needs_frame_chain (void)
@@ -8739,7 +8739,6 @@ aarch64_needs_frame_chain (void)
static void
aarch64_layout_frame (void)
{
@ -42,7 +45,7 @@ index 3c4052740e7a..97dd077844b4 100644
int regno, last_fp_reg = INVALID_REGNUM;
machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
@@ -8217,7 +8216,9 @@ aarch64_layout_frame (void)
@@ -8817,7 +8816,9 @@ aarch64_layout_frame (void)
gcc_assert (crtl->is_leaf
|| maybe_ne (frame.reg_offset[R30_REGNUM], SLOT_NOT_REQUIRED));
@ -53,7 +56,7 @@ index 3c4052740e7a..97dd077844b4 100644
/* Now assign stack slots for the registers. Start with the predicate
registers, since predicate LDR and STR have a relatively small
@@ -8229,7 +8230,8 @@ aarch64_layout_frame (void)
@@ -8829,7 +8830,8 @@ aarch64_layout_frame (void)
offset += BYTES_PER_SVE_PRED;
}
@ -63,7 +66,7 @@ index 3c4052740e7a..97dd077844b4 100644
{
/* If we have any vector registers to save above the predicate registers,
the offset of the vector register save slots need to be a multiple
@@ -8247,10 +8249,10 @@ aarch64_layout_frame (void)
@@ -8847,10 +8849,10 @@ aarch64_layout_frame (void)
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
else
{
@ -78,7 +81,7 @@ index 3c4052740e7a..97dd077844b4 100644
else
gcc_unreachable ();
}
@@ -8267,9 +8269,10 @@ aarch64_layout_frame (void)
@@ -8867,9 +8869,10 @@ aarch64_layout_frame (void)
/* OFFSET is now the offset of the hard frame pointer from the bottom
of the callee save area. */
@ -92,7 +95,7 @@ index 3c4052740e7a..97dd077844b4 100644
if (frame.emit_frame_chain)
{
/* FP and LR are placed in the linkage record. */
@@ -8320,9 +8323,10 @@ aarch64_layout_frame (void)
@@ -8920,9 +8923,10 @@ aarch64_layout_frame (void)
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@ -105,7 +108,7 @@ index 3c4052740e7a..97dd077844b4 100644
poly_int64 saved_regs_and_above
= aligned_upper_bound (varargs_and_saved_regs_size
@@ -8790,9 +8794,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
@@ -9390,9 +9394,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
machine_mode mode = aarch64_reg_save_mode (regno);
reg = gen_rtx_REG (mode, regno);
@ -116,7 +119,7 @@ index 3c4052740e7a..97dd077844b4 100644
rtx base_rtx = stack_pointer_rtx;
poly_int64 sp_offset = offset;
@@ -8899,9 +8901,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
@@ -9499,9 +9501,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
machine_mode mode = aarch64_reg_save_mode (regno);
reg = gen_rtx_REG (mode, regno);
@ -127,7 +130,7 @@ index 3c4052740e7a..97dd077844b4 100644
rtx base_rtx = stack_pointer_rtx;
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
@@ -9040,14 +9040,12 @@ aarch64_get_separate_components (void)
@@ -9640,14 +9640,12 @@ aarch64_get_separate_components (void)
it as a stack probe for -fstack-clash-protection. */
if (flag_stack_clash_protection
&& maybe_ne (frame.below_hard_fp_saved_regs_size, 0)
@ -144,7 +147,7 @@ index 3c4052740e7a..97dd077844b4 100644
/* Check that we can access the stack slot of the register with one
direct load with no adjustments needed. */
@@ -9194,9 +9192,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9794,9 +9792,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
rtx reg = gen_rtx_REG (mode, regno);
poly_int64 offset = frame.reg_offset[regno];
if (frame_pointer_needed)
@ -155,7 +158,7 @@ index 3c4052740e7a..97dd077844b4 100644
rtx addr = plus_constant (Pmode, ptr_reg, offset);
rtx mem = gen_frame_mem (mode, addr);
@@ -9248,9 +9244,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
@@ -9848,9 +9844,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
/* REGNO2 can be saved/restored in a pair with REGNO. */
rtx reg2 = gen_rtx_REG (mode, regno2);
if (frame_pointer_needed)
@ -166,7 +169,7 @@ index 3c4052740e7a..97dd077844b4 100644
rtx addr2 = plus_constant (Pmode, ptr_reg, offset2);
rtx mem2 = gen_frame_mem (mode, addr2);
rtx set2 = prologue_p ? gen_rtx_SET (mem2, reg2)
@@ -9366,7 +9360,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -9976,7 +9970,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (final_adjustment_p
&& known_eq (frame.below_hard_fp_saved_regs_size, 0))
{
@ -177,10 +180,10 @@ index 3c4052740e7a..97dd077844b4 100644
min_probe_threshold -= lr_offset.to_constant ();
else
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 700524ae22bf..b61358370732 100644
index 39abca051..f340237d0 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -860,6 +860,9 @@ extern enum aarch64_processor aarch64_tune;
@@ -790,6 +790,9 @@ extern enum aarch64_processor aarch64_tune;
#ifdef HAVE_POLY_INT_H
struct GTY (()) aarch64_frame
{
@ -191,5 +194,5 @@ index 700524ae22bf..b61358370732 100644
/* The number of extra stack bytes taken up by register varargs.
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From b47766614df3b9df878262efb2ad73aaac108363 Mon Sep 17 00:00:00 2001
From 4b8f3f194e68d0d411eaa6692699d8e5e2b4217d Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:53 +0100
Subject: [PATCH] aarch64: Simplify top of frame allocation
Date: Tue, 12 Sep 2023 16:05:09 +0100
Subject: [PATCH 095/157] [Backport][SME] aarch64: Simplify top of frame
allocation
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=bc9dcdde80915d7585a21daa2b69f4adf4a1e3c1
After previous patches, it no longer really makes sense to allocate
the top of the frame in terms of varargs_and_saved_regs_size and
@ -15,10 +18,10 @@ gcc/
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 97dd077844b4..81935852d5b2 100644
index df8a83b04..3329aa364 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8325,23 +8325,16 @@ aarch64_layout_frame (void)
@@ -8925,23 +8925,16 @@ aarch64_layout_frame (void)
frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
@ -51,5 +54,5 @@ index 97dd077844b4..81935852d5b2 100644
frame.initial_adjust = 0;
frame.final_adjust = 0;
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From 08f71b4bb28fb74d20e8d2927a557e8119ce9f4d Mon Sep 17 00:00:00 2001
From 0ab484f5de7d28c0a7166439d403e0983834b120 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:54 +0100
Subject: [PATCH] aarch64: Minor initial adjustment tweak
Date: Tue, 12 Sep 2023 16:05:10 +0100
Subject: [PATCH 096/157] [Backport][SME] aarch64: Minor initial adjustment
tweak
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=ee5466ff4faca2076cc61f1f120d0b5062c8111c
This patch just changes a calculation of initial_adjust
to one that makes it slightly more obvious that the total
@ -16,10 +19,10 @@ gcc/
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 81935852d5b2..4d9fcf3d1623 100644
index 3329aa364..72604dd9d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8414,11 +8414,10 @@ aarch64_layout_frame (void)
@@ -9014,11 +9014,10 @@ aarch64_layout_frame (void)
{
/* Frame in which all saves are SVE saves:
@ -34,5 +37,5 @@ index 81935852d5b2..4d9fcf3d1623 100644
}
else if (frame.bytes_above_hard_fp.is_constant (&const_above_fp)
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From f22315d5c19e8310e4dc880fd509678fd291fca8 Mon Sep 17 00:00:00 2001
From b4581d1e6a7b94dfbd58871dad51d3f12889081f Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:54 +0100
Subject: [PATCH] aarch64: Tweak stack clash boundary condition
Date: Tue, 12 Sep 2023 16:05:10 +0100
Subject: [PATCH 097/157] [Backport][SME] aarch64: Tweak stack clash boundary
condition
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=1785b8077cc03214ebd1db953c870172fcf15966
The AArch64 ABI says that, when stack clash protection is used,
there can be a maximum of 1KiB of unprobed space at sp on entry
@ -43,10 +46,10 @@ gcc/testsuite/
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 4d9fcf3d1623..34c1d8614cd9 100644
index 72604dd9d..ba92a23a7 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9333,9 +9333,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -9943,9 +9943,11 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
HOST_WIDE_INT guard_size
= 1 << param_stack_clash_protection_guard_size;
HOST_WIDE_INT guard_used_by_caller = STACK_CLASH_CALLER_GUARD;
@ -61,7 +64,7 @@ index 4d9fcf3d1623..34c1d8614cd9 100644
account any unprobed space there is above the current SP. There are
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
new file mode 100644
index 000000000000..0d8a25d73a24
index 000000000..0d8a25d73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
@@ -0,0 +1,55 @@
@ -121,5 +124,5 @@ index 000000000000..0d8a25d73a24
+ return 1;
+}
--
2.43.5
2.33.0
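
The boundary the patch above adjusts can be pictured with a pair of toy
functions. The sizes are assumptions (the exact cut-off also depends on
saved registers and outgoing arguments), so treat this as a sketch of
the rule "final allocations of at most 1KiB need no probe" rather than
an exact reproduction of the testcase:

  /* Assumed flag: -fstack-clash-protection with the default guard.  */
  void g (volatile char *);

  /* Final allocation within the caller's 1KiB allowance: no probe.  */
  void within_allowance (void) { volatile char buf[1024 - 64]; g (buf); }

  /* Past the allowance: the prologue must probe the allocation.  */
  void past_allowance (void) { volatile char buf[1024 + 64]; g (buf); }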

View File

@ -1,7 +1,10 @@
From 15e18831bf98fd25af098b970ebf0c9a6200a34b Mon Sep 17 00:00:00 2001
From ffd483dc6a2a4af495d56cf5ebdbbb3b9ca58820 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:55 +0100
Subject: [PATCH] aarch64: Put LR save probe in first 16 bytes
Date: Tue, 12 Sep 2023 16:05:11 +0100
Subject: [PATCH 098/157] [Backport][SME] aarch64: Put LR save probe in first
16 bytes
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=fee0a18abfdd4874194abd149943fa7c77a29b7c
-fstack-clash-protection uses the save of LR as a probe for the next
allocation. The next allocation could be:
@ -70,10 +73,10 @@ gcc/testsuite/
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 34c1d8614cd9..16433fb70f4f 100644
index ba92a23a7..1ba4c2f89 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8273,26 +8273,34 @@ aarch64_layout_frame (void)
@@ -8873,26 +8873,34 @@ aarch64_layout_frame (void)
bool saves_below_hard_fp_p
= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
frame.bytes_below_hard_fp = offset;
@ -121,7 +124,7 @@ index 34c1d8614cd9..16433fb70f4f 100644
poly_int64 max_int_offset = offset;
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@@ -8370,10 +8378,13 @@ aarch64_layout_frame (void)
@@ -8970,10 +8978,13 @@ aarch64_layout_frame (void)
max_push_offset to 0, because no registers are popped at this time,
so callee_adjust cannot be adjusted. */
HOST_WIDE_INT max_push_offset = 0;
@ -139,7 +142,7 @@ index 34c1d8614cd9..16433fb70f4f 100644
HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
HOST_WIDE_INT const_saved_regs_size;
@@ -9339,29 +9350,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -9949,29 +9960,6 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
= (final_adjustment_p
? guard_used_by_caller + byte_sp_alignment
: guard_size - guard_used_by_caller);
@ -169,7 +172,7 @@ index 34c1d8614cd9..16433fb70f4f 100644
poly_int64 frame_size = frame.frame_size;
/* We should always have a positive probe threshold. */
@@ -9541,8 +9529,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -10151,8 +10139,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
if (final_adjustment_p && rounded_size != 0)
min_probe_threshold = 0;
/* If doing a small final adjustment, we always probe at offset 0.
@ -182,7 +185,7 @@ index 34c1d8614cd9..16433fb70f4f 100644
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
new file mode 100644
index 000000000000..82447d20fff5
index 000000000..82447d20f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
@@ -0,0 +1,100 @@
@ -288,7 +291,7 @@ index 000000000000..82447d20fff5
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
new file mode 100644
index 000000000000..73ac3e4e4eb0
index 000000000..73ac3e4e4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
@@ -0,0 +1,100 @@
@ -394,7 +397,7 @@ index 000000000000..73ac3e4e4eb0
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
new file mode 100644
index 000000000000..690aae8dfd5b
index 000000000..690aae8df
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-20.c
@@ -0,0 +1,3 @@
@ -402,5 +405,5 @@ index 000000000000..690aae8dfd5b
+
+#include "stack-check-prologue-19.c"
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From c4f0e121faa36342f1d21919e54a05ad841c4f86 Mon Sep 17 00:00:00 2001
From c12de24e57cbe26c224bab39698736fa4004f8ff Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:55 +0100
Subject: [PATCH] aarch64: Simplify probe of final frame allocation
Date: Tue, 12 Sep 2023 16:05:11 +0100
Subject: [PATCH 099/157] [Backport][SME] aarch64: Simplify probe of final
frame allocation
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=f87028a905059573ae7fdfe526d034fd70b3bcae
Previous patches ensured that the final frame allocation only needs
a probe when the size is strictly greater than 1KiB. It's therefore
@ -28,10 +31,10 @@ gcc/testsuite/
4 files changed, 9 insertions(+), 13 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 16433fb70f4f..8abf3d7a1e2b 100644
index 1ba4c2f89..6d835dc8f 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -9523,16 +9523,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -10133,16 +10133,12 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
are still safe. */
if (residual)
{
@ -50,7 +53,7 @@ index 16433fb70f4f..8abf3d7a1e2b 100644
aarch64_sub_sp (temp1, temp2, residual, frame_related_p);
if (residual >= min_probe_threshold)
@@ -9543,8 +9539,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
@@ -10153,8 +10149,8 @@ aarch64_allocate_and_probe_stack_space (rtx temp1, rtx temp2,
HOST_WIDE_INT_PRINT_DEC " bytes, probing will be required."
"\n", residual);
@ -62,7 +65,7 @@ index 16433fb70f4f..8abf3d7a1e2b 100644
}
}
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
index 0d8a25d73a24..f0ec1389771d 100644
index 0d8a25d73..f0ec13897 100644
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-17.c
@@ -33,7 +33,7 @@ int test1(int z) {
@ -75,7 +78,7 @@ index 0d8a25d73a24..f0ec1389771d 100644
** bl g
** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
index 82447d20fff5..6383bec5ebcd 100644
index 82447d20f..6383bec5e 100644
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-18.c
@@ -9,7 +9,7 @@ void g();
@ -97,7 +100,7 @@ index 82447d20fff5..6383bec5ebcd 100644
** bl g
** ...
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
index 73ac3e4e4eb0..562039b5e9b8 100644
index 73ac3e4e4..562039b5e 100644
--- a/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
+++ b/gcc/testsuite/gcc.target/aarch64/stack-check-prologue-19.c
@@ -9,7 +9,7 @@ void g();
@ -119,5 +122,5 @@ index 73ac3e4e4eb0..562039b5e9b8 100644
** bl g
** ...
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From 6f0ab0a9f46a17b68349ff6035aa776bf65f0575 Mon Sep 17 00:00:00 2001
From 1bf3e9a04411b483c89d2e2f9096ab66800c3b3f Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:56 +0100
Subject: [PATCH] aarch64: Explicitly record probe registers in frame info
Date: Tue, 12 Sep 2023 16:05:12 +0100
Subject: [PATCH 100/157] [Backport][SME] aarch64: Explicitly record probe
registers in frame info
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=5ce957484eea15f09503fcffa4dfdfb70ad82f8f
The stack frame is currently divided into three areas:
@ -72,10 +75,10 @@ gcc/testsuite/
3 files changed, 64 insertions(+), 18 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8abf3d7a1e2b..a8d907df8843 100644
index 6d835dc8f..dd80ceba8 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8210,15 +8210,11 @@ aarch64_layout_frame (void)
@@ -8810,15 +8810,11 @@ aarch64_layout_frame (void)
&& !crtl->abi->clobbers_full_reg_p (regno))
frame.reg_offset[regno] = SLOT_REQUIRED;
@ -92,7 +95,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
/* Now assign stack slots for the registers. Start with the predicate
registers, since predicate LDR and STR have a relatively small
@@ -8226,6 +8222,8 @@ aarch64_layout_frame (void)
@@ -8826,6 +8822,8 @@ aarch64_layout_frame (void)
for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
@ -101,7 +104,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
frame.reg_offset[regno] = offset;
offset += BYTES_PER_SVE_PRED;
}
@@ -8263,6 +8261,8 @@ aarch64_layout_frame (void)
@@ -8863,6 +8861,8 @@ aarch64_layout_frame (void)
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
@ -110,7 +113,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
frame.reg_offset[regno] = offset;
offset += vector_save_size;
}
@@ -8272,10 +8272,18 @@ aarch64_layout_frame (void)
@@ -8872,10 +8872,18 @@ aarch64_layout_frame (void)
frame.below_hard_fp_saved_regs_size = offset - frame.bytes_below_saved_regs;
bool saves_below_hard_fp_p
= maybe_ne (frame.below_hard_fp_saved_regs_size, 0);
@ -129,7 +132,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
frame.reg_offset[regno] = offset;
if (frame.wb_push_candidate1 == INVALID_REGNUM)
frame.wb_push_candidate1 = regno;
@@ -8309,6 +8317,8 @@ aarch64_layout_frame (void)
@@ -8909,6 +8917,8 @@ aarch64_layout_frame (void)
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
@ -138,7 +141,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
/* If there is an alignment gap between integer and fp callee-saves,
allocate the last fp register to it if possible. */
if (regno == last_fp_reg
@@ -8332,6 +8342,17 @@ aarch64_layout_frame (void)
@@ -8932,6 +8942,17 @@ aarch64_layout_frame (void)
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
frame.saved_regs_size = offset - frame.bytes_below_saved_regs;
@ -156,7 +159,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
offset += get_frame_size ();
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@@ -8462,6 +8483,25 @@ aarch64_layout_frame (void)
@@ -9062,6 +9083,25 @@ aarch64_layout_frame (void)
frame.final_adjust = frame.bytes_below_saved_regs;
}
@ -182,7 +185,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
/* Make sure the individual adjustments add up to the full frame size. */
gcc_assert (known_eq (frame.initial_adjust
+ frame.callee_adjust
@@ -9039,13 +9079,6 @@ aarch64_get_separate_components (void)
@@ -9639,13 +9679,6 @@ aarch64_get_separate_components (void)
poly_int64 offset = frame.reg_offset[regno];
@ -196,7 +199,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
/* Get the offset relative to the register we'll use. */
if (frame_pointer_needed)
offset -= frame.bytes_below_hard_fp;
@@ -9080,6 +9113,13 @@ aarch64_get_separate_components (void)
@@ -9680,6 +9713,13 @@ aarch64_get_separate_components (void)
bitmap_clear_bit (components, LR_REGNUM);
bitmap_clear_bit (components, SP_REGNUM);
@ -210,7 +213,7 @@ index 8abf3d7a1e2b..a8d907df8843 100644
return components;
}
@@ -9616,8 +9656,8 @@ aarch64_epilogue_uses (int regno)
@@ -10226,8 +10266,8 @@ aarch64_epilogue_uses (int regno)
When probing is needed, we emit a probe at the start of the prologue
and every PARAM_STACK_CLASH_PROTECTION_GUARD_SIZE bytes thereafter.
@ -222,10 +225,10 @@ index 8abf3d7a1e2b..a8d907df8843 100644
For outgoing arguments we probe if the size is larger than 1KB, such that
the ABI specified buffer is maintained for the next callee.
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index b61358370732..46d4693e2064 100644
index f340237d0..af480d9e8 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -957,6 +957,14 @@ struct GTY (()) aarch64_frame
@@ -887,6 +887,14 @@ struct GTY (()) aarch64_frame
This is the register they should use. */
unsigned spare_pred_reg;
@ -241,7 +244,7 @@ index b61358370732..46d4693e2064 100644
/* True if shadow call stack should be enabled for the current function. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
index 3e01ec36c3a4..3530a0d504ba 100644
index 3e01ec36c..3530a0d50 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_3.c
@@ -11,11 +11,10 @@
@ -273,5 +276,5 @@ index 3e01ec36c3a4..3530a0d504ba 100644
** ldr x24, \[sp\], 32
** ret
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From 8254e1b9cd500e0c278465a3657543477e9d1250 Mon Sep 17 00:00:00 2001
From 5c33afb2173f68a0166bd180977cd1e547df22dc Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:56 +0100
Subject: [PATCH] aarch64: Remove below_hard_fp_saved_regs_size
Date: Tue, 12 Sep 2023 16:05:12 +0100
Subject: [PATCH 101/157] [Backport][SME] aarch64: Remove
below_hard_fp_saved_regs_size
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=2abfc867d3ba025ac2146bb21b92a93e6325dec1
After previous patches, it's no longer necessary to store
saved_regs_size and below_hard_fp_saved_regs_size in the frame info.
@ -18,10 +21,10 @@ gcc/
2 files changed, 21 insertions(+), 31 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a8d907df8843..ac3d3b336a37 100644
index dd80ceba8..0894ed325 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8269,9 +8269,8 @@ aarch64_layout_frame (void)
@@ -8869,9 +8869,8 @@ aarch64_layout_frame (void)
/* OFFSET is now the offset of the hard frame pointer from the bottom
of the callee save area. */
@ -33,7 +36,7 @@ index a8d907df8843..ac3d3b336a37 100644
gcc_assert (!saves_below_hard_fp_p
|| (frame.sve_save_and_probe != INVALID_REGNUM
&& known_eq (frame.reg_offset[frame.sve_save_and_probe],
@@ -8341,9 +8340,8 @@ aarch64_layout_frame (void)
@@ -8941,9 +8940,8 @@ aarch64_layout_frame (void)
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@ -45,7 +48,7 @@ index a8d907df8843..ac3d3b336a37 100644
|| (frame.hard_fp_save_and_probe != INVALID_REGNUM
&& known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
frame.bytes_below_hard_fp)));
@@ -8352,7 +8350,7 @@ aarch64_layout_frame (void)
@@ -8952,7 +8950,7 @@ aarch64_layout_frame (void)
The saving of the bottommost register counts as an implicit probe,
which allows us to maintain the invariant described in the comment
at expand_prologue. */
@ -54,7 +57,7 @@ index a8d907df8843..ac3d3b336a37 100644
offset += get_frame_size ();
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
@@ -8409,7 +8407,7 @@ aarch64_layout_frame (void)
@@ -9009,7 +9007,7 @@ aarch64_layout_frame (void)
HOST_WIDE_INT const_size, const_below_saved_regs, const_above_fp;
HOST_WIDE_INT const_saved_regs_size;
@ -63,7 +66,7 @@ index a8d907df8843..ac3d3b336a37 100644
frame.initial_adjust = frame.frame_size;
else if (frame.frame_size.is_constant (&const_size)
&& const_size < max_push_offset
@@ -8422,7 +8420,7 @@ aarch64_layout_frame (void)
@@ -9022,7 +9020,7 @@ aarch64_layout_frame (void)
frame.callee_adjust = const_size;
}
else if (frame.bytes_below_saved_regs.is_constant (&const_below_saved_regs)
@ -72,7 +75,7 @@ index a8d907df8843..ac3d3b336a37 100644
&& const_below_saved_regs + const_saved_regs_size < 512
/* We could handle this case even with data below the saved
registers, provided that that data left us with valid offsets
@@ -8441,8 +8439,7 @@ aarch64_layout_frame (void)
@@ -9041,8 +9039,7 @@ aarch64_layout_frame (void)
frame.initial_adjust = frame.frame_size;
}
else if (saves_below_hard_fp_p
@ -82,7 +85,7 @@ index a8d907df8843..ac3d3b336a37 100644
{
/* Frame in which all saves are SVE saves:
@@ -8464,7 +8461,7 @@ aarch64_layout_frame (void)
@@ -9064,7 +9061,7 @@ aarch64_layout_frame (void)
[save SVE registers relative to SP]
sub sp, sp, bytes_below_saved_regs */
frame.callee_adjust = const_above_fp;
@ -91,7 +94,7 @@ index a8d907df8843..ac3d3b336a37 100644
frame.final_adjust = frame.bytes_below_saved_regs;
}
else
@@ -8479,7 +8476,7 @@ aarch64_layout_frame (void)
@@ -9079,7 +9076,7 @@ aarch64_layout_frame (void)
[save SVE registers relative to SP]
sub sp, sp, bytes_below_saved_regs */
frame.initial_adjust = frame.bytes_above_hard_fp;
@ -100,7 +103,7 @@ index a8d907df8843..ac3d3b336a37 100644
frame.final_adjust = frame.bytes_below_saved_regs;
}
@@ -9621,17 +9618,17 @@ aarch64_epilogue_uses (int regno)
@@ -10231,17 +10228,17 @@ aarch64_epilogue_uses (int regno)
| local variables | <-- frame_pointer_rtx
| |
+-------------------------------+
@ -130,10 +133,10 @@ index a8d907df8843..ac3d3b336a37 100644
| dynamic allocation |
+-------------------------------+
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 46d4693e2064..01f7751bc783 100644
index af480d9e8..292ef2eec 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -871,18 +871,11 @@ struct GTY (()) aarch64_frame
@@ -801,18 +801,11 @@ struct GTY (()) aarch64_frame
STACK_BOUNDARY. */
HOST_WIDE_INT saved_varargs_size;
@ -153,5 +156,5 @@ index 46d4693e2064..01f7751bc783 100644
of the outgoing arguments) and the hard frame pointer. This value is
always a multiple of STACK_BOUNDARY. */
--
2.43.5
2.33.0

View File

@ -1,7 +1,10 @@
From 75c37e031408262263442f5b4cdb83d3777b6422 Mon Sep 17 00:00:00 2001
From b225443d64481bc225e29bf119d99b719c69cd3c Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 12 Sep 2023 16:08:57 +0100
Subject: [PATCH] aarch64: Make stack smash canary protect saved registers
Date: Tue, 12 Sep 2023 16:05:13 +0100
Subject: [PATCH 102/157] [Backport][SME] aarch64: Make stack smash canary
protect saved registers
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3e4afea3b192c205c9a9da99f4cac65c68087eaf
AArch64 normally puts the saved registers near the bottom of the frame,
immediately above any dynamic allocations. But this means that a
@ -38,10 +41,10 @@ gcc/testsuite/
create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ac3d3b336a37..96c3f48fdc49 100644
index 0894ed325..8d4dd2891 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8133,6 +8133,20 @@ aarch64_needs_frame_chain (void)
@@ -8733,6 +8733,20 @@ aarch64_needs_frame_chain (void)
return aarch64_use_frame_pointer;
}
@ -62,7 +65,7 @@ index ac3d3b336a37..96c3f48fdc49 100644
/* Mark the registers that need to be saved by the callee and calculate
the size of the callee-saved registers area and frame record (both FP
and LR may be omitted). */
@@ -8144,6 +8158,7 @@ aarch64_layout_frame (void)
@@ -8744,6 +8758,7 @@ aarch64_layout_frame (void)
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
bool frame_related_fp_reg_p = false;
aarch64_frame &frame = cfun->machine->frame;
@ -70,7 +73,7 @@ index ac3d3b336a37..96c3f48fdc49 100644
frame.emit_frame_chain = aarch64_needs_frame_chain ();
@@ -8210,9 +8225,16 @@ aarch64_layout_frame (void)
@@ -8810,9 +8825,16 @@ aarch64_layout_frame (void)
&& !crtl->abi->clobbers_full_reg_p (regno))
frame.reg_offset[regno] = SLOT_REQUIRED;
@ -87,7 +90,7 @@ index ac3d3b336a37..96c3f48fdc49 100644
frame.bytes_below_saved_regs = offset;
frame.sve_save_and_probe = INVALID_REGNUM;
@@ -8352,15 +8374,18 @@ aarch64_layout_frame (void)
@@ -8952,15 +8974,18 @@ aarch64_layout_frame (void)
at expand_prologue. */
gcc_assert (crtl->is_leaf || maybe_ne (saved_regs_size, 0));
@ -110,7 +113,7 @@ index ac3d3b336a37..96c3f48fdc49 100644
frame.bytes_above_locals = frame.frame_size - top_of_locals;
frame.initial_adjust = 0;
@@ -9615,10 +9640,10 @@ aarch64_epilogue_uses (int regno)
@@ -10225,10 +10250,10 @@ aarch64_epilogue_uses (int regno)
| for register varargs |
| |
+-------------------------------+
@ -123,7 +126,7 @@ index ac3d3b336a37..96c3f48fdc49 100644
+-------------------------------+
| callee-saved registers |
+-------------------------------+
@@ -9630,6 +9655,10 @@ aarch64_epilogue_uses (int regno)
@@ -10240,6 +10265,10 @@ aarch64_epilogue_uses (int regno)
+-------------------------------+
| SVE predicate registers |
+-------------------------------+
@ -134,7 +137,7 @@ index ac3d3b336a37..96c3f48fdc49 100644
| dynamic allocation |
+-------------------------------+
| padding |
@@ -9639,6 +9668,9 @@ aarch64_epilogue_uses (int regno)
@@ -10249,6 +10278,9 @@ aarch64_epilogue_uses (int regno)
+-------------------------------+
| | <-- stack_pointer_rtx (aligned)
@ -144,18 +147,18 @@ index ac3d3b336a37..96c3f48fdc49 100644
Dynamic stack allocations via alloca() decrease stack_pointer_rtx
but leave frame_pointer_rtx and hard_frame_pointer_rtx
unchanged.
@@ -9834,6 +9866,8 @@ aarch64_expand_prologue (void)
@@ -10444,6 +10476,8 @@ aarch64_expand_prologue (void)
gcc_assert (known_eq (bytes_below_sp, final_adjust));
aarch64_allocate_and_probe_stack_space (tmp1_rtx, tmp0_rtx, final_adjust,
!frame_pointer_needed, true);
+ if (emit_frame_chain && maybe_ne (final_adjust, 0))
+ emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
+ aarch64_emit_stack_tie (hard_frame_pointer_rtx);
}
/* Return TRUE if we can use a simple_return insn.
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
new file mode 100644
index 000000000000..e71d820e3654
index 000000000..e71d820e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-8.c
@@ -0,0 +1,95 @@
@ -256,7 +259,7 @@ index 000000000000..e71d820e3654
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
new file mode 100644
index 000000000000..58f322aa480a
index 000000000..58f322aa4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-9.c
@@ -0,0 +1,33 @@
@ -294,5 +297,5 @@ index 000000000000..58f322aa480a
+ return 0;
+}
--
2.43.5
2.33.0
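
The hazard the patch above closes can be sketched with hypothetical
code (assumed flag: -fstack-protector-all; read_input is a stand-in for
anything that overruns its argument):

  /* Dynamic allocations sit at the very bottom of the frame.  Before
     this change the saved registers sat immediately above them, below
     the canary-protected locals, so an overrun of the VLA could reach
     the saved LR without ever touching the canary.  With the locals
     (and the canary) placed below the saved registers, the overrun
     has to cross the canary first.  */
  void read_input (char *);

  void f (int n)
  {
    char vla[n];
    read_input (vla);
  }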

View File

@ -0,0 +1,201 @@
From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 17 Oct 2023 23:45:43 +0100
Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e
The prologue/epilogue pass allows the prologue sequence to contain
jumps. The sequence is then partitioned into basic blocks using
find_many_sub_basic_blocks.
This patch treats epilogues in a similar way. Since only one block
might need to be split, the patch (re)introduces a find_sub_basic_blocks
routine to handle a single block.
The new routine hard-codes the assumption that split_block will chain
the new block immediately after the original block. The routine doesn't
try to replicate the fix for PR81030, since that was specific to
gimple->rtl expansion.
The patch is needed for follow-on aarch64 patches that add conditional
code to the epilogue. The tests are part of those patches.
gcc/
* cfgbuild.h (find_sub_basic_blocks): Declare.
* cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
split out from...
(find_many_sub_basic_blocks): ...here.
(find_sub_basic_blocks): New function.
* function.cc (thread_prologue_and_epilogue_insns): Handle
epilogues that contain jumps.
---
gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
gcc/cfgbuild.h | 1 +
gcc/function.cc | 4 +++
3 files changed, 70 insertions(+), 30 deletions(-)
diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
index 646a06614..58b865f29 100644
--- a/gcc/cfgbuild.cc
+++ b/gcc/cfgbuild.cc
@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
}
}
+/* Update the profile information for BB, which was created by splitting
+ an RTL block that had a non-final jump. */
+
+static void
+update_profile_for_new_sub_basic_block (basic_block bb)
+{
+ edge e;
+ edge_iterator ei;
+
+ bool initialized_src = false, uninitialized_src = false;
+ bb->count = profile_count::zero ();
+ FOR_EACH_EDGE (e, ei, bb->preds)
+ {
+ if (e->count ().initialized_p ())
+ {
+ bb->count += e->count ();
+ initialized_src = true;
+ }
+ else
+ uninitialized_src = true;
+ }
+ /* When some edges are missing with read profile, this is
+ most likely because RTL expansion introduced loop.
+ When profile is guessed we may have BB that is reachable
+ from unlikely path as well as from normal path.
+
+ TODO: We should handle loops created during BB expansion
+ correctly here. For now we assume all those loop to cycle
+ precisely once. */
+ if (!initialized_src
+ || (uninitialized_src
+ && profile_status_for_fn (cfun) < PROFILE_GUESSED))
+ bb->count = profile_count::uninitialized ();
+
+ compute_outgoing_frequencies (bb);
+}
+
/* Assume that some pass has inserted labels or control flow
instructions within a basic block. Split basic blocks as needed
and create edges. */
@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
{
- edge e;
- edge_iterator ei;
-
if (STATE (bb) == BLOCK_ORIGINAL)
continue;
if (STATE (bb) == BLOCK_NEW)
{
- bool initialized_src = false, uninitialized_src = false;
- bb->count = profile_count::zero ();
- FOR_EACH_EDGE (e, ei, bb->preds)
- {
- if (e->count ().initialized_p ())
- {
- bb->count += e->count ();
- initialized_src = true;
- }
- else
- uninitialized_src = true;
- }
- /* When some edges are missing with read profile, this is
- most likely because RTL expansion introduced loop.
- When profile is guessed we may have BB that is reachable
- from unlikely path as well as from normal path.
-
- TODO: We should handle loops created during BB expansion
- correctly here. For now we assume all those loop to cycle
- precisely once. */
- if (!initialized_src
- || (uninitialized_src
- && profile_status_for_fn (cfun) < PROFILE_GUESSED))
- bb->count = profile_count::uninitialized ();
+ update_profile_for_new_sub_basic_block (bb);
+ continue;
}
- /* If nothing changed, there is no need to create new BBs. */
- else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
+ /* If nothing changed, there is no need to create new BBs. */
+ if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
{
/* In rare occassions RTL expansion might have mistakely assigned
a probabilities different from what is in CFG. This happens
@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
update_br_prob_note (bb);
continue;
}
-
compute_outgoing_frequencies (bb);
}
FOR_EACH_BB_FN (bb, cfun)
SET_STATE (bb, 0);
}
+
+/* Like find_many_sub_basic_blocks, but look only within BB. */
+
+void
+find_sub_basic_blocks (basic_block bb)
+{
+ basic_block end_bb = bb->next_bb;
+ find_bb_boundaries (bb);
+ if (bb->next_bb == end_bb)
+ return;
+
+ /* Re-scan and wire in all edges. This expects simple (conditional)
+ jumps at the end of each new basic blocks. */
+ make_edges (bb, end_bb->prev_bb, 1);
+
+ /* Update branch probabilities. Expect only (un)conditional jumps
+ to be created with only the forward edges. */
+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
+ {
+ compute_outgoing_frequencies (bb);
+ for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
+ update_profile_for_new_sub_basic_block (bb);
+ }
+}
diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
index 85145da7f..53543bb75 100644
--- a/gcc/cfgbuild.h
+++ b/gcc/cfgbuild.h
@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
extern bool control_flow_insn_p (const rtx_insn *);
extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
extern void find_many_sub_basic_blocks (sbitmap);
+extern void find_sub_basic_blocks (basic_block);
#endif /* GCC_CFGBUILD_H */
diff --git a/gcc/function.cc b/gcc/function.cc
index ddab43ca4..f4fc211a0 100644
--- a/gcc/function.cc
+++ b/gcc/function.cc
@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
&& returnjump_p (BB_END (e->src)))
e->flags &= ~EDGE_FALLTHRU;
}
+
+ find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
}
else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
{
@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
set_insn_locations (seq, epilogue_location);
emit_insn_before (seq, insn);
+
+ find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
}
}
--
2.33.0
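
The profile-update logic that the patch above factors out into
update_profile_for_new_sub_basic_block can be summarised outside GCC.
The following is a minimal standalone C++ sketch, assuming simplified
stand-in types (EdgeCount, ProfileStatus) rather than GCC's
profile_count and profile_status_d:

#include <cstdint>
#include <optional>
#include <vector>

enum class ProfileStatus { Absent, Guessed, Read };

// nullopt models an uninitialized profile_count.
using EdgeCount = std::optional<uint64_t>;

// Sum the initialized incoming edge counts; fall back to
// "uninitialized" when no predecessor count is known, or when some
// are missing and the profile is not even guessed.
EdgeCount
merge_incoming_counts (const std::vector<EdgeCount> &preds,
                       ProfileStatus status)
{
  uint64_t sum = 0;
  bool initialized_src = false, uninitialized_src = false;
  for (const EdgeCount &e : preds)
    {
      if (e.has_value ())
        {
          sum += *e;
          initialized_src = true;
        }
      else
        uninitialized_src = true;
    }
  if (!initialized_src
      || (uninitialized_src && status < ProfileStatus::Guessed))
    return std::nullopt;
  return sum;
}

As in the patch, once the profile is at least guessed, missing edge
counts are tolerated (RTL expansion may have introduced loops, or the
block may be reachable from unlikely paths); with no usable
predecessor counts the result stays uninitialized.
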


@ -0,0 +1,709 @@
From 554c83414c10909c39e0ad30026ffa4821dd9698 Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 17 Oct 2023 23:46:33 +0100
Subject: [PATCH 104/157] [Backport][SME] aarch64: Use vecs to store register
 save order

Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=575858508090b18dcbc176db285c9f55227ca4c0

aarch64_save/restore_callee_saves looped over registers in register
number order. This in turn meant that we could only use LDP and STP
for registers that were consecutive both number-wise and
offset-wise (after unsaved registers are excluded).

This patch instead builds lists of the registers that we've decided to
save, in offset order. We can then form LDP/STP pairs regardless of
register number order, which in turn means that we can put the LR save
slot first without losing LDP/STP opportunities.
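
As an illustration, here is a minimal standalone C++ sketch (the
RegSave type, the reg_size parameter and the printed mnemonics are
illustrative stand-ins, not GCC's internal API) that pairs saves by
adjacent offsets rather than by register numbers:

#include <cstdio>
#include <vector>

struct RegSave { unsigned regno; long offset; };

// Walk the saves in offset order and pair any two whose slots are
// exactly one register width apart, regardless of register numbers.
void
emit_saves (const std::vector<RegSave> &saves, long reg_size)
{
  for (size_t i = 0; i < saves.size (); ++i)
    if (i + 1 < saves.size ()
        && saves[i + 1].offset - saves[i].offset == reg_size)
      {
        std::printf ("stp x%u, x%u, [sp, %ld]\n",
                     saves[i].regno, saves[i + 1].regno, saves[i].offset);
        ++i;  // the pair consumed two saves
      }
    else
      std::printf ("str x%u, [sp, %ld]\n",
                   saves[i].regno, saves[i].offset);
}

For example, saves {29, 0}, {30, 8}, {24, 16}, {25, 24}, {26, 32} with
reg_size 8 produce stp x29, x30 / stp x24, x25 / str x26: x29 pairs
with x30 and x24 with x25 even though 30 and 24 are not consecutive
register numbers.
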
gcc/
* config/aarch64/aarch64.h (aarch64_frame): Add vectors that
store the list of saved GPRs, FPRs and predicate registers.
* config/aarch64/aarch64.cc (aarch64_layout_frame): Initialize
the lists of saved registers. Use them to choose push candidates.
Invalidate pop candidates if we're not going to do a pop.
(aarch64_next_callee_save): Delete.
(aarch64_save_callee_saves): Take a list of registers,
rather than a range. Make !skip_wb select only write-back
candidates.
(aarch64_expand_prologue): Update calls accordingly.
(aarch64_restore_callee_saves): Take a list of registers,
rather than a range. Always skip pop candidates. Also skip
LR if shadow call stacks are enabled.
(aarch64_expand_epilogue): Update calls accordingly.

gcc/testsuite/
* gcc.target/aarch64/sve/pcs/stack_clash_2.c: Expect restores
to happen in offset order.
* gcc.target/aarch64/sve/pcs/stack_clash_2_128.c: Likewise.
* gcc.target/aarch64/sve/pcs/stack_clash_2_256.c: Likewise.
* gcc.target/aarch64/sve/pcs/stack_clash_2_512.c: Likewise.
* gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c: Likewise.
* gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c: Likewise.
---
gcc/config/aarch64/aarch64.cc | 203 +++++++++---------
gcc/config/aarch64/aarch64.h | 9 +-
.../aarch64/sve/pcs/stack_clash_2.c | 6 +-
.../aarch64/sve/pcs/stack_clash_2_1024.c | 6 +-
.../aarch64/sve/pcs/stack_clash_2_128.c | 6 +-
.../aarch64/sve/pcs/stack_clash_2_2048.c | 6 +-
.../aarch64/sve/pcs/stack_clash_2_256.c | 6 +-
.../aarch64/sve/pcs/stack_clash_2_512.c | 6 +-
8 files changed, 128 insertions(+), 120 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8d4dd2891..e10c9d763 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8753,13 +8753,17 @@ aarch64_save_regs_above_locals_p ()
static void
aarch64_layout_frame (void)
{
- int regno, last_fp_reg = INVALID_REGNUM;
+ unsigned regno, last_fp_reg = INVALID_REGNUM;
machine_mode vector_save_mode = aarch64_reg_save_mode (V8_REGNUM);
poly_int64 vector_save_size = GET_MODE_SIZE (vector_save_mode);
bool frame_related_fp_reg_p = false;
aarch64_frame &frame = cfun->machine->frame;
poly_int64 top_of_locals = -1;
+ vec_safe_truncate (frame.saved_gprs, 0);
+ vec_safe_truncate (frame.saved_fprs, 0);
+ vec_safe_truncate (frame.saved_prs, 0);
+
frame.emit_frame_chain = aarch64_needs_frame_chain ();
/* Adjust the outgoing arguments size if required. Keep it in sync with what
@@ -8844,6 +8848,7 @@ aarch64_layout_frame (void)
for (regno = P0_REGNUM; regno <= P15_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
+ vec_safe_push (frame.saved_prs, regno);
if (frame.sve_save_and_probe == INVALID_REGNUM)
frame.sve_save_and_probe = regno;
frame.reg_offset[regno] = offset;
@@ -8865,7 +8870,7 @@ aarch64_layout_frame (void)
If we don't have any vector registers to save, and we know how
big the predicate save area is, we can just round it up to the
next 16-byte boundary. */
- if (last_fp_reg == (int) INVALID_REGNUM && offset.is_constant ())
+ if (last_fp_reg == INVALID_REGNUM && offset.is_constant ())
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
else
{
@@ -8879,10 +8884,11 @@ aarch64_layout_frame (void)
}
/* If we need to save any SVE vector registers, add them next. */
- if (last_fp_reg != (int) INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
+ if (last_fp_reg != INVALID_REGNUM && crtl->abi->id () == ARM_PCS_SVE)
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
+ vec_safe_push (frame.saved_fprs, regno);
if (frame.sve_save_and_probe == INVALID_REGNUM)
frame.sve_save_and_probe = regno;
frame.reg_offset[regno] = offset;
@@ -8903,13 +8909,8 @@ aarch64_layout_frame (void)
auto allocate_gpr_slot = [&](unsigned int regno)
{
- if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
- frame.hard_fp_save_and_probe = regno;
+ vec_safe_push (frame.saved_gprs, regno);
frame.reg_offset[regno] = offset;
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
- frame.wb_push_candidate1 = regno;
- else if (frame.wb_push_candidate2 == INVALID_REGNUM)
- frame.wb_push_candidate2 = regno;
offset += UNITS_PER_WORD;
};
@@ -8938,8 +8939,7 @@ aarch64_layout_frame (void)
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (known_eq (frame.reg_offset[regno], SLOT_REQUIRED))
{
- if (frame.hard_fp_save_and_probe == INVALID_REGNUM)
- frame.hard_fp_save_and_probe = regno;
+ vec_safe_push (frame.saved_fprs, regno);
/* If there is an alignment gap between integer and fp callee-saves,
allocate the last fp register to it if possible. */
if (regno == last_fp_reg
@@ -8952,21 +8952,25 @@ aarch64_layout_frame (void)
}
frame.reg_offset[regno] = offset;
- if (frame.wb_push_candidate1 == INVALID_REGNUM)
- frame.wb_push_candidate1 = regno;
- else if (frame.wb_push_candidate2 == INVALID_REGNUM
- && frame.wb_push_candidate1 >= V0_REGNUM)
- frame.wb_push_candidate2 = regno;
offset += vector_save_size;
}
offset = aligned_upper_bound (offset, STACK_BOUNDARY / BITS_PER_UNIT);
-
auto saved_regs_size = offset - frame.bytes_below_saved_regs;
- gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size)
- || (frame.hard_fp_save_and_probe != INVALID_REGNUM
- && known_eq (frame.reg_offset[frame.hard_fp_save_and_probe],
- frame.bytes_below_hard_fp)));
+
+ array_slice<unsigned int> push_regs = (!vec_safe_is_empty (frame.saved_gprs)
+ ? frame.saved_gprs
+ : frame.saved_fprs);
+ if (!push_regs.empty ()
+ && known_eq (frame.reg_offset[push_regs[0]], frame.bytes_below_hard_fp))
+ {
+ frame.hard_fp_save_and_probe = push_regs[0];
+ frame.wb_push_candidate1 = push_regs[0];
+ if (push_regs.size () > 1)
+ frame.wb_push_candidate2 = push_regs[1];
+ }
+ else
+ gcc_assert (known_eq (saved_regs_size, below_hard_fp_saved_regs_size));
/* With stack-clash, a register must be saved in non-leaf functions.
The saving of the bottommost register counts as an implicit probe,
@@ -9130,12 +9134,14 @@ aarch64_layout_frame (void)
+ frame.sve_callee_adjust
+ frame.final_adjust, frame.frame_size));
- if (!frame.emit_frame_chain && frame.callee_adjust == 0)
+ if (frame.callee_adjust == 0)
{
- /* We've decided not to associate any register saves with the initial
- stack allocation. */
- frame.wb_pop_candidate1 = frame.wb_push_candidate1 = INVALID_REGNUM;
- frame.wb_pop_candidate2 = frame.wb_push_candidate2 = INVALID_REGNUM;
+ /* We've decided not to do a "real" push and pop. However,
+ setting up the frame chain is treated as being essentially
+ a multi-instruction push. */
+ frame.wb_pop_candidate1 = frame.wb_pop_candidate2 = INVALID_REGNUM;
+ if (!frame.emit_frame_chain)
+ frame.wb_push_candidate1 = frame.wb_push_candidate2 = INVALID_REGNUM;
}
frame.laid_out = true;
@@ -9150,17 +9156,6 @@ aarch64_register_saved_on_entry (int regno)
return known_ge (cfun->machine->frame.reg_offset[regno], 0);
}
-/* Return the next register up from REGNO up to LIMIT for the callee
- to save. */
-
-static unsigned
-aarch64_next_callee_save (unsigned regno, unsigned limit)
-{
- while (regno <= limit && !aarch64_register_saved_on_entry (regno))
- regno ++;
- return regno;
-}
-
/* Push the register number REGNO of mode MODE to the stack with write-back
adjusting the stack by ADJUSTMENT. */
@@ -9424,41 +9419,46 @@ aarch64_add_cfa_expression (rtx_insn *insn, rtx reg,
add_reg_note (insn, REG_CFA_EXPRESSION, gen_rtx_SET (mem, reg));
}
-/* Emit code to save the callee-saved registers from register number START
- to LIMIT to the stack. The stack pointer is currently BYTES_BELOW_SP
- bytes above the bottom of the static frame. Skip any write-back
- candidates if SKIP_WB is true. HARD_FP_VALID_P is true if the hard
- frame pointer has been set up. */
+/* Emit code to save the callee-saved registers in REGS. Skip any
+ write-back candidates if SKIP_WB is true, otherwise consider only
+ write-back candidates.
+
+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom
+ of the static frame. HARD_FP_VALID_P is true if the hard frame pointer
+ has been set up. */
static void
aarch64_save_callee_saves (poly_int64 bytes_below_sp,
- unsigned start, unsigned limit, bool skip_wb,
+ array_slice<unsigned int> regs, bool skip_wb,
bool hard_fp_valid_p)
{
aarch64_frame &frame = cfun->machine->frame;
rtx_insn *insn;
- unsigned regno;
- unsigned regno2;
rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
- for (regno = aarch64_next_callee_save (start, limit);
- regno <= limit;
- regno = aarch64_next_callee_save (regno + 1, limit))
+ auto skip_save_p = [&](unsigned int regno)
+ {
+ if (cfun->machine->reg_is_wrapped_separately[regno])
+ return true;
+
+ if (skip_wb == (regno == frame.wb_push_candidate1
+ || regno == frame.wb_push_candidate2))
+ return true;
+
+ return false;
+ };
+
+ for (unsigned int i = 0; i < regs.size (); ++i)
{
- rtx reg, mem;
+ unsigned int regno = regs[i];
poly_int64 offset;
bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
- if (skip_wb
- && (regno == frame.wb_push_candidate1
- || regno == frame.wb_push_candidate2))
- continue;
-
- if (cfun->machine->reg_is_wrapped_separately[regno])
+ if (skip_save_p (regno))
continue;
machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
+ rtx reg = gen_rtx_REG (mode, regno);
offset = frame.reg_offset[regno] - bytes_below_sp;
rtx base_rtx = stack_pointer_rtx;
poly_int64 sp_offset = offset;
@@ -9485,12 +9485,13 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
}
offset -= fp_offset;
}
- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
bool need_cfa_note_p = (base_rtx != stack_pointer_rtx);
+ unsigned int regno2;
if (!aarch64_sve_mode_p (mode)
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
- && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && i + 1 < regs.size ()
+ && (regno2 = regs[i + 1], !skip_save_p (regno2))
&& known_eq (GET_MODE_SIZE (mode),
frame.reg_offset[regno2] - frame.reg_offset[regno]))
{
@@ -9516,6 +9517,7 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
}
regno = regno2;
+ ++i;
}
else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
{
@@ -9533,49 +9535,57 @@ aarch64_save_callee_saves (poly_int64 bytes_below_sp,
}
}
-/* Emit code to restore the callee registers from register number START
- up to and including LIMIT. The stack pointer is currently BYTES_BELOW_SP
- bytes above the bottom of the static frame. Skip any write-back
- candidates if SKIP_WB is true. Write the appropriate REG_CFA_RESTORE
- notes into CFI_OPS. */
+/* Emit code to restore the callee registers in REGS, ignoring pop candidates
+ and any other registers that are handled separately. Write the appropriate
+ REG_CFA_RESTORE notes into CFI_OPS.
+
+ The stack pointer is currently BYTES_BELOW_SP bytes above the bottom
+ of the static frame. */
static void
-aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
- unsigned limit, bool skip_wb, rtx *cfi_ops)
+aarch64_restore_callee_saves (poly_int64 bytes_below_sp,
+ array_slice<unsigned int> regs, rtx *cfi_ops)
{
aarch64_frame &frame = cfun->machine->frame;
- unsigned regno;
- unsigned regno2;
poly_int64 offset;
rtx anchor_reg = NULL_RTX, ptrue = NULL_RTX;
- for (regno = aarch64_next_callee_save (start, limit);
- regno <= limit;
- regno = aarch64_next_callee_save (regno + 1, limit))
+ auto skip_restore_p = [&](unsigned int regno)
{
- bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
if (cfun->machine->reg_is_wrapped_separately[regno])
- continue;
+ return true;
+
+ if (regno == frame.wb_pop_candidate1
+ || regno == frame.wb_pop_candidate2)
+ return true;
- rtx reg, mem;
+ /* The shadow call stack code restores LR separately. */
+ if (frame.is_scs_enabled && regno == LR_REGNUM)
+ return true;
- if (skip_wb
- && (regno == frame.wb_pop_candidate1
- || regno == frame.wb_pop_candidate2))
+ return false;
+ };
+
+ for (unsigned int i = 0; i < regs.size (); ++i)
+ {
+ unsigned int regno = regs[i];
+ bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
+ if (skip_restore_p (regno))
continue;
machine_mode mode = aarch64_reg_save_mode (regno);
- reg = gen_rtx_REG (mode, regno);
+ rtx reg = gen_rtx_REG (mode, regno);
offset = frame.reg_offset[regno] - bytes_below_sp;
rtx base_rtx = stack_pointer_rtx;
if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
aarch64_adjust_sve_callee_save_base (mode, base_rtx, anchor_reg,
offset, ptrue);
- mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
+ rtx mem = gen_frame_mem (mode, plus_constant (Pmode, base_rtx, offset));
+ unsigned int regno2;
if (!aarch64_sve_mode_p (mode)
- && (regno2 = aarch64_next_callee_save (regno + 1, limit)) <= limit
- && !cfun->machine->reg_is_wrapped_separately[regno2]
+ && i + 1 < regs.size ()
+ && (regno2 = regs[i + 1], !skip_restore_p (regno2))
&& known_eq (GET_MODE_SIZE (mode),
frame.reg_offset[regno2] - frame.reg_offset[regno]))
{
@@ -9588,6 +9598,7 @@ aarch64_restore_callee_saves (poly_int64 bytes_below_sp, unsigned start,
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
regno = regno2;
+ ++i;
}
else if (mode == VNx2DImode && BYTES_BIG_ENDIAN)
emit_insn (gen_aarch64_pred_mov (mode, reg, ptrue, mem));
@@ -10409,13 +10420,10 @@ aarch64_expand_prologue (void)
- frame.bytes_above_hard_fp);
gcc_assert (known_ge (chain_offset, 0));
+ gcc_assert (reg1 == R29_REGNUM && reg2 == R30_REGNUM);
if (callee_adjust == 0)
- {
- reg1 = R29_REGNUM;
- reg2 = R30_REGNUM;
- aarch64_save_callee_saves (bytes_below_sp, reg1, reg2,
- false, false);
- }
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs,
+ false, false);
else
gcc_assert (known_eq (chain_offset, 0));
aarch64_add_offset (Pmode, hard_frame_pointer_rtx,
@@ -10453,8 +10461,7 @@ aarch64_expand_prologue (void)
aarch64_emit_stack_tie (hard_frame_pointer_rtx);
}
- aarch64_save_callee_saves (bytes_below_sp, R0_REGNUM, R30_REGNUM,
- callee_adjust != 0 || emit_frame_chain,
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_gprs, true,
emit_frame_chain);
if (maybe_ne (sve_callee_adjust, 0))
{
@@ -10465,10 +10472,9 @@ aarch64_expand_prologue (void)
!frame_pointer_needed, false);
bytes_below_sp -= sve_callee_adjust;
}
- aarch64_save_callee_saves (bytes_below_sp, P0_REGNUM, P15_REGNUM,
- false, emit_frame_chain);
- aarch64_save_callee_saves (bytes_below_sp, V0_REGNUM, V31_REGNUM,
- callee_adjust != 0 || emit_frame_chain,
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_prs, true,
+ emit_frame_chain);
+ aarch64_save_callee_saves (bytes_below_sp, frame.saved_fprs, true,
emit_frame_chain);
/* We may need to probe the final adjustment if it is larger than the guard
@@ -10514,8 +10520,6 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
poly_int64 bytes_below_hard_fp = frame.bytes_below_hard_fp;
unsigned reg1 = frame.wb_pop_candidate1;
unsigned reg2 = frame.wb_pop_candidate2;
- unsigned int last_gpr = (frame.is_scs_enabled
- ? R29_REGNUM : R30_REGNUM);
rtx cfi_ops = NULL;
rtx_insn *insn;
/* A stack clash protection prologue may not have left EP0_REGNUM or
@@ -10579,10 +10583,8 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
/* Restore the vector registers before the predicate registers,
so that we can use P4 as a temporary for big-endian SVE frames. */
- aarch64_restore_callee_saves (final_adjust, V0_REGNUM, V31_REGNUM,
- callee_adjust != 0, &cfi_ops);
- aarch64_restore_callee_saves (final_adjust, P0_REGNUM, P15_REGNUM,
- false, &cfi_ops);
+ aarch64_restore_callee_saves (final_adjust, frame.saved_fprs, &cfi_ops);
+ aarch64_restore_callee_saves (final_adjust, frame.saved_prs, &cfi_ops);
if (maybe_ne (sve_callee_adjust, 0))
aarch64_add_sp (NULL_RTX, NULL_RTX, sve_callee_adjust, true);
@@ -10590,8 +10592,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
restore x30, we don't need to restore x30 again in the traditional
way. */
aarch64_restore_callee_saves (final_adjust + sve_callee_adjust,
- R0_REGNUM, last_gpr,
- callee_adjust != 0, &cfi_ops);
+ frame.saved_gprs, &cfi_ops);
if (need_barrier_p)
aarch64_emit_stack_tie (stack_pointer_rtx);
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 292ef2eec..1591cde8b 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -787,7 +787,7 @@ extern enum aarch64_processor aarch64_tune;
#define DEFAULT_PCC_STRUCT_RETURN 0
-#ifdef HAVE_POLY_INT_H
+#if defined(HAVE_POLY_INT_H) && defined(GCC_VEC_H)
struct GTY (()) aarch64_frame
{
/* The offset from the bottom of the static frame (the bottom of the
@@ -795,6 +795,13 @@ struct GTY (()) aarch64_frame
needed. */
poly_int64 reg_offset[LAST_SAVED_REGNUM + 1];
+ /* The list of GPRs, FPRs and predicate registers that have nonnegative
+ entries in reg_offset. The registers are listed in order of
+ increasing offset (rather than increasing register number). */
+ vec<unsigned, va_gc_atomic> *saved_gprs;
+ vec<unsigned, va_gc_atomic> *saved_fprs;
+ vec<unsigned, va_gc_atomic> *saved_prs;
+
/* The number of extra stack bytes taken up by register varargs.
This area is allocated by the callee at the very top of the
frame. This value is rounded up to a multiple of
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
index 4622a1eed..bbb45d266 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2.c
@@ -215,9 +215,9 @@ test_7 (void)
** add sp, sp, #?16
** ldr p4, \[sp\]
** addvl sp, sp, #1
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -283,9 +283,9 @@ test_9 (int n)
** addvl sp, x29, #-1
** ldr p4, \[sp\]
** addvl sp, sp, #1
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -319,9 +319,9 @@ test_10 (int n)
** addvl sp, x29, #-1
** ldr p4, \[sp\]
** addvl sp, sp, #1
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** add sp, sp, #?3008
** add sp, sp, #?126976
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
index e31200fc2..9437c7a85 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_1024.c
@@ -176,9 +176,9 @@ test_7 (void)
** add sp, sp, #?16
** ldr z16, \[sp\]
** add sp, sp, #?128
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -234,9 +234,9 @@ test_9 (int n)
** sub sp, x29, #128
** ldr z16, \[sp\]
** add sp, sp, #?128
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -268,9 +268,9 @@ test_10 (int n)
** sub sp, x29, #128
** ldr z16, \[sp\]
** add sp, sp, #?128
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** add sp, sp, #?3008
** add sp, sp, #?126976
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
index 41193b411..b4e1627fa 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_128.c
@@ -176,9 +176,9 @@ test_7 (void)
** add sp, sp, #?16
** ldr p4, \[sp\]
** add sp, sp, #?16
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -234,9 +234,9 @@ test_9 (int n)
** sub sp, x29, #16
** ldr p4, \[sp\]
** add sp, sp, #?16
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -267,9 +267,9 @@ test_10 (int n)
** sub sp, x29, #16
** ldr p4, \[sp\]
** add sp, sp, #?16
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** add sp, sp, #?3008
** add sp, sp, #?126976
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
index f63751678..921209379 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_2048.c
@@ -176,9 +176,9 @@ test_7 (void)
** add sp, sp, #?16
** ldr z16, \[sp\]
** add sp, sp, #?256
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -234,9 +234,9 @@ test_9 (int n)
** sub sp, x29, #256
** ldr z16, \[sp\]
** add sp, sp, #?256
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -268,9 +268,9 @@ test_10 (int n)
** sub sp, x29, #256
** ldr z16, \[sp\]
** add sp, sp, #?256
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** add sp, sp, #?3008
** add sp, sp, #?126976
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
index 6bcbb5772..bd8bef0f0 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_256.c
@@ -176,9 +176,9 @@ test_7 (void)
** add sp, sp, #?16
** ldr z16, \[sp\]
** add sp, sp, #?32
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -234,9 +234,9 @@ test_9 (int n)
** sub sp, x29, #32
** ldr z16, \[sp\]
** add sp, sp, #?32
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -267,9 +267,9 @@ test_10 (int n)
** sub sp, x29, #32
** ldr z16, \[sp\]
** add sp, sp, #?32
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** add sp, sp, #?3008
** add sp, sp, #?126976
** ret
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
index dc7df8e6b..2c76ccecd 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pcs/stack_clash_2_512.c
@@ -176,9 +176,9 @@ test_7 (void)
** add sp, sp, #?16
** ldr z16, \[sp\]
** add sp, sp, #?64
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -234,9 +234,9 @@ test_9 (int n)
** sub sp, x29, #64
** ldr z16, \[sp\]
** add sp, sp, #?64
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** mov x12, #?4144
** add sp, sp, x12
** ret
@@ -268,9 +268,9 @@ test_10 (int n)
** sub sp, x29, #64
** ldr z16, \[sp\]
** add sp, sp, #?64
+** ldp x29, x30, \[sp\]
** ldp x24, x25, \[sp, 16\]
** ldr x26, \[sp, 32\]
-** ldp x29, x30, \[sp\]
** add sp, sp, #?3008
** add sp, sp, #?126976
** ret
--
2.33.0
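
One detail of the patch above worth spelling out is the skip_save_p
predicate, which compares SKIP_WB against "is a write-back candidate"
with == so that a single flag selects both behaviours. A standalone
C++ sketch (illustration only; the register numbers are arbitrary):

#include <cassert>

// Equivalent to (skip_wb && is_candidate) || (!skip_wb && !is_candidate):
// when skip_wb is true the candidates are skipped; when it is false,
// only the candidates survive.
static bool
skip_save_p (bool skip_wb, unsigned regno, unsigned cand1, unsigned cand2)
{
  return skip_wb == (regno == cand1 || regno == cand2);
}

int
main ()
{
  // Write-back candidates are x29/x30.
  assert (skip_save_p (true, 29, 29, 30));   // candidate skipped
  assert (!skip_save_p (true, 24, 29, 30));  // non-candidate saved
  assert (!skip_save_p (false, 29, 29, 30)); // candidate saved
  assert (skip_save_p (false, 24, 29, 30));  // non-candidate skipped
  return 0;
}

This is how aarch64_expand_prologue can call the same routine twice on
frame.saved_gprs: once with skip_wb = false to emit just the
write-back candidates (the frame-chain registers), and once with
skip_wb = true to emit everything else.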
