!504 [Sync] Sync patches from branch openEuler-24.09

From: @huangzifeng6 
Reviewed-by: @li-yancheng 
Signed-off-by: @li-yancheng
openeuler-ci-bot 2024-11-21 07:42:21 +00:00 committed by Gitee
commit c71db90706
207 changed files with 12828 additions and 182 deletions

From 6de2e0d400cbe46da482a672810c37b1832c408c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
Date: Thu, 25 Jul 2024 19:45:43 +0800
Subject: [PATCH] Improve non-loop disambiguation
This optimization is backported from https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=038b077689bb5310386b04d40a2cea234f01e6aa.
When dr_may_alias_p is called without a loop context, it tries
to use the tree-affine interface to calculate the difference
between the two addresses and use that difference to check whether
the gap between the accesses is known at compile time. However, as the
example in the PR shows, this doesn't expand SSA_NAMEs and so can easily
be defeated by things like reassociation.
One fix would have been to use aff_combination_expand to expand the
SSA_NAMEs, but we'd then need some way of maintaining the associated
cache. This patch instead reuses the innermost_loop_behavior fields
(which exist even when no loop context is provided).
It might still be useful to do the aff_combination_expand thing too,
if an example turns out to need it.
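To make the effect of the new check concrete, here is a minimal sketch in plain C (not the GCC internals; the helper name and its parameters are illustrative only). With a common DR_BASE_ADDRESS and DR_OFFSET, each access covers the half-open byte range [DR_INIT, DR_INIT + size), and the two references cannot alias when those ranges cannot overlap:

#include <stdbool.h>

/* Illustrative stand-in for the interval test: byte ranges
   [init_a, init_a + size_a) and [init_b, init_b + size_b),
   both measured from the same base + offset.  */
static bool
ranges_may_overlap (long init_a, long size_a, long init_b, long size_b)
{
  return init_a < init_b + size_b && init_b < init_a + size_a;
}

/* For p[i+0] and p[i+1] with 8-byte doubles the ranges are [0, 8)
   and [8, 16); they cannot overlap, so the accesses are independent
   even without a loop context.  */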
---
gcc/common.opt | 4 ++++
gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c | 16 +++++++++++++++
gcc/tree-data-ref.cc | 22 +++++++++++++++++++++
3 files changed, 42 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
diff --git a/gcc/common.opt b/gcc/common.opt
index b18f0b944..75bf9c9c1 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3217,6 +3217,10 @@ ftree-loop-vectorize
Common Var(flag_tree_loop_vectorize) Optimization EnabledBy(ftree-vectorize)
Enable loop vectorization on trees.
+falias-analysis-expand-ssa
+Common Var(flag_alias_analysis_expand_ssa) Init(0)
+Enable expanded SSA name analysis during alias analysis.
+
ftree-slp-vectorize
Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
Enable basic block vectorization (SLP) on trees.
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
new file mode 100644
index 000000000..5ff8a8a62
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-falias-analysis-expand-ssa" } */
+
+void f(double *p, long i)
+{
+ p[i+0] += 1;
+ p[i+1] += 1;
+}
+void g(double *p, long i)
+{
+ double *q = p + i;
+ q[0] += 1;
+ q[1] += 1;
+}
+
+/* { dg-final { scan-tree-dump-not "can't determine dependence" slp2 } } */
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index e6ae9e847..a05073c51 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -2993,6 +2993,28 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
disambiguation. */
if (!loop_nest)
{
+ if (flag_alias_analysis_expand_ssa)
+ {
+ tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
+ tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
+
+ if (DR_BASE_ADDRESS (a)
+ && DR_BASE_ADDRESS (b)
+ && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
+ && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
+ && poly_int_tree_p (tree_size_a)
+ && poly_int_tree_p (tree_size_b)
+ && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
+ wi::to_widest (tree_size_a),
+ wi::to_widest (DR_INIT (b)),
+ wi::to_widest (tree_size_b)))
+ {
+ gcc_assert (integer_zerop (DR_STEP (a))
+ && integer_zerop (DR_STEP (b)));
+ return false;
+ }
+ }
+
aff_tree off1, off2;
poly_widest_int size1, size2;
get_inner_reference_aff (DR_REF (a), &off1, &size1);
--
2.33.0

From c4e4fef145c1e402f0558cc35f6c1ed0a08beffb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
Date: Thu, 25 Jul 2024 20:16:52 +0800
Subject: [PATCH] CHREC multiplication and undefined overflow
This optimization is backported from https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646531.html
When folding a multiply, CHRECs are handled as if {a, +, b} * c
were {a*c, +, b*c}, but that isn't generally correct when overflow
invokes undefined behavior. The following uses unsigned arithmetic
unless either a is zero or a and b have the same sign.
I've used simple early outs for INTEGER_CSTs and otherwise use
a range-query since we lack a tree_expr_nonpositive_p and
get_range_pos_neg isn't a good fit.
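As a rough sketch of the guard being added (plain C, not the GCC code itself; the function and parameter names are hypothetical), the signed fold is kept only in the cases listed above as safe, and everything else is folded in the corresponding unsigned type and converted back:

#include <stdbool.h>

enum fold_kind { FOLD_IN_TYPE, FOLD_IN_UNSIGNED };

/* Decision for folding {a, +, b} * c when the type has undefined
   signed overflow.  */
static enum fold_kind
choose_chrec_mult_fold (bool overflow_wraps, bool a_is_zero,
                        bool signs_known, bool same_sign)
{
  if (overflow_wraps                    /* wrapping type: fold freely   */
      || a_is_zero                      /* {0, +, b} * c stays exact    */
      || (signs_known && same_sign))    /* a and b provably same sign   */
    return FOLD_IN_TYPE;
  return FOLD_IN_UNSIGNED;              /* otherwise use unsigned math  */
}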
---
gcc/common.opt | 4 ++
gcc/testsuite/gcc.dg/pr68317.c | 6 +-
 gcc/testsuite/gcc.dg/torture/pr114074.c | 27 ++++++++++
gcc/tree-chrec.cc | 81 +++++++++++++++++++++----
gcc/tree-chrec.h | 2 +-
gcc/value-range.cc | 12 ++++
gcc/value-range.h | 2 +
7 files changed, 119 insertions(+), 15 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/torture/pr114074.c
diff --git a/gcc/common.opt b/gcc/common.opt
index b18f0b944..d3af3ba39 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1771,6 +1771,10 @@ floop-interchange
Common Var(flag_loop_interchange) Optimization
Enable loop interchange on trees.
+fchrec-mul-fold-strict-overflow
+Common Var(flag_chrec_mul_fold_strict_overflow) Init(0)
+Enable strict overflow handling during constant folding of multiply CHRECs.
+
floop-block
Common Alias(floop-nest-optimize)
Enable loop nest transforms. Same as -floop-nest-optimize.
diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c
index bd053a752..671a67d95 100644
--- a/gcc/testsuite/gcc.dg/pr68317.c
+++ b/gcc/testsuite/gcc.dg/pr68317.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdisable-tree-ethread" } */
+/* { dg-options "-O2 -fdisable-tree-ethread -fchrec-mul-fold-strict-overflow" } */
/* Note: Threader will collapse loop. */
@@ -12,8 +12,8 @@ foo ()
{
int32_t index = 0;
- for (index; index <= 10; index--) // expected warning here
+ for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
/* Result of the following multiply will overflow
when converted to signed int32_t. */
- bar ((0xcafe + index) * 0xdead); /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
+ bar ((0xcafe + index) * 0xdead);
}
diff --git a/gcc/testsuite/gcc.dg/torture/pr114074.c b/gcc/testsuite/gcc.dg/torture/pr114074.c
new file mode 100644
index 000000000..9a383d8fc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr114074.c
@@ -0,0 +1,27 @@
+/* { dg-do run } */
+/* { dg-options "-fchrec-mul-fold-strict-overflow" } */
+int a, b, d;
+
+__attribute__((noipa)) void
+foo (void)
+{
+ ++d;
+}
+
+int
+main ()
+{
+ for (a = 0; a > -3; a -= 2)
+ {
+ int c = a;
+ b = __INT_MAX__ - 3000;
+ a = ~c * b;
+ foo ();
+ if (!a)
+ break;
+ a = c;
+ }
+ if (d != 2)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc
index c44cea754..3323901bc 100644
--- a/gcc/tree-chrec.cc
+++ b/gcc/tree-chrec.cc
@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3. If not see
#include "gimple.h"
#include "tree-ssa-loop.h"
#include "dumpfile.h"
+#include "value-range.h"
+#include "value-query.h"
#include "tree-scalar-evolution.h"
/* Extended folder for chrecs. */
@@ -404,6 +406,13 @@ chrec_fold_multiply (tree type,
|| automatically_generated_chrec_p (op1))
return chrec_fold_automatically_generated_operands (op0, op1);
+ if (flag_chrec_mul_fold_strict_overflow)
+ {
+ if (TREE_CODE (op0) != POLYNOMIAL_CHREC
+ && TREE_CODE (op1) == POLYNOMIAL_CHREC)
+ std::swap (op0, op1);
+ }
+
switch (TREE_CODE (op0))
{
case POLYNOMIAL_CHREC:
@@ -428,10 +437,53 @@ chrec_fold_multiply (tree type,
if (integer_zerop (op1))
return build_int_cst (type, 0);
- return build_polynomial_chrec
- (CHREC_VARIABLE (op0),
- chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
- chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
+ if (flag_chrec_mul_fold_strict_overflow)
+ {
+ /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the
+ same sign or CHREC_LEFT is zero then folding the multiply into
+ the addition does not have the same behavior on overflow. Use
+ unsigned arithmetic in that case. */
+ value_range rl, rr;
+ if (!ANY_INTEGRAL_TYPE_P (type)
+ || TYPE_OVERFLOW_WRAPS (type)
+ || integer_zerop (CHREC_LEFT (op0))
+ || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST
+ && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST
+ && (tree_int_cst_sgn (CHREC_LEFT (op0))
+ == tree_int_cst_sgn (CHREC_RIGHT (op0))))
+ || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0))
+ && !rl.undefined_p ()
+ && (rl.nonpositive_p () || rl.nonnegative_p ())
+ && get_range_query (cfun)->range_of_expr (rr,
+ CHREC_RIGHT (op0))
+ && !rr.undefined_p ()
+ && ((rl.nonpositive_p () && rr.nonpositive_p ())
+ || (rl.nonnegative_p () && rr.nonnegative_p ()))))
+ {
+ tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1);
+ tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1);
+ return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right);
+ }
+ else
+ {
+ tree utype = unsigned_type_for (type);
+ tree uop1 = chrec_convert_rhs (utype, op1);
+ tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0));
+ tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0));
+ tree left = chrec_fold_multiply (utype, uleft0, uop1);
+ tree right = chrec_fold_multiply (utype, uright0, uop1);
+ tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0),
+ left, right);
+ return chrec_convert_rhs (type, tem);
+ }
+ }
+ else
+ {
+ return build_polynomial_chrec
+ (CHREC_VARIABLE (op0),
+ chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
+ chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
+ }
}
CASE_CONVERT:
@@ -449,13 +501,20 @@ chrec_fold_multiply (tree type,
switch (TREE_CODE (op1))
{
case POLYNOMIAL_CHREC:
- gcc_checking_assert
- (!chrec_contains_symbols_defined_in_loop (op1,
- CHREC_VARIABLE (op1)));
- return build_polynomial_chrec
- (CHREC_VARIABLE (op1),
- chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
- chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
+ if (flag_chrec_mul_fold_strict_overflow)
+ {
+ gcc_unreachable ();
+ }
+ else
+ {
+ gcc_checking_assert
+ (!chrec_contains_symbols_defined_in_loop (op1,
+ CHREC_VARIABLE (op1)));
+ return build_polynomial_chrec
+ (CHREC_VARIABLE (op1),
+ chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
+ chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
+ }
CASE_CONVERT:
if (tree_contains_chrecs (op1, NULL))
diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h
index fcf41710d..cdc97d5d9 100644
--- a/gcc/tree-chrec.h
+++ b/gcc/tree-chrec.h
@@ -63,7 +63,7 @@ extern tree chrec_fold_plus (tree, tree, tree);
extern tree chrec_fold_minus (tree, tree, tree);
extern tree chrec_fold_multiply (tree, tree, tree);
extern tree chrec_convert (tree, tree, gimple *, bool = true, tree = NULL);
-extern tree chrec_convert_rhs (tree, tree, gimple *);
+extern tree chrec_convert_rhs (tree, tree, gimple * = NULL);
extern tree chrec_convert_aggressive (tree, tree, bool *);
/* Operations. */
diff --git a/gcc/value-range.cc b/gcc/value-range.cc
index 000bbcf89..a1dc10a24 100644
--- a/gcc/value-range.cc
+++ b/gcc/value-range.cc
@@ -656,6 +656,18 @@ irange::contains_p (tree cst) const
return false;
}
+bool
+irange::nonnegative_p () const
+{
+ return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
+}
+
+bool
+irange::nonpositive_p () const
+{
+ return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
+}
+
/* Normalize addresses into constants. */
diff --git a/gcc/value-range.h b/gcc/value-range.h
index d4cba22d5..2dc0907de 100644
--- a/gcc/value-range.h
+++ b/gcc/value-range.h
@@ -69,6 +69,8 @@ public:
bool varying_p () const;
bool singleton_p (tree *result = NULL) const;
bool contains_p (tree) const;
+ bool nonnegative_p () const;
+ bool nonpositive_p () const;
// In-place operators.
void union_ (const irange &);
--
2.33.0

(File diff suppressed because it is too large.)

From d9131757175667d35e74d9ee84689039990af768 Mon Sep 17 00:00:00 2001
From: xingyushuai <xingyushuai@huawei.com>
Date: Fri, 3 Mar 2023 09:31:04 +0800
Subject: [PATCH 001/157] Add hip09 machine description
This patch introduces the hip09 machine model
for the scheduler.
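Once the AARCH64_CORE entry below is in place, hip09 becomes a valid -mcpu/-mtune value, so the new cost tables and the hip09.md reservations are exercised simply by compiling for that core. A small, hedged illustration (the source is arbitrary; only the option comes from this patch):

/* foo.c -- build with:  gcc -O2 -mcpu=hip09 -c foo.c
   With -mcpu=hip09 (or -mtune=hip09) the scheduler uses the hip09.md
   reservations, e.g. the 10-cycle hip09_div unit for the integer
   division and the FSU pipes for the double division below.  */
double fdiv_example (double a, double b) { return a / b; }
long   idiv_example (long a, long b)     { return a / b; }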
---
gcc/config/aarch64/aarch64-cores.def | 1 +
gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.cc | 109 +++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++
6 files changed, 774 insertions(+), 1 deletion(-)
create mode 100644 gcc/config/aarch64/hip09.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 70b11eb80..a854bdb24 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0)
/* ARMv8.3-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 48522606f..fc5a3cbe4 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -668,6 +668,110 @@ const struct cpu_cost_table a64fx_extra_costs =
}
};
+const struct cpu_cost_table hip09_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table ampere1_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 9dc9adc70..238bb6e31 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 5537a537c..e9b3980c4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -465,6 +465,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip09_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -660,6 +676,16 @@ static const struct cpu_regmove_cost a64fx_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip09_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost neoversen2_regmove_cost =
{
1, /* GP2GP */
@@ -947,6 +973,43 @@ static const struct cpu_vector_cost tsv110_vector_cost =
nullptr /* issue_info */
};
+static const advsimd_vec_cost hip09_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 0, /* ld2_st2_permute_cost */
+ 0, /* ld3_st3_permute_cost */
+ 0, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 3, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 3, /* reduc_i64_cost */
+ 3, /* reduc_f16_cost */
+ 3, /* reduc_f32_cost */
+ 3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static const struct cpu_vector_cost hip09_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &hip09_advsimd_vector_cost, /* advsimd */
+ nullptr, /* sve */
+ nullptr /* issue_info */
+};
+
static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
{
2, /* int_stmt_cost */
@@ -1293,6 +1356,18 @@ static const cpu_prefetch_tune tsv110_prefetch_tune =
-1 /* default_opt_level */
};
+
+static const cpu_prefetch_tune hip09_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
8, /* num_slots */
@@ -1658,6 +1733,40 @@ static const struct tune_params tsv110_tunings =
&tsv110_prefetch_tune
};
+static const struct tune_params hip09_tunings =
+{
+ &hip09_extra_costs,
+ &hip09_addrcost_table,
+ &hip09_regmove_cost,
+ &hip09_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_256, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ &hip09_prefetch_tune
+};
+
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index d24c8afcf..cf699e4c7 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -477,6 +477,7 @@
(include "thunderx2t99.md")
(include "tsv110.md")
(include "thunderx3t110.md")
+(include "hip09.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md
new file mode 100644
index 000000000..25428de9a
--- /dev/null
+++ b/gcc/config/aarch64/hip09.md
@@ -0,0 +1,558 @@
+;; hip09 pipeline description
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;;
+;; Contributed by Yushuai Xing
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip09")
+(define_automaton "hip09_ldst")
+(define_automaton "hip09_fsu")
+
+(define_attr "hip09_type"
+ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla,
+ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt,
+ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes,
+ hip09_neon_load2and4, hip09_neon_load3_3reg,
+ hip09_neon_load4_4reg, hip09_neon_store1and2,
+ hip09_neon_store1_1reg, hip09_neon_store1_2reg,
+ hip09_neon_store1_3reg, hip09_neon_store1_4reg,
+ hip09_neon_store3and4_lane, hip09_neon_store3_3reg,
+ hip09_neon_store4_4reg, unknown"
+ (cond [
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\
+ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\
+ neon_sub_widen,neon_qadd,neon_qadd_q,\
+ neon_add_long,neon_sub_long,\
+ neon_qabs,neon_qabs_q,neon_qneg,\
+ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\
+ neon_compare_q,neon_compare_zero,\
+ neon_compare_zero_q,neon_logic,neon_logic_q,\
+ neon_minmax,neon_minmax_q,neon_tst,\
+ neon_tst_q,neon_bsl,neon_bsl_q,\
+ neon_cls,neon_cls_q,neon_ext,\
+ neon_ext_q,neon_rev,neon_rev_q,\
+ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\
+ neon_fp_abs_s_q,neon_fp_abs_d,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_shift_imm_narrow_q,neon_move,neon_move_q")
+ (const_string "hip09_neon_abs")
+ (eq_attr "type" "neon_abd,neon_abd_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_add_halve_narrow_q,\
+ neon_sub_halve_narrow_q,neon_reduc_add,\
+ neon_reduc_add_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\
+ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\
+ neon_mla_b_long,neon_sat_mla_b_long,\
+ neon_sat_shift_imm,\
+ neon_sat_shift_imm_q,neon_shift_imm_long,\
+ neon_shift_imm,neon_shift_imm_q,neon_cnt,\
+ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,\
+ neon_tbl2,neon_tbl2_q,neon_to_gp,\
+ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_cvt_widen_s,neon_fp_to_int_s,\
+ neon_fp_to_int_s_q,neon_fp_to_int_d,\
+ neon_fp_to_int_d_q,neon_fp_minmax_s,\
+ neon_fp_minmax_s_q,neon_fp_minmax_d,\
+ neon_fp_minmax_d_q,neon_fp_round_s,\
+ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_fp_cvt_narrow_s_q")
+ (const_string "hip09_neon_fp_arith")
+ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\
+ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\
+ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\
+ neon_mul_s_long,neon_mul_h_scalar_long,\
+ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,neon_mla_h_scalar,\
+ neon_mla_h_scalar_q,neon_mla_s_scalar,\
+ neon_mla_h_long,\
+ neon_mla_s_long,neon_sat_mla_h_long,\
+ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\
+ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\
+ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\
+ neon_shift_reg_q,neon_sat_shift_reg,\
+ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\
+ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\
+ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\
+ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_mul_s_q,\
+ neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\
+ neon_fp_mul_s_scalar_q")
+ (const_string "hip09_neon_mul")
+ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\
+ neon_reduc_minmax_q,neon_fp_recps_s,\
+ neon_fp_recps_s_q,neon_fp_recps_d,\
+ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\
+ neon_fp_mla_s,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mla_d_scalar_q")
+ (const_string "hip09_neon_mla")
+ (eq_attr "type" "neon_dot,neon_dot_q")
+ (const_string "hip09_neon_dot")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q")
+ (const_string "hip09_neon_fp_div")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q")
+ (const_string "hip09_neon_fp_sqrt")
+ (eq_attr "type" "neon_dup,neon_dup_q,\
+ neon_ins,neon_ins_q")
+ (const_string "hip09_neon_ins")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
+ neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load1_3reg,neon_load1_3reg_q,\
+ neon_load1_4reg,neon_load1_4reg_q")
+ (const_string "hip09_neon_load1")
+ (eq_attr "type" "neon_load1_one_lane,\
+ neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q")
+ (const_string "hip09_neon_load1_lanes")
+ (eq_attr "type" "neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,\
+ neon_load2_one_lane,neon_load2_2reg,\
+ neon_load2_2reg_q,neon_load3_one_lane,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_all_lanes,\
+ neon_load4_all_lanes_q")
+ (const_string "hip09_neon_load2and4")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")
+ (const_string "hip09_neon_load3_3reg")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q")
+ (const_string "hip09_neon_load4_4reg")
+ (eq_attr "type" "neon_store1_one_lane,\
+ neon_store1_one_lane_q,neon_store2_one_lane,\
+ neon_store2_one_lane_q,neon_store2_2reg,\
+ neon_store2_2reg_q")
+ (const_string "hip09_neon_store1and2")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q")
+ (const_string "hip09_neon_store1_1reg")
+ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q")
+ (const_string "hip09_neon_store1_2reg")
+ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q")
+ (const_string "hip09_neon_store1_3reg")
+ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q")
+ (const_string "hip09_neon_store1_4reg")
+ (eq_attr "type" "neon_store3_one_lane,\
+ neon_store3_one_lane_q,neon_store4_one_lane,\
+ neon_store4_one_lane_q")
+ (const_string "hip09_neon_store3and4_lane")
+ (eq_attr "type" "neon_store3_3reg,\
+ neon_store3_3reg_q")
+ (const_string "hip09_neon_store3_3reg")
+ (eq_attr "type" "neon_store4_4reg,\
+ neon_store4_4reg_q")
+ (const_string "hip09_neon_store4_4reg")]
+ (const_string "unknown")))
+
+; The hip09 core is modelled as an issue pipeline that has
+; the following functional units.
+; 1. Two pipelines for branch micro operations: BRU1, BRU2
+
+(define_cpu_unit "hip09_bru0" "hip09")
+(define_cpu_unit "hip09_bru1" "hip09")
+
+(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1")
+
+; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4
+
+(define_cpu_unit "hip09_alus0" "hip09")
+(define_cpu_unit "hip09_alus1" "hip09")
+(define_cpu_unit "hip09_alus2" "hip09")
+(define_cpu_unit "hip09_alus3" "hip09")
+
+(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3")
+(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1")
+(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3")
+
+; 3. Two pipelines for multi cycles integer micro operations: ALUm1, ALUm2
+
+(define_cpu_unit "hip09_alum0" "hip09")
+(define_cpu_unit "hip09_alum1" "hip09")
+
+(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1")
+
+; 4. Two pipelines for load micro operations: Load1, Load2
+
+(define_cpu_unit "hip09_load0" "hip09_ldst")
+(define_cpu_unit "hip09_load1" "hip09_ldst")
+
+(define_reservation "hip09_ld01" "hip09_load0|hip09_load1")
+
+; 5. Two pipelines for store micro operations: Store1, Store2
+
+(define_cpu_unit "hip09_store0" "hip09_ldst")
+(define_cpu_unit "hip09_store1" "hip09_ldst")
+
+(define_reservation "hip09_st01" "hip09_store0|hip09_store1")
+
+; 6. Two pipelines for store data micro operations: STD0,STD1
+
+(define_cpu_unit "hip09_store_data0" "hip09_ldst")
+(define_cpu_unit "hip09_store_data1" "hip09_ldst")
+
+(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1")
+
+; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4
+
+(define_cpu_unit "hip09_fsu0" "hip09_fsu")
+(define_cpu_unit "hip09_fsu1" "hip09_fsu")
+(define_cpu_unit "hip09_fsu2" "hip09_fsu")
+(define_cpu_unit "hip09_fsu3" "hip09_fsu")
+
+(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3")
+(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2")
+
+
+; 8. Two pipelines for SVE operations, shared with fsu1 and fsu3: SVE1, SVE2
+
+;; Simple Execution Unit:
+;
+;; Simple ALU without shift
+(define_insn_reservation "hip09_alu" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ adc_imm,adc_reg,\
+ alu_sreg,logic_reg,\
+ mov_imm,mov_reg,\
+ csel,rotate_imm,bfm,mov_imm,\
+ clz,rbit,rev"))
+ "hip09_alus0123")
+
+(define_insn_reservation "hip09_alus" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alus_sreg,alus_imm,\
+ adcs_reg,adcs_imm,\
+ logics_imm,logics_reg,adr"))
+ "hip09_alus23")
+
+;; ALU ops with shift and extend
+(define_insn_reservation "hip09_alu_ext_shift" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alu_ext,alus_ext,\
+ logics_shift_imm,logics_shift_reg,\
+ logic_shift_reg,logic_shift_imm,\
+ "))
+ "hip09_alum01")
+
+;; Multiplies instructions
+(define_insn_reservation "hip09_mult" 3
+ (and (eq_attr "tune" "hip09")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip09_alum01")
+
+;; Integer divide
+(define_insn_reservation "hip09_div" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip09_alum0")
+
+;; Branch execution Unit
+;
+; Branches take two issue slot.
+; No latency as there is no result
+(define_insn_reservation "hip09_branch" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "branch,call"))
+ "hip09_bru01 + hip09_alus23")
+
+;; Load execution Unit
+;
+; Loads of up to two words.
+(define_insn_reservation "hip09_load1" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "load_4,load_8"))
+ "hip09_ld01")
+
+; Stores of up to two words.
+(define_insn_reservation "hip09_store1" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "store_4,store_8"))
+ "hip09_st01")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "hip09_fp_arith" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\
+ f_mrc"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_cmp" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_ccmp" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fccmps,fccmpd"))
+ "hip09_alus01+hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_csel" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fcsel,f_mcr"))
+ "hip09_alus01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_divs" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fdivs"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_divd" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fdivd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_sqrts" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fsqrts"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fsqrtd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_mul" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fmuls,fmuld"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_add" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\
+ f_rints,f_rintd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_mac" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fmacs,fmacd"))
+ "hip09_fsu0123")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "hip09_fp_cvt" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_cvtf2i"))
+ "hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_cvt2" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_cvti2f"))
+ "hip09_alus01+hip09_fsu0123")
+
+;; FP Load Instructions
+
+(define_insn_reservation "hip09_fp_load" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip09_ld01")
+
+(define_insn_reservation "hip09_fp_load2" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_ldp_q,neon_ldp"))
+ "hip09_ld01+hip09_alus01")
+
+;; FP store instructions
+
+(define_insn_reservation "hip09_fp_store" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_stores,f_stored"))
+ "hip09_st01+hip09_std01")
+
+;; ASIMD integer instructions
+
+(define_insn_reservation "hip09_asimd_base1" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_abs"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base2" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_arith"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base3" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_mul"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base4" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_mla"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base5" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_fp_mul_s"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_dot" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_dot"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_bfmmla" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_fp_mla_s_q"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_fdiv" 15
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_div"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_fsqrt" 25
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_sqrt"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_pmull" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_pmull"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_dup" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_ins"))
+ "hip09_alus01+hip09_fsu0123")
+
+;; ASIMD load instructions
+
+(define_insn_reservation "hip09_asimd_ld1_reg" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load1"))
+ "hip09_ld01")
+
+(define_insn_reservation "hip09_asimd_ld1_lane" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load1_lanes"))
+ "hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld23" 8
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load2and4"))
+"hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld3_mtp" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load3_3reg"))
+ "hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld4_mtp" 13
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load4_4reg"))
+ "hip09_ld01+hip09_fsu0123")
+
+;; ASIMD store instructions
+
+(define_insn_reservation "hip09_asimd_st12" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1and2"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_1reg" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_1reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_2reg" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_2reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_3reg" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_3reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_4reg" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_4reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st34_lane" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store3and4_lane"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st3_mtp" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store3_3reg"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st4_mtp" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store4_4reg"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+;; Cryptography extensions
+
+(define_insn_reservation "hip09_asimd_aes" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "hip09_fsu02")
+
+(define_insn_reservation "hip09_asimd_sha3" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha3"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_sha1" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha256_fast,crypto_sha512,\
+ crypto_sm3"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_sha1_and256" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\
+ crypto_sm4"))
+ "hip09_fsu2")
+
+;; CRC extension.
+
+(define_insn_reservation "hip09_crc" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crc"))
+ "hip09_alum01")
--
2.33.0

From 824fccdab1d3c5e87fb88b31f0eeb7abd1b35c1f Mon Sep 17 00:00:00 2001
From: XingYuShuai <1150775134@qq.com>
Date: Mon, 26 Feb 2024 20:34:06 +0800
Subject: [PATCH 002/157] Add hip11 CPU pipeline scheduling
This patch adds a new -mcpu option: hip11. It has been tested on aarch64
with no regressions.
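As with hip09, the AARCH64_CORE entry below makes hip11 selectable via -mcpu/-mtune, and its feature flags also enable SVE/SVE2. A brief illustration of how the new model would be exercised (the loop is arbitrary; only the option comes from this patch):

/* vec.c -- build with:  gcc -O3 -mcpu=hip11 -c vec.c
   -mcpu=hip11 selects the hip11 cost tables and hip11.md reservations
   and, per the cores.def entry, permits SVE/SVE2 code generation.  */
void scale (float *restrict a, const float *restrict b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = b[i] * 2.0f;
}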
---
gcc/config/aarch64/aarch64-cores.def | 1 +
gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.cc | 108 ++++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++
gcc/doc/invoke.texi | 2 +-
7 files changed, 634 insertions(+), 2 deletions(-)
create mode 100644 gcc/config/aarch64/hip11.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index a854bdb24..601b72abb 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -173,6 +173,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 |
AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
+AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index fc5a3cbe4..0ee427b61 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -561,6 +561,110 @@ const struct cpu_cost_table tsv110_extra_costs =
}
};
+const struct cpu_cost_table hip11_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table a64fx_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 238bb6e31..511422081 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index e9b3980c4..7c62ddb2a 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -481,6 +481,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip11_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -666,6 +682,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip11_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost a64fx_regmove_cost =
{
1, /* GP2GP */
@@ -1010,6 +1036,43 @@ static const struct cpu_vector_cost hip09_vector_cost =
nullptr /* issue_info */
};
+static const advsimd_vec_cost hip11_advsimd_vector_cost =
+{
+ 2, /* int_stmt_cost */
+ 2, /* fp_stmt_cost */
+ 0, /* ld2_st2_permute_cost */
+ 0, /* ld3_st3_permute_cost */
+ 0, /* ld4_st4_permute_cost */
+ 2, /* permute_cost */
+ 3, /* reduc_i8_cost */
+ 3, /* reduc_i16_cost */
+ 3, /* reduc_i32_cost */
+ 3, /* reduc_i64_cost */
+ 3, /* reduc_f16_cost */
+ 3, /* reduc_f32_cost */
+ 3, /* reduc_f64_cost */
+ 3, /* store_elt_extra_cost */
+ 5, /* vec_to_scalar_cost */
+ 5, /* scalar_to_vec_cost */
+ 5, /* align_load_cost */
+ 5, /* unalign_load_cost */
+ 1, /* unalign_store_cost */
+ 1 /* store_cost */
+};
+
+static const struct cpu_vector_cost hip11_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1, /* cond_not_taken_branch_cost */
+ &hip11_advsimd_vector_cost, /* advsimd */
+ nullptr, /* sve */
+ nullptr /* issue_info */
+};
+
static const advsimd_vec_cost cortexa57_advsimd_vector_cost =
{
2, /* int_stmt_cost */
@@ -1368,6 +1431,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune =
-1 /* default_opt_level */
};
+static const cpu_prefetch_tune hip11_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
8, /* num_slots */
@@ -1767,6 +1841,40 @@ static const struct tune_params hip09_tunings =
&hip09_prefetch_tune
};
+static const struct tune_params hip11_tunings =
+{
+ &hip11_extra_costs,
+ &hip11_addrcost_table,
+ &hip11_regmove_cost,
+ &hip11_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_512, /* sve_width */
+ { 4, /* load_int. */
+ 4, /* store_int. */
+ 4, /* load_fp. */
+ 4, /* store_fp. */
+ 4, /* load_pred. */
+ 4 /* store_pred. */
+ }, /* memmov_cost. */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ &hip11_prefetch_tune
+};
+
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index cf699e4c7..c0c64a798 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -478,6 +478,7 @@
(include "tsv110.md")
(include "thunderx3t110.md")
(include "hip09.md")
+(include "hip11.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/hip11.md b/gcc/config/aarch64/hip11.md
new file mode 100644
index 000000000..45f91e65b
--- /dev/null
+++ b/gcc/config/aarch64/hip11.md
@@ -0,0 +1,418 @@
+;; hip11 pipeline description
+;; Copyright (C) 2018-2024 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip11")
+
+;; The hip11 core is modelled as an issue pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "hip11_alu1_issue" "hip11")
+(define_reservation "hip11_alu1" "hip11_alu1_issue")
+
+(define_cpu_unit "hip11_alu2_issue" "hip11")
+(define_reservation "hip11_alu2" "hip11_alu2_issue")
+
+(define_cpu_unit "hip11_alu3_issue" "hip11")
+(define_reservation "hip11_alu3" "hip11_alu3_issue")
+
+(define_reservation "hip11alu" "hip11_alu1|hip11_alu2|hip11_alu3")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "hip11_mdu_issue" "hip11")
+(define_reservation "hip11_mdu" "hip11_mdu_issue")
+
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
+(define_automaton "hip11_fsu")
+
+(define_cpu_unit "hip11_fsu1_issue"
+ "hip11_fsu")
+(define_cpu_unit "hip11_fsu2_issue"
+ "hip11_fsu")
+
+(define_reservation "hip11_fsu1" "hip11_fsu1_issue")
+(define_reservation "hip11_fsu2" "hip11_fsu2_issue")
+(define_reservation "hip11_fsu_pipe" "hip11_fsu1|hip11_fsu2")
+
+;; 4. Two pipelines for branch operations, shared with alu2 and alu3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "hip11_ls1_issue" "hip11")
+(define_cpu_unit "hip11_ls2_issue" "hip11")
+(define_reservation "hip11_ls1" "hip11_ls1_issue")
+(define_reservation "hip11_ls2" "hip11_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "hip11_block" "hip11_fsu1_issue + hip11_fsu2_issue
+ + hip11_mdu_issue + hip11_alu1_issue
+ + hip11_alu2_issue + hip11_alu3_issue + hip11_ls1_issue + hip11_ls2_issue")
+
+;; Branch execution Unit
+;;
+(define_insn_reservation "hip11_branch" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "branch"))
+ "hip11_alu2|hip11_alu3")
+
+(define_insn_reservation "hip11_return_from_subroutine" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "branch")
+ (eq_attr "sls_length" "retbr"))
+ "hip11_mdu,(hip11_alu2|hip11_alu3)")
+
+ ;; Simple Execution Unit:
+;;
+;; Simple ALU without shift
+(define_insn_reservation "hip11_alu" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,csel,\
+ rotate_imm"))
+ "hip11_alu1|hip11_alu2|hip11_alu3")
+
+(define_insn_reservation "hip11_alus" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "hip11_alu2|hip11_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "hip11_alu_shift" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "extend,\
+ alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "hip11_mdu")
+
+(define_insn_reservation "hip11_alus_shift" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "hip11_alu2|hip11_alu3")
+
+;; Multiplies instructions
+(define_insn_reservation "hip11_mult" 3
+ (and (eq_attr "tune" "hip11")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip11_mdu")
+
+;; Integer divide
+(define_insn_reservation "hip11_div" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip11_mdu")
+
+(define_insn_reservation "hip11_mla" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "mla,smlal,umlal,smull,umull"))
+ "hip11_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "hip11_block" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "block"))
+ "hip11_block")
+
+;; Load-store execution Unit
+;;
+(define_insn_reservation "hip11_load1" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "load_4,load_8,load_16"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_fp_load" 5
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_single" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld1_1reg" 5
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_2reg" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_3reg" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_4reg" 8
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld2" 8
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load2_one_lane,neon_load2_one_lane_q,\
+ neon_load2_all_lanes,neon_load2_all_lanes_q,\
+ neon_load2_2reg,neon_load2_2reg_q,\
+ neon_load2_4reg,neon_load2_4reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld3_single" 9
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld3_multiple" 13
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld4_single" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\
+ neon_load4_all_lanes,neon_load4_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld4_multiple" 11
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+;; Stores of up to two words.
+(define_insn_reservation "hip11_store1" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "store_4,store_8,store_16,\
+ f_stored,f_stores"))
+ "hip11_ls1|hip11_ls2")
+
+;; Floating-Point Operations.
+(define_insn_reservation "hip11_fp_arith" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,\
+ f_minmaxd,fadds,faddd,neon_fcadd"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_mul" 3
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_scalar_q,fmuld,fmuls"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_cmp" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fccmpd,fccmps"))
+ "hip11alu,hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_csel" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fcsel"))
+ "hip11alu,hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_fcmp" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fcmpd,fcmps"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_divs" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fdivs"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_divd" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fdivd"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_sqrts" 9
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fsqrts"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fsqrtd"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_mac" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_mov" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fmov,neon_dup,neon_dup_q,\
+ neon_from_gp,neon_from_gp_q,\
+ neon_ins,neon_ins_q,\
+ neon_to_gp,neon_to_gp_q,\
+ neon_move,neon_move_q,\
+ neon_rev,neon_rev_q,\
+ neon_permute,neon_permute_q,\
+ neon_shift_imm_narrow_q,\
+ neon_ext,neon_ext_q,\
+ neon_rbit,\
+ crypto_sha3,neon_tbl1,neon_tbl1_q,\
+ neon_tbl2_q,f_mcr,neon_tst,neon_tst_q,\
+ neon_move_narrow_q"))
+ "hip11_fsu1")
+
+;; ASIMD instructions
+(define_insn_reservation "hip11_asimd_simple_arithmetic" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_neg,neon_neg_q,\
+ neon_abd,neon_abd_q,\
+ neon_add_long,neon_sub_long,neon_sub_widen,neon_add_widen,\
+ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_compare,neon_compare_q,\
+ neon_compare_zero,neon_compare_zero_q,\
+ neon_minmax,neon_minmax_q,\
+ neon_logic,neon_logic_q,\
+ neon_reduc_add,neon_reduc_add_q,\
+ neon_reduc_minmax,neon_reduc_minmax_q,\
+ neon_fp_to_int_s,neon_fp_to_int_s_q,\
+ neon_fp_to_int_d,neon_fp_to_int_d_q,\
+ neon_fp_cvt_widen_s,\
+ neon_fp_cvt_narrow_d_q,\
+ neon_cls,neon_cls_q,\
+ neon_cnt,neon_cnt_q,\
+ f_rints,f_rintd,f_cvtf2i,f_cvt,\
+ neon_tbl3,neon_fp_round_s,neon_fp_round_s_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_int_to_fp_s,neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_cvt_narrow_s_q,\
+ crypto_aese,crypto_aesmc,\
+ crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha1_slow,\
+ crypto_sha256_fast,\
+ crypto_sha512,crypto_sm3,\
+ neon_qabs,neon_qabs_q,\
+ neon_qneg,neon_qneg_q,\
+ neon_qadd,neon_qadd_q,\
+ neon_qsub,neon_qsub_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_complex_arithmetic" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_mul_b,neon_mul_b_q,\
+ neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,\
+ neon_mla_b,neon_mla_b_q,\
+ neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,\
+ neon_mla_h_scalar,neon_mla_h_scalar_q,\
+ neon_mla_s_scalar,neon_mla_s_scalar_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar,neon_sat_mul_s_scalar_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
+ neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,\
+ neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,\
+ neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,\
+ neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,\
+ neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,\
+ crypto_pmull,\
+ neon_sat_shift_reg,neon_sat_shift_reg_q,\
+ neon_shift_reg,neon_shift_reg_q,\
+ neon_shift_imm,neon_shift_imm_q,\
+ neon_shift_imm_long,\
+ neon_sat_shift_imm,neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q,\
+ neon_shift_acc,neon_shift_acc_q,\
+ crypto_sha256_slow"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fp_compare" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\
+ neon_fp_abs_d,neon_fp_abs_d_q,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_s_q,\
+ neon_fp_minmax_d,neon_fp_minmax_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\
+ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\
+ neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_asimd_fdiv" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fsqrt" 15
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fp_multiply_add" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mul_s,neon_fp_mul_s_q,neon_fcmla,\
+ neon_fp_recps_s,neon_fp_recps_s_q,\
+ neon_fp_recps_d,neon_fp_recps_d_q,\
+ neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\
+ neon_fp_rsqrts_d,neon_fp_rsqrts_d_q"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_asimd_frecpx" 3
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,neon_tbl4,\
+ neon_dot,neon_dot_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_mmla" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_mla_s_q"))
+ "hip11_fsu1")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7ca60dd64..17d9e4126 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -19212,7 +19212,7 @@ performance of the code. Permissible values for this option are:
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
@samp{octeontx2f95mm},
-@samp{a64fx},
+@samp{a64fx}, @samp{hip11},
@samp{thunderx}, @samp{thunderxt88},
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
--
2.33.0
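
For readers unfamiliar with GCC machine descriptions, every reservation in the
scheduling model above follows the same pattern. A minimal annotated sketch
(illustrative only, not part of the patch; the name "hip11_example" is
hypothetical):

(define_insn_reservation "hip11_example" 2      ; reservation name and result latency in cycles
  (and (eq_attr "tune" "hip11")                 ; only applies when tuning for hip11
       (eq_attr "type" "alu_sreg,logic_reg"))   ; instruction type attributes covered
  "hip11_alu1|hip11_alu2|hip11_alu3")           ; '|' = any one unit, '+' = units used together, ',' = successive cycles

The scheduler uses the latency to separate dependent instructions and the
reservation string to model contention for the cpu units declared with
define_cpu_unit above.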

File diff suppressed because it is too large

View File

@ -0,0 +1,34 @@
From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001
From: xiezhiheng <xiezhiheng@huawei.com>
Date: Fri, 23 Aug 2024 15:14:04 +0800
Subject: [PATCH 004/157] [SME] Remove hip09 and hip11 in aarch64-cores.def to
backport SME
The hip09 and hip11 entries will be re-applied at the end of the patch series.
---
gcc/config/aarch64/aarch64-cores.def | 2 --
1 file changed, 2 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 601b72abb..70b11eb80 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0)
/* ARMv8.3-A Architecture Processors. */
@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 |
AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1)
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1)
-AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1)
--
2.33.0

Some files were not shown because too many files have changed in this diff