!517 [Sync] Sync from openEuler/gcc
From: @wumingchuan Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
This commit is contained in:
commit
38e9a59d29
320
0312-Add-late-slp-vectorization-pass-with-additional-chec.patch
Normal file
320
0312-Add-late-slp-vectorization-pass-with-additional-chec.patch
Normal file
@ -0,0 +1,320 @@
|
|||||||
|
From 9df4a0bd76299734ae47f2f4e236b10f6c156994 Mon Sep 17 00:00:00 2001
|
||||||
|
From: d84370931 <dementiev.daniil@h-partners.com>
|
||||||
|
Date: Thu, 14 Nov 2024 17:08:40 +0800
|
||||||
|
Subject: [PATCH 3/8] Add late slp vectorization pass with additional checks.
|
||||||
|
|
||||||
|
Add expansion of data reference offset using affine trees to check
|
||||||
|
if data references may alias.
|
||||||
|
|
||||||
|
Add check if a group of interleaving data references is smaller than
|
||||||
|
max vector register size.
|
||||||
|
|
||||||
|
Add operands swap for commutative operations.
|
||||||
|
Swapping operands is necessary for better vector construction.
|
||||||
|
For example for operations
|
||||||
|
_1 = a * b;
|
||||||
|
_2 = b * c;
|
||||||
|
Constructing vectors (a, c) * (b, b) is more profitable
|
||||||
|
than (a, b) * (b, c).
|
||||||
|
|
||||||
|
Add tests and special param flags for each check:
|
||||||
|
--param=addr-expand-for-alias-check={0,1}
|
||||||
|
--param=vect-swap-operands={0,1}
|
||||||
|
--param=vect-register-size-check={0,1}
|
||||||
|
|
||||||
|
Add enabling flag for late slp pass:
|
||||||
|
-ftree-slp-late
|
||||||
|
---
|
||||||
|
gcc/common.opt | 4 ++
|
||||||
|
gcc/params.opt | 12 ++++++
|
||||||
|
gcc/passes.def | 4 ++
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-alias-expand.c | 12 ++++++
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-op-swap.c | 10 +++++
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-regsize.c | 18 +++++++++
|
||||||
|
gcc/timevar.def | 1 +
|
||||||
|
gcc/tree-data-ref.cc | 12 ++++++
|
||||||
|
gcc/tree-pass.h | 1 +
|
||||||
|
gcc/tree-vect-data-refs.cc | 15 +++++++
|
||||||
|
gcc/tree-vect-slp.cc | 28 +++++++++++++
|
||||||
|
gcc/tree-vectorizer.cc | 39 +++++++++++++++++++
|
||||||
|
12 files changed, 156 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-alias-expand.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-op-swap.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/vect/vect-regsize.c
|
||||||
|
|
||||||
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||||
|
index 78cfc333a..c3c64ceaf 100644
|
||||||
|
--- a/gcc/common.opt
|
||||||
|
+++ b/gcc/common.opt
|
||||||
|
@@ -3268,6 +3268,10 @@ ftree-slp-transpose-vectorize
|
||||||
|
Common Var(flag_tree_slp_transpose_vectorize) Optimization Init(0)
|
||||||
|
Enable basic block vectorization (SLP) for transposed stores and loads on trees.
|
||||||
|
|
||||||
|
+ftree-slp-late
|
||||||
|
+Common Var(flag_slp_late) Init(0) Optimization
|
||||||
|
+Enable additional SLP vectorization pass after reassociation.
|
||||||
|
+
|
||||||
|
fvect-cost-model=
|
||||||
|
Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
|
||||||
|
-fvect-cost-model=[unlimited|dynamic|cheap|very-cheap] Specifies the cost model for vectorization.
|
||||||
|
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||||
|
index 3ddfaf5b2..bb4dc1825 100644
|
||||||
|
--- a/gcc/params.opt
|
||||||
|
+++ b/gcc/params.opt
|
||||||
|
@@ -1213,6 +1213,18 @@ The maximum factor which the loop vectorizer applies to the cost of statements i
|
||||||
|
Common Joined UInteger Var(param_vect_induction_float) Init(1) IntegerRange(0, 1) Param Optimization
|
||||||
|
Enable loop vectorization of floating point inductions.
|
||||||
|
|
||||||
|
+-param=vect-swap-operands=
|
||||||
|
+Common Joined UInteger Var(param_vect_swap_operands) Init(0) IntegerRange(0, 1) Param Optimization
|
||||||
|
+Enable swapping operands for commutative operations in vectorization analysis.
|
||||||
|
+
|
||||||
|
+-param=addr-expand-for-alias-check=
|
||||||
|
+Common Joined UInteger Var(param_addr_expand_for_alias_check) Init(0) IntegerRange(0, 1) Param Optimization
|
||||||
|
+Enable data reference address expansion for alias check.
|
||||||
|
+
|
||||||
|
+-param=vect-register-size-check=
|
||||||
|
+Common Joined UInteger Var(param_vect_register_size_check) Init(0) IntegerRange(0, 1) Param Optimization
|
||||||
|
+Enable checking if a group of interleaving data references may not fit in vector register.
|
||||||
|
+
|
||||||
|
-param=vrp1-mode=
|
||||||
|
Common Joined Var(param_vrp1_mode) Enum(vrp_mode) Init(VRP_MODE_VRP) Param Optimization
|
||||||
|
--param=vrp1-mode=[vrp|ranger] Specifies the mode VRP1 should operate in.
|
||||||
|
diff --git a/gcc/passes.def b/gcc/passes.def
|
||||||
|
index e945af96a..529cc5093 100644
|
||||||
|
--- a/gcc/passes.def
|
||||||
|
+++ b/gcc/passes.def
|
||||||
|
@@ -337,6 +337,10 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
NEXT_PASS (pass_lower_switch);
|
||||||
|
NEXT_PASS (pass_cse_reciprocals);
|
||||||
|
NEXT_PASS (pass_reassoc, false /* early_p */);
|
||||||
|
+ NEXT_PASS (pass_slp_vectorize_late);
|
||||||
|
+ PUSH_INSERT_PASSES_WITHIN (pass_slp_vectorize_late)
|
||||||
|
+ NEXT_PASS (pass_slp_vectorize);
|
||||||
|
+ POP_INSERT_PASSES ()
|
||||||
|
NEXT_PASS (pass_strength_reduction);
|
||||||
|
NEXT_PASS (pass_split_paths);
|
||||||
|
NEXT_PASS (pass_tracer);
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-expand.c b/gcc/testsuite/gcc.dg/vect/vect-alias-expand.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a68f4baf8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-alias-expand.c
|
||||||
|
@@ -0,0 +1,12 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -ftree-vectorize --param=addr-expand-for-alias-check=1 -fdump-tree-slp-details" } */
|
||||||
|
+
|
||||||
|
+extern float arr[2][2];
|
||||||
|
+
|
||||||
|
+void foo (int i, int j, float a, float b)
|
||||||
|
+{
|
||||||
|
+ arr[i][j] *= a;
|
||||||
|
+ arr[i][j+1] *= b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump "Basic block will be vectorized using SLP" "slp2" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-op-swap.c b/gcc/testsuite/gcc.dg/vect/vect-op-swap.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..4872dc414
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-op-swap.c
|
||||||
|
@@ -0,0 +1,10 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -ftree-vectorize --param=vect-swap-operands=1 -fdump-tree-slp-details" } */
|
||||||
|
+
|
||||||
|
+void foo (float *res, float a, float b, float c)
|
||||||
|
+{
|
||||||
|
+ res[0] = a * b;
|
||||||
|
+ res[1] = b * c;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump "Swapped operands for" "slp2" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-regsize.c b/gcc/testsuite/gcc.dg/vect/vect-regsize.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..bcd81e6df
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-regsize.c
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -ftree-vectorize --param=vect-register-size-check=1 -fdump-tree-slp-details" } */
|
||||||
|
+
|
||||||
|
+extern float arr[256][256][1024];
|
||||||
|
+
|
||||||
|
+void foo (int i, int j, float a, float b)
|
||||||
|
+{
|
||||||
|
+ arr[i][j][0] += a;
|
||||||
|
+ arr[i][j][1] += b;
|
||||||
|
+ arr[i][j+1][0] += a;
|
||||||
|
+ arr[i][j+1][1] += b;
|
||||||
|
+ arr[i+1][j][0] += a;
|
||||||
|
+ arr[i+1][j][1] += b;
|
||||||
|
+ arr[i+1][j+1][0] += a;
|
||||||
|
+ arr[i+1][j+1][1] += b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump "Basic block will be vectorized using SLP" "slp2" } } */
|
||||||
|
diff --git a/gcc/timevar.def b/gcc/timevar.def
|
||||||
|
index fc2b1e1e7..7560e930a 100644
|
||||||
|
--- a/gcc/timevar.def
|
||||||
|
+++ b/gcc/timevar.def
|
||||||
|
@@ -205,6 +205,7 @@ DEFTIMEVAR (TV_SCALAR_CLEANUP , "scalar cleanup")
|
||||||
|
DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
|
||||||
|
DEFTIMEVAR (TV_TREE_VECTORIZATION , "tree vectorization")
|
||||||
|
DEFTIMEVAR (TV_TREE_SLP_VECTORIZATION, "tree slp vectorization")
|
||||||
|
+DEFTIMEVAR (TV_TREE_LATE_SLP , "late slp vectorization")
|
||||||
|
DEFTIMEVAR (TV_GRAPHITE , "Graphite")
|
||||||
|
DEFTIMEVAR (TV_GRAPHITE_TRANSFORMS , "Graphite loop transforms")
|
||||||
|
DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS , "Graphite data dep analysis")
|
||||||
|
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
|
||||||
|
index a05073c51..5eb4ac102 100644
|
||||||
|
--- a/gcc/tree-data-ref.cc
|
||||||
|
+++ b/gcc/tree-data-ref.cc
|
||||||
|
@@ -3021,6 +3021,18 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
|
||||||
|
get_inner_reference_aff (DR_REF (b), &off2, &size2);
|
||||||
|
aff_combination_scale (&off1, -1);
|
||||||
|
aff_combination_add (&off2, &off1);
|
||||||
|
+
|
||||||
|
+ if (param_addr_expand_for_alias_check)
|
||||||
|
+ {
|
||||||
|
+ using tree_expand_map_t = hash_map<tree, name_expansion *>;
|
||||||
|
+ /* Cache used by aff_combination_expand. */
|
||||||
|
+ tree_expand_map_t *cache = NULL;
|
||||||
|
+
|
||||||
|
+ if (off2.n)
|
||||||
|
+ aff_combination_expand (&off2, &cache);
|
||||||
|
+ free_affine_expand_cache (&cache);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (aff_comb_cannot_overlap_p (&off2, size1, size2))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||||||
|
index 18b0f8022..2ed79f353 100644
|
||||||
|
--- a/gcc/tree-pass.h
|
||||||
|
+++ b/gcc/tree-pass.h
|
||||||
|
@@ -390,6 +390,7 @@ extern gimple_opt_pass *make_pass_slp_vectorize (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_complete_unroll (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_complete_unrolli (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_pre_slp_scalar_cleanup (gcc::context *ctxt);
|
||||||
|
+extern gimple_opt_pass *make_pass_slp_vectorize_late (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_parallelize_loops (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_loop_prefetch (gcc::context *ctxt);
|
||||||
|
extern gimple_opt_pass *make_pass_iv_optimize (gcc::context *ctxt);
|
||||||
|
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
|
||||||
|
index aae7f62f3..ee58c8f6c 100644
|
||||||
|
--- a/gcc/tree-vect-data-refs.cc
|
||||||
|
+++ b/gcc/tree-vect-data-refs.cc
|
||||||
|
@@ -3234,6 +3234,21 @@ vect_analyze_data_ref_accesses (vec_info *vinfo,
|
||||||
|
!= type_size_a))
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ if (param_vect_register_size_check)
|
||||||
|
+ {
|
||||||
|
+ tree scalar_type = TREE_TYPE (DR_REF (dra));
|
||||||
|
+ tree vec_type = get_related_vectype_for_scalar_type (
|
||||||
|
+ vinfo->vector_mode, scalar_type);
|
||||||
|
+ poly_uint64 vec_size = TYPE_VECTOR_SUBPARTS (vec_type);
|
||||||
|
+
|
||||||
|
+ /* If we have a large interleaving group (especially a group
|
||||||
|
+ of loads with gaps) that does not fit in vector register,
|
||||||
|
+ we should split this group to chunks we support. */
|
||||||
|
+ if (maybe_ge (((unsigned HOST_WIDE_INT)init_b - init_prev)
|
||||||
|
+ / type_size_a, vec_size))
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* If the step (if not zero or non-constant) is smaller than the
|
||||||
|
difference between data-refs' inits this splits groups into
|
||||||
|
suitable sizes. */
|
||||||
|
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
|
||||||
|
index fbd638333..79026fb5b 100644
|
||||||
|
--- a/gcc/tree-vect-slp.cc
|
||||||
|
+++ b/gcc/tree-vect-slp.cc
|
||||||
|
@@ -687,6 +687,34 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap,
|
||||||
|
if (first)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
+ /* If different statements in the group of commutative operations
|
||||||
|
+ have the same arguments but in different places, swap them to
|
||||||
|
+ group the same operands in one vector.
|
||||||
|
+
|
||||||
|
+ Check if swapping is enabled, operation is commutative and has
|
||||||
|
+ two operands of the same type.
|
||||||
|
+ If one of the operands in current statement matches the operand
|
||||||
|
+ on another place of the first statement in the group we
|
||||||
|
+ swap operands in current statement. */
|
||||||
|
+ if (param_vect_swap_operands && commutative_op == 0 && !first
|
||||||
|
+ && is_a <bb_vec_info> (vinfo) && number_of_oprnds == 2
|
||||||
|
+ && vect_def_types_match (dts[0], dts[1]))
|
||||||
|
+ {
|
||||||
|
+ slp_oprnd_info oprnd_info0 = (*oprnds_info)[0];
|
||||||
|
+ slp_oprnd_info oprnd_info1 = (*oprnds_info)[1];
|
||||||
|
+ if (oprnd_info1->ops[stmt_num] == oprnd_info0->ops[0]
|
||||||
|
+ || oprnd_info0->ops[stmt_num] == oprnd_info1->ops[0])
|
||||||
|
+ {
|
||||||
|
+ std::swap (oprnd_info0->def_stmts[stmt_num],
|
||||||
|
+ oprnd_info1->def_stmts[stmt_num]);
|
||||||
|
+ std::swap (oprnd_info0->ops[stmt_num],
|
||||||
|
+ oprnd_info1->ops[stmt_num]);
|
||||||
|
+ if (dump_enabled_p ())
|
||||||
|
+ dump_printf_loc (MSG_NOTE, vect_location,
|
||||||
|
+ "Swapped operands for %G", stmt_info->stmt);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Now match the operand definition types to that of the first stmt. */
|
||||||
|
for (i = 0; i < number_of_oprnds;)
|
||||||
|
{
|
||||||
|
diff --git a/gcc/tree-vectorizer.cc b/gcc/tree-vectorizer.cc
|
||||||
|
index a63fa3912..c363ce490 100644
|
||||||
|
--- a/gcc/tree-vectorizer.cc
|
||||||
|
+++ b/gcc/tree-vectorizer.cc
|
||||||
|
@@ -1524,6 +1524,45 @@ make_pass_slp_vectorize (gcc::context *ctxt)
|
||||||
|
return new pass_slp_vectorize (ctxt);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* The late SLP vectorization pass. */
|
||||||
|
+
|
||||||
|
+namespace {
|
||||||
|
+
|
||||||
|
+const pass_data pass_data_slp_vectorize_late =
|
||||||
|
+{
|
||||||
|
+ GIMPLE_PASS, /* type. */
|
||||||
|
+ "slp_late", /* name. */
|
||||||
|
+ OPTGROUP_NONE, /* optinfo_flags. */
|
||||||
|
+ TV_TREE_LATE_SLP, /* tv_id. */
|
||||||
|
+ PROP_cfg, /* properties_required. */
|
||||||
|
+ 0, /* properties_provided. */
|
||||||
|
+ 0, /* properties_destroyed. */
|
||||||
|
+ 0, /* todo_flags_start. */
|
||||||
|
+ 0, /* todo_flags_finish. */
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+class pass_slp_vectorize_late : public gimple_opt_pass
|
||||||
|
+{
|
||||||
|
+public:
|
||||||
|
+ pass_slp_vectorize_late (gcc::context *ctxt)
|
||||||
|
+ : gimple_opt_pass (pass_data_slp_vectorize_late, ctxt)
|
||||||
|
+ {}
|
||||||
|
+
|
||||||
|
+ /* opt_pass methods: */
|
||||||
|
+ virtual bool gate (function *)
|
||||||
|
+ {
|
||||||
|
+ return flag_slp_late != 0;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+}; // class pass_slp_vectorize_late
|
||||||
|
+
|
||||||
|
+} // anon namespace
|
||||||
|
+
|
||||||
|
+gimple_opt_pass *
|
||||||
|
+make_pass_slp_vectorize_late (gcc::context *ctxt)
|
||||||
|
+{
|
||||||
|
+ return new pass_slp_vectorize_late (ctxt);
|
||||||
|
+}
|
||||||
|
|
||||||
|
/* Increase alignment of global arrays to improve vectorization potential.
|
||||||
|
TODO:
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
130
0313-Add-tracer-transformation-for-static-probabilities.patch
Normal file
130
0313-Add-tracer-transformation-for-static-probabilities.patch
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
From ed300a0b07e608efb756b623263f014c2cebdf08 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Egorov Ivan WX1280859 <egorov.ivan@huawei-partners.com>
|
||||||
|
Date: Tue, 26 Nov 2024 14:53:59 +0300
|
||||||
|
Subject: [PATCH 6/8] Add tracer transformation for static probabilities
|
||||||
|
|
||||||
|
---
|
||||||
|
gcc/common.opt | 4 ++++
|
||||||
|
gcc/opts.cc | 4 ++++
|
||||||
|
gcc/params.opt | 8 ++++++++
|
||||||
|
gcc/testsuite/gcc.dg/tracer-static-1.c | 28 ++++++++++++++++++++++++++
|
||||||
|
gcc/tracer.cc | 11 ++++++++++
|
||||||
|
5 files changed, 55 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.dg/tracer-static-1.c
|
||||||
|
|
||||||
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||||
|
index 96888cf1b..db35391c3 100644
|
||||||
|
--- a/gcc/common.opt
|
||||||
|
+++ b/gcc/common.opt
|
||||||
|
@@ -2990,6 +2990,10 @@ ftracer
|
||||||
|
Common Var(flag_tracer) Optimization
|
||||||
|
Perform superblock formation via tail duplication.
|
||||||
|
|
||||||
|
+ftracer-static
|
||||||
|
+Common Var(flag_tracer_static) Init(0) Optimization
|
||||||
|
+Perform superblock formation via tail duplication for a given bb size.
|
||||||
|
+
|
||||||
|
ftrampolines
|
||||||
|
Common Var(flag_trampolines) Init(0)
|
||||||
|
For targets that normally need trampolines for nested functions, always
|
||||||
|
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||||
|
index 84dd8925a..34b84db8f 100644
|
||||||
|
--- a/gcc/opts.cc
|
||||||
|
+++ b/gcc/opts.cc
|
||||||
|
@@ -3180,6 +3180,10 @@ common_handle_option (struct gcc_options *opts,
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
+ case OPT_ftracer_static:
|
||||||
|
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_tracer, true);
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
case OPT_ftree_vectorize:
|
||||||
|
/* Automatically sets -ftree-loop-vectorize and
|
||||||
|
-ftree-slp-vectorize. Nothing more to do here. */
|
||||||
|
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||||
|
index bb4dc1825..e5472dfc8 100644
|
||||||
|
--- a/gcc/params.opt
|
||||||
|
+++ b/gcc/params.opt
|
||||||
|
@@ -1116,6 +1116,10 @@ The percentage of function, weighted by execution frequency, that must be covere
|
||||||
|
Common Joined UInteger Var(param_tracer_max_code_growth) Init(100) Param Optimization
|
||||||
|
Maximal code growth caused by tail duplication (in percent).
|
||||||
|
|
||||||
|
+-param=tracer-max-not-covered-insns-num=
|
||||||
|
+Common Joined UInteger Var(param_tracer_max_not_covered_insns_num) Init(12) Param Optimization
|
||||||
|
+Maximal number of instructions in the block, that must not be covered by trace formation.
|
||||||
|
+
|
||||||
|
-param=tracer-min-branch-probability=
|
||||||
|
Common Joined UInteger Var(param_tracer_min_branch_probability) Init(50) IntegerRange(0, 100) Param Optimization
|
||||||
|
Stop forward growth if the probability of best edge is less than this threshold (in percent). Used when profile feedback is not available.
|
||||||
|
@@ -1128,6 +1132,10 @@ Stop forward growth if the probability of best edge is less than this threshold
|
||||||
|
Common Joined UInteger Var(param_tracer_min_branch_ratio) Init(10) IntegerRange(0, 100) Param Optimization
|
||||||
|
Stop reverse growth if the reverse probability of best edge is less than this threshold (in percent).
|
||||||
|
|
||||||
|
+-param=tracer-min-not-covered-insns-num=
|
||||||
|
+Common Joined UInteger Var(param_tracer_min_not_covered_insns_num) Init(1) Param Optimization
|
||||||
|
+Minimal number of instructions in the block, that must not be covered by trace formation.
|
||||||
|
+
|
||||||
|
-param=tree-reassoc-width=
|
||||||
|
Common Joined UInteger Var(param_tree_reassoc_width) Param Optimization
|
||||||
|
Set the maximum number of instructions executed in parallel in reassociated tree. If 0, use the target dependent heuristic.
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/tracer-static-1.c b/gcc/testsuite/gcc.dg/tracer-static-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..76c863b48
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/tracer-static-1.c
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -ftracer-static -fdump-tree-tracer" } */
|
||||||
|
+
|
||||||
|
+static __attribute__ ((noinline)) int fib (int n)
|
||||||
|
+{
|
||||||
|
+ if (n < 3)
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ long long fib1 = 0, fib2 = 1;
|
||||||
|
+ long long currentFib = 0;
|
||||||
|
+
|
||||||
|
+ for (int i = 3; i <= n; ++i)
|
||||||
|
+ {
|
||||||
|
+ currentFib = fib1 + fib2;
|
||||||
|
+ fib1 = fib2;
|
||||||
|
+ fib2 = currentFib;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return currentFib;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int main (int argc, char** argv)
|
||||||
|
+{
|
||||||
|
+ int n = argc;
|
||||||
|
+ return fib (n);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-times "BB\\d+ with n = \\d+ will not be covered by tracer formation" 4 "tracer" } } */
|
||||||
|
\ No newline at end of file
|
||||||
|
diff --git a/gcc/tracer.cc b/gcc/tracer.cc
|
||||||
|
index 4d054fe8f..9b1578cd4 100644
|
||||||
|
--- a/gcc/tracer.cc
|
||||||
|
+++ b/gcc/tracer.cc
|
||||||
|
@@ -304,6 +304,17 @@ tail_duplicate (void)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
analyze_bb (bb, &n);
|
||||||
|
+
|
||||||
|
+ if (flag_tracer_static && n >= param_tracer_min_not_covered_insns_num
|
||||||
|
+ && n <= param_tracer_max_not_covered_insns_num)
|
||||||
|
+ {
|
||||||
|
+ if (dump_file)
|
||||||
|
+ fprintf (dump_file,
|
||||||
|
+ "BB%d with n = %d will not be covered by tracer formation\n",
|
||||||
|
+ bb->index, n);
|
||||||
|
+ continue;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (!ignore_bb_p (bb))
|
||||||
|
blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
56
0314-bugfix-Modify-the-hip09-tune-flags.patch
Normal file
56
0314-bugfix-Modify-the-hip09-tune-flags.patch
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
From e94bf3e1ad12211ec037c9e04a1698e1ed16c87a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
||||||
|
Date: Tue, 3 Dec 2024 21:02:39 +0800
|
||||||
|
Subject: [PATCH 8/8] [bugfix] Modify the hip09 tune flags.
|
||||||
|
|
||||||
|
---
|
||||||
|
gcc/config/aarch64/aarch64-tuning-flags.def | 3 +++
|
||||||
|
gcc/config/aarch64/aarch64.cc | 11 +++++++++--
|
||||||
|
2 files changed, 12 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
|
||||||
|
index b4a8f99a6..293f6fb7e 100644
|
||||||
|
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
|
||||||
|
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
|
||||||
|
@@ -49,6 +49,9 @@ AARCH64_EXTRA_TUNING_OPTION ("no_ldp_combine", NO_LDP_COMBINE)
|
||||||
|
|
||||||
|
AARCH64_EXTRA_TUNING_OPTION ("rename_load_regs", RENAME_LOAD_REGS)
|
||||||
|
|
||||||
|
+/* Prefer Advanced SIMD over SVE for auto-vectorization. */
|
||||||
|
+AARCH64_EXTRA_TUNING_OPTION ("prefer_advsimd_autovec", PREFER_ADVSIMD_AUTOVEC)
|
||||||
|
+
|
||||||
|
AARCH64_EXTRA_TUNING_OPTION ("cse_sve_vl_constants", CSE_SVE_VL_CONSTANTS)
|
||||||
|
|
||||||
|
AARCH64_EXTRA_TUNING_OPTION ("use_new_vector_costs", USE_NEW_VECTOR_COSTS)
|
||||||
|
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||||
|
index 1d479f270..829e0da8f 100644
|
||||||
|
--- a/gcc/config/aarch64/aarch64.cc
|
||||||
|
+++ b/gcc/config/aarch64/aarch64.cc
|
||||||
|
@@ -1934,8 +1934,7 @@ static const struct tune_params hip09_tunings =
|
||||||
|
2, /* min_div_recip_mul_df. */
|
||||||
|
0, /* max_case_values. */
|
||||||
|
tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||||
|
- (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
|
||||||
|
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
|
||||||
|
+ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */
|
||||||
|
&hip09_prefetch_tune
|
||||||
|
};
|
||||||
|
|
||||||
|
@@ -20250,6 +20249,14 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||||||
|
SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||||
|
param_sched_autopref_queue_depth, queue_depth);
|
||||||
|
|
||||||
|
+ /* If the core wants only AdvancedSIMD autovectorization, do this through
|
||||||
|
+ aarch64_autovec_preference. If the user set it explicitly, they should
|
||||||
|
+ know what they want. */
|
||||||
|
+ if (aarch64_tune_params.extra_tuning_flags
|
||||||
|
+ & AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC)
|
||||||
|
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||||
|
+ aarch64_autovec_preference, 1);
|
||||||
|
+
|
||||||
|
/* If using Advanced SIMD only for autovectorization disable SVE vector costs
|
||||||
|
comparison. */
|
||||||
|
if (aarch64_autovec_preference == 1)
|
||||||
|
--
|
||||||
|
2.33.0
|
||||||
|
|
||||||
14
gcc.spec
14
gcc.spec
@ -2,7 +2,7 @@
|
|||||||
%global gcc_major 12
|
%global gcc_major 12
|
||||||
# Note, gcc_release must be integer, if you want to add suffixes to
|
# Note, gcc_release must be integer, if you want to add suffixes to
|
||||||
# %%{release}, append them after %%{gcc_release} on Release: line.
|
# %%{release}, append them after %%{gcc_release} on Release: line.
|
||||||
%global gcc_release 50
|
%global gcc_release 51
|
||||||
|
|
||||||
%global _unpackaged_files_terminate_build 0
|
%global _unpackaged_files_terminate_build 0
|
||||||
%global _performance_build 1
|
%global _performance_build 1
|
||||||
@ -417,6 +417,9 @@ Patch308: 0308-Fix-enum-INPUT-MIDDLE-FINAL-aes_stage.patch
|
|||||||
Patch309: 0309-CSPGO-Add-context-sensitive-PGO.patch
|
Patch309: 0309-CSPGO-Add-context-sensitive-PGO.patch
|
||||||
Patch310: 0310-CFGO-Add-cfgo-pgo-optimization.patch
|
Patch310: 0310-CFGO-Add-cfgo-pgo-optimization.patch
|
||||||
Patch311: 0311-PATCH-Add-if-split-optimization-pass.patch
|
Patch311: 0311-PATCH-Add-if-split-optimization-pass.patch
|
||||||
|
Patch312: 0312-Add-late-slp-vectorization-pass-with-additional-chec.patch
|
||||||
|
Patch313: 0313-Add-tracer-transformation-for-static-probabilities.patch
|
||||||
|
Patch314: 0314-bugfix-Modify-the-hip09-tune-flags.patch
|
||||||
|
|
||||||
# Part 1001-1999
|
# Part 1001-1999
|
||||||
%ifarch sw_64
|
%ifarch sw_64
|
||||||
@ -1514,6 +1517,9 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
|||||||
%patch -P309 -p1
|
%patch -P309 -p1
|
||||||
%patch -P310 -p1
|
%patch -P310 -p1
|
||||||
%patch -P311 -p1
|
%patch -P311 -p1
|
||||||
|
%patch -P312 -p1
|
||||||
|
%patch -P313 -p1
|
||||||
|
%patch -P314 -p1
|
||||||
|
|
||||||
%ifarch sw_64
|
%ifarch sw_64
|
||||||
%patch -P1001 -p1
|
%patch -P1001 -p1
|
||||||
@ -4135,6 +4141,12 @@ end
|
|||||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||||
|
|
||||||
%changelog
|
%changelog
|
||||||
|
* Wed Dec 04 2024 Mingchuan Wu <wumingchuan1992@foxmail.com> - 12.3.1-51
|
||||||
|
- Type:Sync
|
||||||
|
- ID:NA
|
||||||
|
- SUG:NA
|
||||||
|
- DESC:Sync [late-SLP], [tracer-opt], [bugfix4hip09] patches from openeuler/gcc.
|
||||||
|
|
||||||
* Thu Nov 28 2024 swcompiler <lc@wxiat.com> - 12.3.1-50
|
* Thu Nov 28 2024 swcompiler <lc@wxiat.com> - 12.3.1-50
|
||||||
- Type: Sw64
|
- Type: Sw64
|
||||||
- DESC:
|
- DESC:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user