202 lines
6.6 KiB
Diff
202 lines
6.6 KiB
Diff
|
|
From 31433584b018cb2dc81e2366351a57bf5e1c4e44 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Richard Sandiford <richard.sandiford@arm.com>
|
||
|
|
Date: Tue, 17 Oct 2023 23:45:43 +0100
|
||
|
|
Subject: [PATCH 103/157] [Backport][SME] Handle epilogues that contain jumps
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=aeb3f0436f8ae84e593eda9641fe4e6fdf0afb3e
|
||
|
|
|
||
|
|
The prologue/epilogue pass allows the prologue sequence to contain
|
||
|
|
jumps. The sequence is then partitioned into basic blocks using
|
||
|
|
find_many_sub_basic_blocks.
|
||
|
|
|
||
|
|
This patch treats epilogues in a similar way. Since only one block
|
||
|
|
might need to be split, the patch (re)introduces a find_sub_basic_blocks
|
||
|
|
routine to handle a single block.
|
||
|
|
|
||
|
|
The new routine hard-codes the assumption that split_block will chain
|
||
|
|
the new block immediately after the original block. The routine doesn't
|
||
|
|
try to replicate the fix for PR81030, since that was specific to
|
||
|
|
gimple->rtl expansion.
|
||
|
|
|
||
|
|
The patch is needed for follow-on aarch64 patches that add conditional
|
||
|
|
code to the epilogue. The tests are part of those patches.
|
||
|
|
|
||
|
|
gcc/
|
||
|
|
* cfgbuild.h (find_sub_basic_blocks): Declare.
|
||
|
|
* cfgbuild.cc (update_profile_for_new_sub_basic_block): New function,
|
||
|
|
split out from...
|
||
|
|
(find_many_sub_basic_blocks): ...here.
|
||
|
|
(find_sub_basic_blocks): New function.
|
||
|
|
* function.cc (thread_prologue_and_epilogue_insns): Handle
|
||
|
|
epilogues that contain jumps.
|
||
|
|
---
|
||
|
|
gcc/cfgbuild.cc | 95 +++++++++++++++++++++++++++++++++----------------
|
||
|
|
gcc/cfgbuild.h | 1 +
|
||
|
|
gcc/function.cc | 4 +++
|
||
|
|
3 files changed, 70 insertions(+), 30 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/gcc/cfgbuild.cc b/gcc/cfgbuild.cc
|
||
|
|
index 646a06614..58b865f29 100644
|
||
|
|
--- a/gcc/cfgbuild.cc
|
||
|
|
+++ b/gcc/cfgbuild.cc
|
||
|
|
@@ -693,6 +693,43 @@ compute_outgoing_frequencies (basic_block b)
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Update the profile information for BB, which was created by splitting
|
||
|
|
+ an RTL block that had a non-final jump. */
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+update_profile_for_new_sub_basic_block (basic_block bb)
|
||
|
|
+{
|
||
|
|
+ edge e;
|
||
|
|
+ edge_iterator ei;
|
||
|
|
+
|
||
|
|
+ bool initialized_src = false, uninitialized_src = false;
|
||
|
|
+ bb->count = profile_count::zero ();
|
||
|
|
+ FOR_EACH_EDGE (e, ei, bb->preds)
|
||
|
|
+ {
|
||
|
|
+ if (e->count ().initialized_p ())
|
||
|
|
+ {
|
||
|
|
+ bb->count += e->count ();
|
||
|
|
+ initialized_src = true;
|
||
|
|
+ }
|
||
|
|
+ else
|
||
|
|
+ uninitialized_src = true;
|
||
|
|
+ }
|
||
|
|
+ /* When some edges are missing with read profile, this is
|
||
|
|
+ most likely because RTL expansion introduced a loop.
|
||
|
|
+ When profile is guessed we may have BB that is reachable
|
||
|
|
+ from unlikely path as well as from normal path.
|
||
|
|
+
|
||
|
|
+ TODO: We should handle loops created during BB expansion
|
||
|
|
+ correctly here. For now we assume all those loops to cycle
|
||
|
|
+ precisely once. */
|
||
|
|
+ if (!initialized_src
|
||
|
|
+ || (uninitialized_src
|
||
|
|
+ && profile_status_for_fn (cfun) < PROFILE_GUESSED))
|
||
|
|
+ bb->count = profile_count::uninitialized ();
|
||
|
|
+
|
||
|
|
+ compute_outgoing_frequencies (bb);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Assume that some pass has inserted labels or control flow
|
||
|
|
instructions within a basic block. Split basic blocks as needed
|
||
|
|
and create edges. */
|
||
|
|
@@ -744,40 +781,15 @@ find_many_sub_basic_blocks (sbitmap blocks)
|
||
|
|
if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
|
||
|
|
FOR_BB_BETWEEN (bb, min, max->next_bb, next_bb)
|
||
|
|
{
|
||
|
|
- edge e;
|
||
|
|
- edge_iterator ei;
|
||
|
|
-
|
||
|
|
if (STATE (bb) == BLOCK_ORIGINAL)
|
||
|
|
continue;
|
||
|
|
if (STATE (bb) == BLOCK_NEW)
|
||
|
|
{
|
||
|
|
- bool initialized_src = false, uninitialized_src = false;
|
||
|
|
- bb->count = profile_count::zero ();
|
||
|
|
- FOR_EACH_EDGE (e, ei, bb->preds)
|
||
|
|
- {
|
||
|
|
- if (e->count ().initialized_p ())
|
||
|
|
- {
|
||
|
|
- bb->count += e->count ();
|
||
|
|
- initialized_src = true;
|
||
|
|
- }
|
||
|
|
- else
|
||
|
|
- uninitialized_src = true;
|
||
|
|
- }
|
||
|
|
- /* When some edges are missing with read profile, this is
|
||
|
|
- most likely because RTL expansion introduced loop.
|
||
|
|
- When profile is guessed we may have BB that is reachable
|
||
|
|
- from unlikely path as well as from normal path.
|
||
|
|
-
|
||
|
|
- TODO: We should handle loops created during BB expansion
|
||
|
|
- correctly here. For now we assume all those loop to cycle
|
||
|
|
- precisely once. */
|
||
|
|
- if (!initialized_src
|
||
|
|
- || (uninitialized_src
|
||
|
|
- && profile_status_for_fn (cfun) < PROFILE_GUESSED))
|
||
|
|
- bb->count = profile_count::uninitialized ();
|
||
|
|
+ update_profile_for_new_sub_basic_block (bb);
|
||
|
|
+ continue;
|
||
|
|
}
|
||
|
|
- /* If nothing changed, there is no need to create new BBs. */
|
||
|
|
- else if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
|
||
|
|
+ /* If nothing changed, there is no need to create new BBs. */
|
||
|
|
+ if (EDGE_COUNT (bb->succs) == n_succs[bb->index])
|
||
|
|
{
|
||
|
|
/* In rare occassions RTL expansion might have mistakely assigned
|
||
|
|
a probabilities different from what is in CFG. This happens
|
||
|
|
@@ -788,10 +800,33 @@ find_many_sub_basic_blocks (sbitmap blocks)
|
||
|
|
update_br_prob_note (bb);
|
||
|
|
continue;
|
||
|
|
}
|
||
|
|
-
|
||
|
|
compute_outgoing_frequencies (bb);
|
||
|
|
}
|
||
|
|
|
||
|
|
FOR_EACH_BB_FN (bb, cfun)
|
||
|
|
SET_STATE (bb, 0);
|
||
|
|
}
|
||
|
|
+
|
||
|
|
+/* Like find_many_sub_basic_blocks, but look only within BB. */
|
||
|
|
+
|
||
|
|
+void
|
||
|
|
+find_sub_basic_blocks (basic_block bb)
|
||
|
|
+{
|
||
|
|
+ basic_block end_bb = bb->next_bb;
|
||
|
|
+ find_bb_boundaries (bb);
|
||
|
|
+ if (bb->next_bb == end_bb)
|
||
|
|
+ return;
|
||
|
|
+
|
||
|
|
+ /* Re-scan and wire in all edges. This expects simple (conditional)
|
||
|
|
+ jumps at the end of each new basic block. */
|
||
|
|
+ make_edges (bb, end_bb->prev_bb, 1);
|
||
|
|
+
|
||
|
|
+ /* Update branch probabilities. Expect only (un)conditional jumps
|
||
|
|
+ to be created with only the forward edges. */
|
||
|
|
+ if (profile_status_for_fn (cfun) != PROFILE_ABSENT)
|
||
|
|
+ {
|
||
|
|
+ compute_outgoing_frequencies (bb);
|
||
|
|
+ for (bb = bb->next_bb; bb != end_bb; bb = bb->next_bb)
|
||
|
|
+ update_profile_for_new_sub_basic_block (bb);
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/cfgbuild.h b/gcc/cfgbuild.h
|
||
|
|
index 85145da7f..53543bb75 100644
|
||
|
|
--- a/gcc/cfgbuild.h
|
||
|
|
+++ b/gcc/cfgbuild.h
|
||
|
|
@@ -24,5 +24,6 @@ extern bool inside_basic_block_p (const rtx_insn *);
|
||
|
|
extern bool control_flow_insn_p (const rtx_insn *);
|
||
|
|
extern void rtl_make_eh_edge (sbitmap, basic_block, rtx);
|
||
|
|
extern void find_many_sub_basic_blocks (sbitmap);
|
||
|
|
+extern void find_sub_basic_blocks (basic_block);
|
||
|
|
|
||
|
|
#endif /* GCC_CFGBUILD_H */
|
||
|
|
diff --git a/gcc/function.cc b/gcc/function.cc
|
||
|
|
index ddab43ca4..f4fc211a0 100644
|
||
|
|
--- a/gcc/function.cc
|
||
|
|
+++ b/gcc/function.cc
|
||
|
|
@@ -6126,6 +6126,8 @@ thread_prologue_and_epilogue_insns (void)
|
||
|
|
&& returnjump_p (BB_END (e->src)))
|
||
|
|
e->flags &= ~EDGE_FALLTHRU;
|
||
|
|
}
|
||
|
|
+
|
||
|
|
+ find_sub_basic_blocks (BLOCK_FOR_INSN (epilogue_seq));
|
||
|
|
}
|
||
|
|
else if (next_active_insn (BB_END (exit_fallthru_edge->src)))
|
||
|
|
{
|
||
|
|
@@ -6234,6 +6236,8 @@ thread_prologue_and_epilogue_insns (void)
|
||
|
|
set_insn_locations (seq, epilogue_location);
|
||
|
|
|
||
|
|
emit_insn_before (seq, insn);
|
||
|
|
+
|
||
|
|
+ find_sub_basic_blocks (BLOCK_FOR_INSN (insn));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|