418 lines
16 KiB
Diff
418 lines
16 KiB
Diff
From a8f10b4b73c2624599765edf7ff19d53eca15135 Mon Sep 17 00:00:00 2001
|
||
From: Tamar Christina <tamar.christina@arm.com>
|
||
Date: Mon, 12 Dec 2022 15:16:50 +0000
|
||
Subject: [PATCH 146/157] [Backport][SME] middle-end: Add new tbranch optab to
|
||
add support for bit-test-and-branch operations
|
||
|
||
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc582d2ef32e2d3723c68d111f4e49607631f34d
|
||
|
||
This adds a new test-and-branch optab that can be used to do a conditional test
|
||
of a bit and branch. This is similar to the cbranch optab but instead can
|
||
test any arbitrary bit inside the register.
|
||
|
||
This patch recognizes boolean comparisons and single bit mask tests.
|
||
|
||
gcc/ChangeLog:
|
||
|
||
* dojump.cc (do_jump): Pass along value.
|
||
(do_jump_by_parts_greater_rtx): Likewise.
|
||
(do_jump_by_parts_zero_rtx): Likewise.
|
||
(do_jump_by_parts_equality_rtx): Likewise.
|
||
(do_compare_rtx_and_jump): Likewise.
|
||
(do_compare_and_jump): Likewise.
|
||
* dojump.h (do_compare_rtx_and_jump): New.
|
||
* optabs.cc (emit_cmp_and_jump_insn_1): Refactor to take optab to check.
|
||
(validate_test_and_branch): New.
|
||
(emit_cmp_and_jump_insns): Optionally take a value, and when value is
|
||
supplied then check if it's suitable for tbranch.
|
||
* optabs.def (tbranch_eq$a3, tbranch_ne$a3): New.
|
||
* doc/md.texi (tbranch_@var{op}@var{mode}3): Document it.
|
||
* optabs.h (emit_cmp_and_jump_insns): New.
|
||
* tree.h (tree_zero_one_valued_p): New.
|
||
---
|
||
gcc/doc/md.texi | 7 +++
|
||
gcc/dojump.cc | 52 +++++++++++++++-------
|
||
gcc/dojump.h | 4 ++
|
||
gcc/optabs.cc | 114 ++++++++++++++++++++++++++++++++++++++++++++----
|
||
gcc/optabs.def | 2 +
|
||
gcc/optabs.h | 4 ++
|
||
gcc/tree.h | 1 +
|
||
7 files changed, 159 insertions(+), 25 deletions(-)
|
||
|
||
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
|
||
index c0cf0ec64..2193900e7 100644
|
||
--- a/gcc/doc/md.texi
|
||
+++ b/gcc/doc/md.texi
|
||
@@ -7299,6 +7299,13 @@ case, you can and should make operand 1's predicate reject some operators
|
||
in the @samp{cstore@var{mode}4} pattern, or remove the pattern altogether
|
||
from the machine description.
|
||
|
||
+@cindex @code{tbranch_@var{op}@var{mode}3} instruction pattern
|
||
+@item @samp{tbranch_@var{op}@var{mode}3}
|
||
+Conditional branch instruction combined with a bit test-and-compare
|
||
+instruction. Operand 0 is the operand of the comparison. Operand 1 is the bit
|
||
+position of Operand 0 to test. Operand 2 is the @code{code_label} to jump to.
|
||
+@var{op} is one of @var{eq} or @var{ne}.
|
||
+
|
||
@cindex @code{cbranch@var{mode}4} instruction pattern
|
||
@item @samp{cbranch@var{mode}4}
|
||
Conditional branch instruction combined with a compare instruction.
|
||
diff --git a/gcc/dojump.cc b/gcc/dojump.cc
|
||
index 0c880d653..604b28537 100644
|
||
--- a/gcc/dojump.cc
|
||
+++ b/gcc/dojump.cc
|
||
@@ -621,7 +621,7 @@ do_jump (tree exp, rtx_code_label *if_false_label,
|
||
}
|
||
do_compare_rtx_and_jump (temp, CONST0_RTX (GET_MODE (temp)),
|
||
NE, TYPE_UNSIGNED (TREE_TYPE (exp)),
|
||
- GET_MODE (temp), NULL_RTX,
|
||
+ exp, GET_MODE (temp), NULL_RTX,
|
||
if_false_label, if_true_label, prob);
|
||
}
|
||
|
||
@@ -689,7 +689,7 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
|
||
|
||
/* All but high-order word must be compared as unsigned. */
|
||
do_compare_rtx_and_jump (op0_word, op1_word, code, (unsignedp || i > 0),
|
||
- word_mode, NULL_RTX, NULL, if_true_label,
|
||
+ NULL, word_mode, NULL_RTX, NULL, if_true_label,
|
||
prob);
|
||
|
||
/* Emit only one comparison for 0. Do not emit the last cond jump. */
|
||
@@ -697,8 +697,8 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0,
|
||
break;
|
||
|
||
/* Consider lower words only if these are equal. */
|
||
- do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, word_mode,
|
||
- NULL_RTX, NULL, if_false_label,
|
||
+ do_compare_rtx_and_jump (op0_word, op1_word, NE, unsignedp, NULL,
|
||
+ word_mode, NULL_RTX, NULL, if_false_label,
|
||
prob.invert ());
|
||
}
|
||
|
||
@@ -757,7 +757,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
|
||
|
||
if (part != 0)
|
||
{
|
||
- do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, word_mode,
|
||
+ do_compare_rtx_and_jump (part, const0_rtx, EQ, 1, NULL, word_mode,
|
||
NULL_RTX, if_false_label, if_true_label, prob);
|
||
return;
|
||
}
|
||
@@ -768,7 +768,7 @@ do_jump_by_parts_zero_rtx (scalar_int_mode mode, rtx op0,
|
||
|
||
for (i = 0; i < nwords; i++)
|
||
do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
|
||
- const0_rtx, EQ, 1, word_mode, NULL_RTX,
|
||
+ const0_rtx, EQ, 1, NULL, word_mode, NULL_RTX,
|
||
if_false_label, NULL, prob);
|
||
|
||
if (if_true_label)
|
||
@@ -811,8 +811,8 @@ do_jump_by_parts_equality_rtx (scalar_int_mode mode, rtx op0, rtx op1,
|
||
|
||
for (i = 0; i < nwords; i++)
|
||
do_compare_rtx_and_jump (operand_subword_force (op0, i, mode),
|
||
- operand_subword_force (op1, i, mode),
|
||
- EQ, 0, word_mode, NULL_RTX,
|
||
+ operand_subword_force (op1, i, mode),
|
||
+ EQ, 0, NULL, word_mode, NULL_RTX,
|
||
if_false_label, NULL, prob);
|
||
|
||
if (if_true_label)
|
||
@@ -964,6 +964,23 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
|
||
rtx_code_label *if_false_label,
|
||
rtx_code_label *if_true_label,
|
||
profile_probability prob)
|
||
+{
|
||
+ do_compare_rtx_and_jump (op0, op1, code, unsignedp, NULL, mode, size,
|
||
+ if_false_label, if_true_label, prob);
|
||
+}
|
||
+
|
||
+/* Like do_compare_and_jump but expects the values to compare as two rtx's.
|
||
+ The decision as to signed or unsigned comparison must be made by the caller.
|
||
+
|
||
+ If MODE is BLKmode, SIZE is an RTX giving the size of the objects being
|
||
+ compared. */
|
||
+
|
||
+void
|
||
+do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
|
||
+ tree val, machine_mode mode, rtx size,
|
||
+ rtx_code_label *if_false_label,
|
||
+ rtx_code_label *if_true_label,
|
||
+ profile_probability prob)
|
||
{
|
||
rtx tem;
|
||
rtx_code_label *dummy_label = NULL;
|
||
@@ -1179,8 +1196,10 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
|
||
}
|
||
else
|
||
dest_label = if_false_label;
|
||
- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
|
||
- size, dest_label, NULL, first_prob);
|
||
+
|
||
+ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
|
||
+ val, mode, size, dest_label, NULL,
|
||
+ first_prob);
|
||
}
|
||
/* For !and_them we want to split:
|
||
if (x) goto t; // prob;
|
||
@@ -1194,8 +1213,9 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
|
||
else
|
||
{
|
||
profile_probability first_prob = prob.split (cprob);
|
||
- do_compare_rtx_and_jump (op0, op1, first_code, unsignedp, mode,
|
||
- size, NULL, if_true_label, first_prob);
|
||
+ do_compare_rtx_and_jump (op0, op1, first_code, unsignedp,
|
||
+ val, mode, size, NULL,
|
||
+ if_true_label, first_prob);
|
||
if (orig_code == NE && can_compare_p (UNEQ, mode, ccp_jump))
|
||
{
|
||
/* x != y can be split into x unord y || x ltgt y
|
||
@@ -1217,7 +1237,7 @@ do_compare_rtx_and_jump (rtx op0, rtx op1, enum rtx_code code, int unsignedp,
|
||
}
|
||
}
|
||
|
||
- emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp,
|
||
+ emit_cmp_and_jump_insns (op0, op1, code, size, mode, unsignedp, val,
|
||
if_true_label, prob);
|
||
}
|
||
|
||
@@ -1291,9 +1311,9 @@ do_compare_and_jump (tree treeop0, tree treeop1, enum rtx_code signed_code,
|
||
op1 = new_op1;
|
||
}
|
||
|
||
- do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode,
|
||
- ((mode == BLKmode)
|
||
- ? expr_size (treeop0) : NULL_RTX),
|
||
+ do_compare_rtx_and_jump (op0, op1, code, unsignedp, treeop0, mode,
|
||
+ ((mode == BLKmode)
|
||
+ ? expr_size (treeop0) : NULL_RTX),
|
||
if_false_label, if_true_label, prob);
|
||
}
|
||
|
||
diff --git a/gcc/dojump.h b/gcc/dojump.h
|
||
index e379cceb3..d1d79c490 100644
|
||
--- a/gcc/dojump.h
|
||
+++ b/gcc/dojump.h
|
||
@@ -71,6 +71,10 @@ extern void jumpifnot (tree exp, rtx_code_label *label,
|
||
extern void jumpifnot_1 (enum tree_code, tree, tree, rtx_code_label *,
|
||
profile_probability);
|
||
|
||
+extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int, tree,
|
||
+ machine_mode, rtx, rtx_code_label *,
|
||
+ rtx_code_label *, profile_probability);
|
||
+
|
||
extern void do_compare_rtx_and_jump (rtx, rtx, enum rtx_code, int,
|
||
machine_mode, rtx, rtx_code_label *,
|
||
rtx_code_label *, profile_probability);
|
||
diff --git a/gcc/optabs.cc b/gcc/optabs.cc
|
||
index 3d8fa3abd..b441137de 100644
|
||
--- a/gcc/optabs.cc
|
||
+++ b/gcc/optabs.cc
|
||
@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see
|
||
#include "libfuncs.h"
|
||
#include "internal-fn.h"
|
||
#include "langhooks.h"
|
||
+#include "gimple.h"
|
||
+#include "ssa.h"
|
||
|
||
static void prepare_float_lib_cmp (rtx, rtx, enum rtx_code, rtx *,
|
||
machine_mode *);
|
||
@@ -4621,7 +4623,8 @@ prepare_operand (enum insn_code icode, rtx x, int opnum, machine_mode mode,
|
||
|
||
static void
|
||
emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
|
||
- profile_probability prob)
|
||
+ direct_optab cmp_optab, profile_probability prob,
|
||
+ bool test_branch)
|
||
{
|
||
machine_mode optab_mode;
|
||
enum mode_class mclass;
|
||
@@ -4630,12 +4633,17 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
|
||
|
||
mclass = GET_MODE_CLASS (mode);
|
||
optab_mode = (mclass == MODE_CC) ? CCmode : mode;
|
||
- icode = optab_handler (cbranch_optab, optab_mode);
|
||
+ icode = optab_handler (cmp_optab, optab_mode);
|
||
|
||
gcc_assert (icode != CODE_FOR_nothing);
|
||
- gcc_assert (insn_operand_matches (icode, 0, test));
|
||
- insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
|
||
- XEXP (test, 1), label));
|
||
+ gcc_assert (test_branch || insn_operand_matches (icode, 0, test));
|
||
+ if (test_branch)
|
||
+ insn = emit_jump_insn (GEN_FCN (icode) (XEXP (test, 0),
|
||
+ XEXP (test, 1), label));
|
||
+ else
|
||
+ insn = emit_jump_insn (GEN_FCN (icode) (test, XEXP (test, 0),
|
||
+ XEXP (test, 1), label));
|
||
+
|
||
if (prob.initialized_p ()
|
||
&& profile_status_for_fn (cfun) != PROFILE_ABSENT
|
||
&& insn
|
||
@@ -4645,6 +4653,68 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
|
||
add_reg_br_prob_note (insn, prob);
|
||
}
|
||
|
||
+/* PTEST points to a comparison that compares its first operand with zero.
|
||
+ Check to see if it can be performed as a bit-test-and-branch instead.
|
||
+ On success, return the instruction that performs the bit-test-and-branch
|
||
+ and replace the second operand of *PTEST with the bit number to test.
|
||
+ On failure, return CODE_FOR_nothing and leave *PTEST unchanged.
|
||
+
|
||
+ Note that the comparison described by *PTEST should not be taken
|
||
+ literally after a successful return. *PTEST is just a convenient
|
||
+ place to store the two operands of the bit-and-test.
|
||
+
|
||
+ VAL must contain the original tree expression for the first operand
|
||
+ of *PTEST. */
|
||
+
|
||
+static enum insn_code
|
||
+validate_test_and_branch (tree val, rtx *ptest, machine_mode *pmode, optab *res)
|
||
+{
|
||
+ if (!val || TREE_CODE (val) != SSA_NAME)
|
||
+ return CODE_FOR_nothing;
|
||
+
|
||
+ machine_mode mode = TYPE_MODE (TREE_TYPE (val));
|
||
+ rtx test = *ptest;
|
||
+ direct_optab optab;
|
||
+
|
||
+ if (GET_CODE (test) == EQ)
|
||
+ optab = tbranch_eq_optab;
|
||
+ else if (GET_CODE (test) == NE)
|
||
+ optab = tbranch_ne_optab;
|
||
+ else
|
||
+ return CODE_FOR_nothing;
|
||
+
|
||
+ *res = optab;
|
||
+
|
||
+ /* If the target supports the testbit comparison directly, great. */
|
||
+ auto icode = direct_optab_handler (optab, mode);
|
||
+ if (icode == CODE_FOR_nothing)
|
||
+ return icode;
|
||
+
|
||
+ if (tree_zero_one_valued_p (val))
|
||
+ {
|
||
+ auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 : 0;
|
||
+ XEXP (test, 1) = gen_int_mode (pos, mode);
|
||
+ *ptest = test;
|
||
+ *pmode = mode;
|
||
+ return icode;
|
||
+ }
|
||
+
|
||
+ wide_int wcst = get_nonzero_bits (val);
|
||
+ if (wcst == -1)
|
||
+ return CODE_FOR_nothing;
|
||
+
|
||
+ int bitpos;
|
||
+
|
||
+ if ((bitpos = wi::exact_log2 (wcst)) == -1)
|
||
+ return CODE_FOR_nothing;
|
||
+
|
||
+ auto pos = BITS_BIG_ENDIAN ? GET_MODE_BITSIZE (mode) - 1 - bitpos : bitpos;
|
||
+ XEXP (test, 1) = gen_int_mode (pos, mode);
|
||
+ *ptest = test;
|
||
+ *pmode = mode;
|
||
+ return icode;
|
||
+}
|
||
+
|
||
/* Generate code to compare X with Y so that the condition codes are
|
||
set and to jump to LABEL if the condition is true. If X is a
|
||
constant and Y is not a constant, then the comparison is swapped to
|
||
@@ -4662,11 +4732,13 @@ emit_cmp_and_jump_insn_1 (rtx test, machine_mode mode, rtx label,
|
||
It will be potentially converted into an unsigned variant based on
|
||
UNSIGNEDP to select a proper jump instruction.
|
||
|
||
- PROB is the probability of jumping to LABEL. */
|
||
+ PROB is the probability of jumping to LABEL. If the comparison is against
|
||
+ zero then VAL contains the expression from which the non-zero RTL is
|
||
+ derived. */
|
||
|
||
void
|
||
emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
|
||
- machine_mode mode, int unsignedp, rtx label,
|
||
+ machine_mode mode, int unsignedp, tree val, rtx label,
|
||
profile_probability prob)
|
||
{
|
||
rtx op0 = x, op1 = y;
|
||
@@ -4691,10 +4763,34 @@ emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
|
||
|
||
prepare_cmp_insn (op0, op1, comparison, size, unsignedp, OPTAB_LIB_WIDEN,
|
||
&test, &mode);
|
||
- emit_cmp_and_jump_insn_1 (test, mode, label, prob);
|
||
+
|
||
+ /* Check if we're comparing a truth type with 0, and if so check if
|
||
+ the target supports tbranch. */
|
||
+ machine_mode tmode = mode;
|
||
+ direct_optab optab;
|
||
+ if (op1 == CONST0_RTX (GET_MODE (op1))
|
||
+ && validate_test_and_branch (val, &test, &tmode,
|
||
+ &optab) != CODE_FOR_nothing)
|
||
+ {
|
||
+ emit_cmp_and_jump_insn_1 (test, tmode, label, optab, prob, true);
|
||
+ return;
|
||
+ }
|
||
+
|
||
+ emit_cmp_and_jump_insn_1 (test, mode, label, cbranch_optab, prob, false);
|
||
}
|
||
|
||
-
|
||
+/* Overloaded version of emit_cmp_and_jump_insns in which VAL is unknown. */
|
||
+
|
||
+void
|
||
+emit_cmp_and_jump_insns (rtx x, rtx y, enum rtx_code comparison, rtx size,
|
||
+ machine_mode mode, int unsignedp, rtx label,
|
||
+ profile_probability prob)
|
||
+{
|
||
+ emit_cmp_and_jump_insns (x, y, comparison, size, mode, unsignedp, NULL,
|
||
+ label, prob);
|
||
+}
|
||
+
|
||
+
|
||
/* Emit a library call comparison between floating point X and Y.
|
||
COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.). */
|
||
|
||
diff --git a/gcc/optabs.def b/gcc/optabs.def
|
||
index 801310eba..dbf529434 100644
|
||
--- a/gcc/optabs.def
|
||
+++ b/gcc/optabs.def
|
||
@@ -220,6 +220,8 @@ OPTAB_D (reload_in_optab, "reload_in$a")
|
||
OPTAB_D (reload_out_optab, "reload_out$a")
|
||
|
||
OPTAB_DC(cbranch_optab, "cbranch$a4", COMPARE)
|
||
+OPTAB_D (tbranch_eq_optab, "tbranch_eq$a3")
|
||
+OPTAB_D (tbranch_ne_optab, "tbranch_ne$a3")
|
||
OPTAB_D (addcc_optab, "add$acc")
|
||
OPTAB_D (negcc_optab, "neg$acc")
|
||
OPTAB_D (notcc_optab, "not$acc")
|
||
diff --git a/gcc/optabs.h b/gcc/optabs.h
|
||
index cfd7c742d..cd55604bc 100644
|
||
--- a/gcc/optabs.h
|
||
+++ b/gcc/optabs.h
|
||
@@ -268,6 +268,10 @@ extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
|
||
machine_mode, int, rtx,
|
||
profile_probability prob
|
||
= profile_probability::uninitialized ());
|
||
+extern void emit_cmp_and_jump_insns (rtx, rtx, enum rtx_code, rtx,
|
||
+ machine_mode, int, tree, rtx,
|
||
+ profile_probability prob
|
||
+ = profile_probability::uninitialized ());
|
||
|
||
/* Generate code to indirectly jump to a location given in the rtx LOC. */
|
||
extern void emit_indirect_jump (rtx);
|
||
diff --git a/gcc/tree.h b/gcc/tree.h
|
||
index 3ff7732dc..07af584d6 100644
|
||
--- a/gcc/tree.h
|
||
+++ b/gcc/tree.h
|
||
@@ -4627,6 +4627,7 @@ extern tree signed_or_unsigned_type_for (int, tree);
|
||
extern tree signed_type_for (tree);
|
||
extern tree unsigned_type_for (tree);
|
||
extern bool is_truth_type_for (tree, tree);
|
||
+extern bool tree_zero_one_valued_p (tree);
|
||
extern tree truth_type_for (tree);
|
||
extern tree build_pointer_type_for_mode (tree, machine_mode, bool);
|
||
extern tree build_pointer_type (tree);
|
||
--
|
||
2.33.0
|
||
|