251 lines
6.3 KiB
Diff
251 lines
6.3 KiB
Diff
|
|
From da06b276b6ae281efad2ec3b982e09b1f4015917 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Tamar Christina <tamar.christina@arm.com>
|
||
|
|
Date: Mon, 12 Dec 2022 15:18:56 +0000
|
||
|
|
Subject: [PATCH 082/157] [Backport][SME] AArch64: Support new tbranch optab.
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=17ae956c0fa6baac3d22764019d5dd5ebf5c2b11
|
||
|
|
|
||
|
|
This implements the new tbranch optab for AArch64.
|
||
|
|
|
||
|
|
We cannot emit one big RTL for the final instruction immediately.
|
||
|
|
The reason that all comparisons in the AArch64 backend expand to separate CC
|
||
|
|
compares, and separate testing of the operands is for ifcvt.
|
||
|
|
|
||
|
|
The separate CC compare is needed so ifcvt can produce csel, cset etc from the
|
||
|
|
compares. Unlike, say, combine, ifcvt cannot do recog on a parallel with a
|
||
|
|
clobber. Should we emit the instruction directly then ifcvt will not be able
|
||
|
|
to say, make a csel, because we have no patterns which handle zero_extract and
|
||
|
|
compare (unlike combine, ifcvt cannot transform the extract into an AND).
|
||
|
|
|
||
|
|
While you could provide various patterns for this (and I did try) you end up
|
||
|
|
with broken patterns because you can't add the clobber to the CC register. If
|
||
|
|
you do, ifcvt recog fails.
|
||
|
|
|
||
|
|
i.e.
|
||
|
|
|
||
|
|
int
|
||
|
|
f1 (int x)
|
||
|
|
{
|
||
|
|
if (x & 1)
|
||
|
|
return 1;
|
||
|
|
return x;
|
||
|
|
}
|
||
|
|
|
||
|
|
We lose csel here.
|
||
|
|
|
||
|
|
Secondly the reason the compare with an explicit CC mode is needed is so that
|
||
|
|
ifcvt can transform the operation into a version that doesn't require the flags
|
||
|
|
to be set. But it only does so if it knows the explicit usage of the CC reg.
|
||
|
|
|
||
|
|
For instance
|
||
|
|
|
||
|
|
int
|
||
|
|
foo (int a, int b)
|
||
|
|
{
|
||
|
|
return ((a & (1 << 25)) ? 5 : 4);
|
||
|
|
}
|
||
|
|
|
||
|
|
This doesn't require a comparison; the optimal form is:
|
||
|
|
|
||
|
|
foo(int, int):
|
||
|
|
ubfx x0, x0, 25, 1
|
||
|
|
add w0, w0, 4
|
||
|
|
ret
|
||
|
|
|
||
|
|
and no compare is actually needed. If you represent the instruction using an
|
||
|
|
ANDS instead of a zero_extract then you get close, but you end up with an ands
|
||
|
|
followed by an add, which is a slower operation.
|
||
|
|
|
||
|
|
gcc/ChangeLog:
|
||
|
|
|
||
|
|
* config/aarch64/aarch64.md (*tb<optab><mode>1): Rename to...
|
||
|
|
(*tb<optab><ALLI:mode><GPI:mode>1): ... this.
|
||
|
|
(tbranch_<code><mode>3): New.
|
||
|
|
* config/aarch64/iterators.md (ZEROM, zerom): New.
|
||
|
|
|
||
|
|
gcc/testsuite/ChangeLog:
|
||
|
|
|
||
|
|
* gcc.target/aarch64/tbz_1.c: New test.
|
||
|
|
---
|
||
|
|
gcc/config/aarch64/aarch64.md | 33 ++++++--
|
||
|
|
gcc/config/aarch64/iterators.md | 2 +
|
||
|
|
gcc/testsuite/gcc.target/aarch64/tbz_1.c | 95 ++++++++++++++++++++++++
|
||
|
|
3 files changed, 122 insertions(+), 8 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.target/aarch64/tbz_1.c
|
||
|
|
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||
|
|
index 079c8a3f9..2becc888e 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.md
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.md
|
||
|
|
@@ -953,12 +953,29 @@
|
||
|
|
(const_int 1)))]
|
||
|
|
)
|
||
|
|
|
||
|
|
-(define_insn "*tb<optab><mode>1"
|
||
|
|
+(define_expand "tbranch_<code><mode>3"
|
||
|
|
[(set (pc) (if_then_else
|
||
|
|
- (EQL (zero_extract:DI (match_operand:GPI 0 "register_operand" "r")
|
||
|
|
- (const_int 1)
|
||
|
|
- (match_operand 1
|
||
|
|
- "aarch64_simd_shift_imm_<mode>" "n"))
|
||
|
|
+ (EQL (match_operand:ALLI 0 "register_operand")
|
||
|
|
+ (match_operand 1 "aarch64_simd_shift_imm_<mode>"))
|
||
|
|
+ (label_ref (match_operand 2 ""))
|
||
|
|
+ (pc)))]
|
||
|
|
+ ""
|
||
|
|
+{
|
||
|
|
+ rtx bitvalue = gen_reg_rtx (<ZEROM>mode);
|
||
|
|
+ rtx reg = gen_lowpart (<ZEROM>mode, operands[0]);
|
||
|
|
+ rtx val = gen_int_mode (HOST_WIDE_INT_1U << UINTVAL (operands[1]), <ZEROM>mode);
|
||
|
|
+ emit_insn (gen_and<zerom>3 (bitvalue, reg, val));
|
||
|
|
+ operands[1] = const0_rtx;
|
||
|
|
+ operands[0] = aarch64_gen_compare_reg (<CODE>, bitvalue,
|
||
|
|
+ operands[1]);
|
||
|
|
+})
|
||
|
|
+
|
||
|
|
+(define_insn "*tb<optab><ALLI:mode><GPI:mode>1"
|
||
|
|
+ [(set (pc) (if_then_else
|
||
|
|
+ (EQL (zero_extract:GPI (match_operand:ALLI 0 "register_operand" "r")
|
||
|
|
+ (const_int 1)
|
||
|
|
+ (match_operand 1
|
||
|
|
+ "aarch64_simd_shift_imm_<ALLI:mode>" "n"))
|
||
|
|
(const_int 0))
|
||
|
|
(label_ref (match_operand 2 "" ""))
|
||
|
|
(pc)))
|
||
|
|
@@ -969,15 +986,15 @@
|
||
|
|
{
|
||
|
|
if (get_attr_far_branch (insn) == 1)
|
||
|
|
return aarch64_gen_far_branch (operands, 2, "Ltb",
|
||
|
|
- "<inv_tb>\\t%<w>0, %1, ");
|
||
|
|
+ "<inv_tb>\\t%<ALLI:w>0, %1, ");
|
||
|
|
else
|
||
|
|
{
|
||
|
|
operands[1] = GEN_INT (HOST_WIDE_INT_1U << UINTVAL (operands[1]));
|
||
|
|
- return "tst\t%<w>0, %1\;<bcond>\t%l2";
|
||
|
|
+ return "tst\t%<ALLI:w>0, %1\;<bcond>\t%l2";
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else
|
||
|
|
- return "<tbz>\t%<w>0, %1, %l2";
|
||
|
|
+ return "<tbz>\t%<ALLI:w>0, %1, %l2";
|
||
|
|
}
|
||
|
|
[(set_attr "type" "branch")
|
||
|
|
(set (attr "length")
|
||
|
|
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
||
|
|
index 226dea48a..b616f5c9a 100644
|
||
|
|
--- a/gcc/config/aarch64/iterators.md
|
||
|
|
+++ b/gcc/config/aarch64/iterators.md
|
||
|
|
@@ -1104,6 +1104,8 @@
|
||
|
|
|
||
|
|
;; Give the number of bits in the mode
|
||
|
|
(define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
|
||
|
|
+(define_mode_attr ZEROM [(QI "SI") (HI "SI") (SI "SI") (DI "DI")])
|
||
|
|
+(define_mode_attr zerom [(QI "si") (HI "si") (SI "si") (DI "di")])
|
||
|
|
|
||
|
|
;; Give the ordinal of the MSB in the mode
|
||
|
|
(define_mode_attr sizem1 [(QI "#7") (HI "#15") (SI "#31") (DI "#63")
|
||
|
|
diff --git a/gcc/testsuite/gcc.target/aarch64/tbz_1.c b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..39deb58e2
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.target/aarch64/tbz_1.c
|
||
|
|
@@ -0,0 +1,95 @@
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-additional-options "-O2 -std=c99 -fno-unwind-tables -fno-asynchronous-unwind-tables" } */
|
||
|
|
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
|
||
|
|
+
|
||
|
|
+#include <stdbool.h>
|
||
|
|
+
|
||
|
|
+void h(void);
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g1:
|
||
|
|
+** tbnz w[0-9]+, #?0, .L([0-9]+)
|
||
|
|
+** ret
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g1(bool x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x, 0))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g2:
|
||
|
|
+** tbz w[0-9]+, #?0, .L([0-9]+)
|
||
|
|
+** b h
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g2(bool x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x, 1))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g3_ge:
|
||
|
|
+** tbnz w[0-9]+, #?31, .L[0-9]+
|
||
|
|
+** b h
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g3_ge(int x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x >= 0, 1))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g3_gt:
|
||
|
|
+** cmp w[0-9]+, 0
|
||
|
|
+** ble .L[0-9]+
|
||
|
|
+** b h
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g3_gt(int x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x > 0, 1))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g3_lt:
|
||
|
|
+** tbz w[0-9]+, #?31, .L[0-9]+
|
||
|
|
+** b h
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g3_lt(int x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x < 0, 1))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g3_le:
|
||
|
|
+** cmp w[0-9]+, 0
|
||
|
|
+** bgt .L[0-9]+
|
||
|
|
+** b h
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g3_le(int x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x <= 0, 1))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/*
|
||
|
|
+** g5:
|
||
|
|
+** mov w[0-9]+, 65279
|
||
|
|
+** tst w[0-9]+, w[0-9]+
|
||
|
|
+** beq .L[0-9]+
|
||
|
|
+** b h
|
||
|
|
+** ...
|
||
|
|
+*/
|
||
|
|
+void g5(int x)
|
||
|
|
+{
|
||
|
|
+ if (__builtin_expect (x & 0xfeff, 1))
|
||
|
|
+ h ();
|
||
|
|
+}
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|