185 lines
6.7 KiB
Diff
185 lines
6.7 KiB
Diff
|
|
From f6652dbebf81372884e9fd8b68627fc7a94d8d3b Mon Sep 17 00:00:00 2001
|
||
|
|
From: Roger Sayle <roger@nextmovesoftware.com>
|
||
|
|
Date: Fri, 27 May 2022 08:57:46 +0100
|
||
|
|
Subject: [PATCH 145/157] [Backport][SME] Canonicalize X&-Y as X*Y in match.pd
|
||
|
|
when Y is [0,1].
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=8fb94fc6097c0a934aac0d89c9c5e2038da67655
|
||
|
|
|
||
|
|
"For every pessimization, there's an equal and opposite optimization".
|
||
|
|
|
||
|
|
In the review of my original patch for PR middle-end/98865, Richard
|
||
|
|
Biener pointed out that match.pd shouldn't be transforming X*Y into
|
||
|
|
X&-Y as the former is considered cheaper by tree-ssa's cost model
|
||
|
|
(operator count). A corollary of this is that we should instead be
|
||
|
|
transforming X&-Y into the cheaper X*Y as a preferred canonical form
|
||
|
|
(especially as RTL expansion now intelligently selects the appropriate
|
||
|
|
implementation based on the target's costs).
|
||
|
|
|
||
|
|
With this patch we now generate identical code for:
|
||
|
|
int foo(int x, int y) { return -(x&1) & y; }
|
||
|
|
int bar(int x, int y) { return (x&1) * y; }
|
||
|
|
|
||
|
|
specifically on x86_64-pc-linux-gnu both use and/neg/and with -O2,
|
||
|
|
but both use and/mul with -Os.
|
||
|
|
|
||
|
|
One minor wrinkle/improvement is that this patch includes three
|
||
|
|
additional optimizations (that account for the change in canonical
|
||
|
|
form) to continue to optimize PR92834 and PR94786.
|
||
|
|
|
||
|
|
2022-05-27 Roger Sayle <roger@nextmovesoftware.com>
|
||
|
|
|
||
|
|
gcc/ChangeLog
|
||
|
|
* match.pd (zero_one_valued_p): New predicate.
|
||
|
|
(mult @0 @1): Use zero_one_valued_p for optimization to the
|
||
|
|
expression "bit_and @0 @1".
|
||
|
|
(bit_and (negate zero_one_valued_p@0) @1): Optimize to MULT_EXPR.
|
||
|
|
(plus @0 (mult (minus @1 @0) zero_one_valued_p@2)): New transform.
|
||
|
|
(minus @0 (mult (minus @0 @1) zero_one_valued_p@2)): Likewise.
|
||
|
|
(bit_xor @0 (mult (bit_xor @0 @1) zero_one_valued_p@2)): Likewise.
|
||
|
|
Remove three redundant transforms obsoleted by the three above.
|
||
|
|
|
||
|
|
gcc/testsuite/ChangeLog
|
||
|
|
* gcc.dg/pr98865.c: New test case.
|
||
|
|
---
|
||
|
|
gcc/match.pd | 86 ++++++++++++++++------------------
|
||
|
|
gcc/testsuite/gcc.dg/pr98865.c | 14 ++++++
|
||
|
|
2 files changed, 55 insertions(+), 45 deletions(-)
|
||
|
|
create mode 100644 gcc/testsuite/gcc.dg/pr98865.c
|
||
|
|
|
||
|
|
diff --git a/gcc/match.pd b/gcc/match.pd
|
||
|
|
index aee58e47b..6d3165bcd 100644
|
||
|
|
--- a/gcc/match.pd
|
||
|
|
+++ b/gcc/match.pd
|
||
|
|
@@ -285,14 +285,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||
|
|
|| !COMPLEX_FLOAT_TYPE_P (type)))
|
||
|
|
(negate @0)))
|
||
|
|
|
||
|
|
-/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */
|
||
|
|
-(simplify
|
||
|
|
- (mult SSA_NAME@1 SSA_NAME@2)
|
||
|
|
- (if (INTEGRAL_TYPE_P (type)
|
||
|
|
- && get_nonzero_bits (@1) == 1
|
||
|
|
- && get_nonzero_bits (@2) == 1)
|
||
|
|
- (bit_and @1 @2)))
|
||
|
|
-
|
||
|
|
/* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
|
||
|
|
unless the target has native support for the former but not the latter. */
|
||
|
|
(simplify
|
||
|
|
@@ -1790,6 +1782,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||
|
|
(bit_not (bit_not @0))
|
||
|
|
@0)
|
||
|
|
|
||
|
|
+(match zero_one_valued_p
|
||
|
|
+ @0
|
||
|
|
+ (if (INTEGRAL_TYPE_P (type) && tree_nonzero_bits (@0) == 1)))
|
||
|
|
+(match zero_one_valued_p
|
||
|
|
+ truth_valued_p@0)
|
||
|
|
+
|
||
|
|
+/* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 }. */
|
||
|
|
+(simplify
|
||
|
|
+ (mult zero_one_valued_p@0 zero_one_valued_p@1)
|
||
|
|
+ (if (INTEGRAL_TYPE_P (type))
|
||
|
|
+ (bit_and @0 @1)))
|
||
|
|
+
|
||
|
|
+/* Transform X & -Y into X * Y when Y is { 0 or 1 }. */
|
||
|
|
+(simplify
|
||
|
|
+ (bit_and:c (convert? (negate zero_one_valued_p@0)) @1)
|
||
|
|
+ (if (INTEGRAL_TYPE_P (type)
|
||
|
|
+ && INTEGRAL_TYPE_P (TREE_TYPE (@0))
|
||
|
|
+ && TREE_CODE (TREE_TYPE (@0)) != BOOLEAN_TYPE
|
||
|
|
+ && !TYPE_UNSIGNED (TREE_TYPE (@0)))
|
||
|
|
+ (mult (convert @0) @1)))
|
||
|
|
+
|
||
|
|
/* Convert ~ (-A) to A - 1. */
|
||
|
|
(simplify
|
||
|
|
(bit_not (convert? (negate @0)))
|
||
|
|
@@ -3281,44 +3294,27 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||
|
|
(cmp @0 (minmax:c @0 @1))
|
||
|
|
{ constant_boolean_node (cmp == GE_EXPR || cmp == LE_EXPR, type); } ))
|
||
|
|
|
||
|
|
-/* Undo fancy way of writing max/min or other ?: expressions,
|
||
|
|
- like a - ((a - b) & -(a < b)), in this case into (a < b) ? b : a.
|
||
|
|
+/* Undo fancy ways of writing max/min or other ?: expressions, like
|
||
|
|
+ a - ((a - b) & -(a < b)) and a - (a - b) * (a < b) into (a < b) ? b : a.
|
||
|
|
People normally use ?: and that is what we actually try to optimize. */
|
||
|
|
-(for cmp (simple_comparison)
|
||
|
|
- (simplify
|
||
|
|
- (minus @0 (bit_and:c (minus @0 @1)
|
||
|
|
- (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
|
||
|
|
- (if (INTEGRAL_TYPE_P (type)
|
||
|
|
- && INTEGRAL_TYPE_P (TREE_TYPE (@4))
|
||
|
|
- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
|
||
|
|
- && INTEGRAL_TYPE_P (TREE_TYPE (@5))
|
||
|
|
- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
|
||
|
|
- || !TYPE_UNSIGNED (TREE_TYPE (@4)))
|
||
|
|
- && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
|
||
|
|
- (cond (cmp @2 @3) @1 @0)))
|
||
|
|
- (simplify
|
||
|
|
- (plus:c @0 (bit_and:c (minus @1 @0)
|
||
|
|
- (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
|
||
|
|
- (if (INTEGRAL_TYPE_P (type)
|
||
|
|
- && INTEGRAL_TYPE_P (TREE_TYPE (@4))
|
||
|
|
- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
|
||
|
|
- && INTEGRAL_TYPE_P (TREE_TYPE (@5))
|
||
|
|
- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
|
||
|
|
- || !TYPE_UNSIGNED (TREE_TYPE (@4)))
|
||
|
|
- && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
|
||
|
|
- (cond (cmp @2 @3) @1 @0)))
|
||
|
|
- /* Similarly with ^ instead of - though in that case with :c. */
|
||
|
|
- (simplify
|
||
|
|
- (bit_xor:c @0 (bit_and:c (bit_xor:c @0 @1)
|
||
|
|
- (convert? (negate@4 (convert? (cmp@5 @2 @3))))))
|
||
|
|
- (if (INTEGRAL_TYPE_P (type)
|
||
|
|
- && INTEGRAL_TYPE_P (TREE_TYPE (@4))
|
||
|
|
- && TREE_CODE (TREE_TYPE (@4)) != BOOLEAN_TYPE
|
||
|
|
- && INTEGRAL_TYPE_P (TREE_TYPE (@5))
|
||
|
|
- && (TYPE_PRECISION (TREE_TYPE (@4)) >= TYPE_PRECISION (type)
|
||
|
|
- || !TYPE_UNSIGNED (TREE_TYPE (@4)))
|
||
|
|
- && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
|
||
|
|
- (cond (cmp @2 @3) @1 @0))))
|
||
|
|
+/* Transform A + (B-A)*cmp into cmp ? B : A. */
|
||
|
|
+(simplify
|
||
|
|
+ (plus:c @0 (mult:c (minus @1 @0) zero_one_valued_p@2))
|
||
|
|
+ (if (INTEGRAL_TYPE_P (type)
|
||
|
|
+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
|
||
|
|
+ (cond (convert:boolean_type_node @2) @1 @0)))
|
||
|
|
+/* Transform A - (A-B)*cmp into cmp ? B : A. */
|
||
|
|
+(simplify
|
||
|
|
+ (minus @0 (mult:c (minus @0 @1) zero_one_valued_p@2))
|
||
|
|
+ (if (INTEGRAL_TYPE_P (type)
|
||
|
|
+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
|
||
|
|
+ (cond (convert:boolean_type_node @2) @1 @0)))
|
||
|
|
+/* Transform A ^ (A^B)*cmp into cmp ? B : A. */
|
||
|
|
+(simplify
|
||
|
|
+ (bit_xor:c @0 (mult:c (bit_xor:c @0 @1) zero_one_valued_p@2))
|
||
|
|
+ (if (INTEGRAL_TYPE_P (type)
|
||
|
|
+ && (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
|
||
|
|
+ (cond (convert:boolean_type_node @2) @1 @0)))
|
||
|
|
|
||
|
|
/* Simplifications of shift and rotates. */
|
||
|
|
|
||
|
|
diff --git a/gcc/testsuite/gcc.dg/pr98865.c b/gcc/testsuite/gcc.dg/pr98865.c
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..95f727033
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/testsuite/gcc.dg/pr98865.c
|
||
|
|
@@ -0,0 +1,14 @@
|
||
|
|
+/* { dg-do compile } */
|
||
|
|
+/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||
|
|
+
|
||
|
|
+int foo(int x, int y)
|
||
|
|
+{
|
||
|
|
+ return -(x&1) & y;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+int bar(int x, int y)
|
||
|
|
+{
|
||
|
|
+ return (x&1) * y;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* { dg-final { scan-tree-dump-times " \\* " 2 "optimized" } } */
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|