gcc/0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
Peng Fan f653243538 LoongArch: Sync to upstream
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
2024-11-12 08:44:01 +08:00

149 lines
3.9 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
From: Li Wei <liwei@loongson.cn>
Date: Tue, 28 Nov 2023 15:38:37 +0800
Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
signed/unsigned popcount.
On LoongArch, vector popcount has dedicated instructions, while scalar
popcount does not. Currently, scalar popcount is computed with a loop,
and any value that is not a power of two takes several iterations, so
the vector popcount instruction is used to implement the scalar
operation instead.
gcc/ChangeLog:
* config/loongarch/loongarch.md (cntmap): New mode attribute, used
to simplify the following template.
(popcount<mode>2): New.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/popcnt.c: New test.
* gcc.target/loongarch/popcount.c: New test.
---
gcc/config/loongarch/loongarch.md | 27 +++++++++++-
gcc/testsuite/gcc.target/loongarch/popcnt.c | 41 +++++++++++++++++++
gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
3 files changed, 83 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 11577f407..cfd7a8ec6 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -1512,7 +1512,30 @@
(set_attr "cnv_mode" "D2S")
(set_attr "mode" "SF")])
-
+;; In vector registers, popcount can be implemented directly through
+;; the vector instruction [X]VPCNT. For GP registers, we can implement
+;; it through the following method. Compared with loop implementation
+;; of popcount, the following method has better performance.
+
+;; This attribute maps a scalar mode to the corresponding vector mode
+;; used for the popcount.
+(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
+
+(define_expand "popcount<mode>2"
+ [(set (match_operand:GPR 0 "register_operand")
+ (popcount:GPR (match_operand:GPR 1 "register_operand")))]
+ "ISA_HAS_LSX"
+{ /* Expand scalar popcount by routing the value through a vector register.  */
+ rtx in = operands[1]; /* Scalar source operand.  */
+ rtx out = operands[0]; /* Scalar destination operand.  */
+ rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
+ gen_reg_rtx (V2DImode); /* Fresh vector pseudo: V4SI for SImode, else V2DI.  */
+ emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1))); /* Insert IN into VREG; the 1 presumably selects element 0 -- confirm in lsx.md.  NOTE(review): nothing has written VREG before it is used as an input here.  */
+ emit_insn (gen_popcount<cntmap>2 (vreg, vreg)); /* Vector popcount, in place.  */
+ emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0))); /* Copy the result back to OUT; 0 is presumably the element index -- confirm.  */
+ DONE;
+})
+
;;
;; ....................
;;
@@ -3879,7 +3902,7 @@
(any_extend:SI (match_dup 3)))])]
"")
-
+
(define_mode_iterator QHSD [QI HI SI DI])
diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c
new file mode 100644
index 000000000..a10fca420
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlsx" } */
+/* { dg-final { scan-assembler-not {popcount} } } */
+/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */
+/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */
+
+int
+foo (int x)
+{
+ return __builtin_popcount (x); /* int popcount of a by-value argument; presumably one of the four expected vpcnt.w matches.  */
+}
+
+long
+foo1 (long x)
+{
+ return __builtin_popcountl (x); /* long popcount; presumably one of the two expected vpcnt.d matches.  */
+}
+
+long long
+foo2 (long long x)
+{
+ return __builtin_popcountll (x); /* long long popcount; presumably one of the two expected vpcnt.d matches.  */
+}
+
+int
+foo3 (int *p)
+{
+ return __builtin_popcount (*p); /* Same as foo, but the operand is loaded through P.  */
+}
+
+unsigned
+foo4 (int x)
+{
+ return __builtin_popcount (x); /* int popcount whose result is returned as unsigned.  */
+}
+
+unsigned long
+foo5 (int x)
+{
+ return __builtin_popcount (x); /* int popcount whose result is converted to unsigned long on return.  */
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c
new file mode 100644
index 000000000..390ff0676
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/popcount.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */
+/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */
+
+int
+PopCount (long b)
+{
+ int c = 0; /* Number of set bits cleared so far.  */
+
+ while (b) /* Runs until no set bits remain in B.  */
+ {
+ b &= b - 1; /* Clear the lowest set bit of B.  */
+ c++;
+ }
+
+ return c; /* The dg-final directive expects exactly one popcount call in the optimized dump for this file.  */
+}
--
2.43.0