149 lines
3.9 KiB
Diff
149 lines
3.9 KiB
Diff
|
|
From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
|
|||
|
|
From: Li Wei <liwei@loongson.cn>
|
|||
|
|
Date: Tue, 28 Nov 2023 15:38:37 +0800
|
|||
|
|
Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
|
|||
|
|
signed/unsigned popcount.
|
|||
|
|
|
|||
|
|
In LoongArch, the vector popcount has corresponding instructions, while
|
|||
|
|
the scalar does not. Currently, the scalar popcount is calculated
|
|||
|
|
through a loop, and the value of a non-power of two needs to be iterated
|
|||
|
|
several times, so the vector popcount instruction is considered for
|
|||
|
|
optimization.
|
|||
|
|
|
|||
|
|
gcc/ChangeLog:
|
|||
|
|
|
|||
|
|
* config/loongarch/loongarch.md (cntmap): Used to simplify the
|
|||
|
|
following templates.
|
|||
|
|
(popcount<mode>2): New.
|
|||
|
|
|
|||
|
|
gcc/testsuite/ChangeLog:
|
|||
|
|
|
|||
|
|
* gcc.target/loongarch/popcnt.c: New test.
|
|||
|
|
* gcc.target/loongarch/popcount.c: New test.
|
|||
|
|
---
|
|||
|
|
gcc/config/loongarch/loongarch.md | 27 +++++++++++-
|
|||
|
|
gcc/testsuite/gcc.target/loongarch/popcnt.c | 41 +++++++++++++++++++
|
|||
|
|
gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
|
|||
|
|
3 files changed, 83 insertions(+), 2 deletions(-)
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
|
|||
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
|
|||
|
|
|
|||
|
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
|||
|
|
index 11577f407..cfd7a8ec6 100644
|
|||
|
|
--- a/gcc/config/loongarch/loongarch.md
|
|||
|
|
+++ b/gcc/config/loongarch/loongarch.md
|
|||
|
|
@@ -1512,7 +1512,30 @@
|
|||
|
|
(set_attr "cnv_mode" "D2S")
|
|||
|
|
(set_attr "mode" "SF")])
|
|||
|
|
|
|||
|
|
-
|
|||
|
|
+;; In vector registers, popcount can be implemented directly through
|
|||
|
|
+;; the vector instruction [X]VPCNT. For GP registers, we can implement
|
|||
|
|
+;; it through the following method. Compared with a loop implementation
|
|||
|
|
+;; of popcount, the following method has better performance.
|
|||
|
|
+
|
|||
|
|
+;; This attribute is used to map a scalar mode to its corresponding
|
|||
|
|
+;; vector mode.
|
|||
|
|
+(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
|
|||
|
|
+
|
|||
|
|
+(define_expand "popcount<mode>2"
|
|||
|
|
+ [(set (match_operand:GPR 0 "register_operand")
|
|||
|
|
+ (popcount:GPR (match_operand:GPR 1 "register_operand")))]
|
|||
|
|
+ "ISA_HAS_LSX"
|
|||
|
|
+{
|
|||
|
|
+ rtx in = operands[1];
|
|||
|
|
+ rtx out = operands[0];
|
|||
|
|
+ rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
|
|||
|
|
+ gen_reg_rtx (V2DImode);
|
|||
|
|
+ emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
|
|||
|
|
+ emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
|
|||
|
|
+ emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
|
|||
|
|
+ DONE;
|
|||
|
|
+})
|
|||
|
|
+
|
|||
|
|
;;
|
|||
|
|
;; ....................
|
|||
|
|
;;
|
|||
|
|
@@ -3879,7 +3902,7 @@
|
|||
|
|
(any_extend:SI (match_dup 3)))])]
|
|||
|
|
"")
|
|||
|
|
|
|||
|
|
-
|
|||
|
|
+
|
|||
|
|
|
|||
|
|
(define_mode_iterator QHSD [QI HI SI DI])
|
|||
|
|
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..a10fca420
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c
|
|||
|
|
@@ -0,0 +1,41 @@
|
|||
|
|
+/* { dg-do compile } */
|
|||
|
|
+/* { dg-options "-O2 -mlsx" } */
|
|||
|
|
+/* { dg-final { scan-assembler-not {popcount} } } */
|
|||
|
|
+/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */
|
|||
|
|
+/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */
|
|||
|
|
+
|
|||
|
|
+int
|
|||
|
|
+foo (int x)
|
|||
|
|
+{
|
|||
|
|
+ return __builtin_popcount (x);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+long
|
|||
|
|
+foo1 (long x)
|
|||
|
|
+{
|
|||
|
|
+ return __builtin_popcountl (x);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+long long
|
|||
|
|
+foo2 (long long x)
|
|||
|
|
+{
|
|||
|
|
+ return __builtin_popcountll (x);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+int
|
|||
|
|
+foo3 (int *p)
|
|||
|
|
+{
|
|||
|
|
+ return __builtin_popcount (*p);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+unsigned
|
|||
|
|
+foo4 (int x)
|
|||
|
|
+{
|
|||
|
|
+ return __builtin_popcount (x);
|
|||
|
|
+}
|
|||
|
|
+
|
|||
|
|
+unsigned long
|
|||
|
|
+foo5 (int x)
|
|||
|
|
+{
|
|||
|
|
+ return __builtin_popcount (x);
|
|||
|
|
+}
|
|||
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c
|
|||
|
|
new file mode 100644
|
|||
|
|
index 000000000..390ff0676
|
|||
|
|
--- /dev/null
|
|||
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/popcount.c
|
|||
|
|
@@ -0,0 +1,17 @@
|
|||
|
|
+/* { dg-do compile } */
|
|||
|
|
+/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */
|
|||
|
|
+/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */
|
|||
|
|
+
|
|||
|
|
+int
|
|||
|
|
+PopCount (long b)
|
|||
|
|
+{
|
|||
|
|
+ int c = 0;
|
|||
|
|
+
|
|||
|
|
+ while (b)
|
|||
|
|
+ {
|
|||
|
|
+ b &= b - 1;
|
|||
|
|
+ c++;
|
|||
|
|
+ }
|
|||
|
|
+
|
|||
|
|
+ return c;
|
|||
|
|
+}
|
|||
|
|
--
|
|||
|
|
2.43.0
|
|||
|
|
|