151 lines
5.8 KiB
Diff
151 lines
5.8 KiB
Diff
From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001
|
|
From: Xi Ruoyao <xry111@xry111.site>
|
|
Date: Mon, 20 Nov 2023 03:51:56 +0800
|
|
Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with
|
|
explicit rounding mode
|
|
|
|
In the LoongArch FP base ISA there is only the frint.{s/d} instruction which
|
|
reads the global rounding mode. Utilize LSX for explicit rounding mode
|
|
even if the operand is scalar. It seems to waste CPU power, but it is
|
|
still much faster than calling the library function.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
* config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator.
|
|
(VLSX_FOR_FMODE): New mode attribute.
|
|
(<simd_frint_pattern><mode>2): New expander,
|
|
expanding to vreplvei.{w/d} + vfrint{rp/rz/rm/rne}.{s/d}.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
* gcc.target/loongarch/vect-frint-scalar.c: New test.
|
|
* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test.
|
|
---
|
|
gcc/config/loongarch/simd.md | 28 ++++++++++++
|
|
.../loongarch/vect-frint-scalar-no-inexact.c | 23 ++++++++++
|
|
.../gcc.target/loongarch/vect-frint-scalar.c | 43 +++++++++++++++++++
|
|
3 files changed, 94 insertions(+)
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
|
|
|
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
|
index 4ecf7a55e..843b1a41f 100644
|
|
--- a/gcc/config/loongarch/simd.md
|
|
+++ b/gcc/config/loongarch/simd.md
|
|
@@ -169,6 +169,34 @@
|
|
UNSPEC_SIMD_FRINTRZ))]
|
|
"")
|
|
|
|
+;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx is on and either
|
|
+;; -ffp-int-builtin-inexact or -fno-trapping-math is in effect.  The base FP
|
|
+;; ISA lacks these operations.  Yes, we waste 50% or even 75% of the CPU
|
|
+;; horsepower, but it's still much faster than calling a libc function: on
|
|
+;; LA464 and LA664 there is a 3x ~ 5x speed up.
|
|
+;;
|
|
+;; Note that a vreplvei instruction is needed or we'll also operate on the
|
|
+;; junk in high bits of the vector register and produce random FP exceptions.
|
|
+
|
|
+(define_int_iterator LSX_SCALAR_FRINT
|
|
+ [UNSPEC_SIMD_FRINTRP
|
|
+ UNSPEC_SIMD_FRINTRZ
|
|
+ UNSPEC_SIMD_FRINTRM
|
|
+ UNSPEC_SIMD_FRINTRNE])
|
|
+
|
|
+(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")])
|
|
+
|
|
+(define_expand "<simd_frint_pattern><mode>2"
|
|
+ [(set (match_dup 2)
|
|
+ (vec_duplicate:<VLSX_FOR_FMODE>
|
|
+ (match_operand:ANYF 1 "register_operand")))
|
|
+ (set (match_dup 2)
|
|
+ (unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
|
|
+ (set (match_operand:ANYF 0 "register_operand")
|
|
+ (vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
|
|
+ "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
|
|
+ "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
|
|
+
|
|
;; <x>vftint.{/rp/rz/rm}
|
|
(define_insn
|
|
"<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
|
|
new file mode 100644
|
|
index 000000000..002e3b92d
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
|
|
@@ -0,0 +1,23 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
|
|
+
|
|
+#include "vect-frint-scalar.c"
|
|
+
|
|
+/* cannot use LSX for these with -fno-fp-int-builtin-inexact,
|
|
+ call library function. */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */
|
|
+
|
|
+/* nearbyint has not been allowed to raise FE_INEXACT for decades */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
|
|
+
|
|
+/* rint should just use basic FP operation */
|
|
+/* { dg-final { scan-assembler "\tfrint\.s" } } */
|
|
+/* { dg-final { scan-assembler "\tfrint\.d" } } */
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
|
|
new file mode 100644
|
|
index 000000000..c7cb40be7
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
|
|
@@ -0,0 +1,43 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -mlsx" } */
|
|
+
|
|
+#define test(func, suffix) \
|
|
+__typeof__ (1.##suffix) \
|
|
+_##func##suffix (__typeof__ (1.##suffix) x) \
|
|
+{ \
|
|
+ return __builtin_##func##suffix (x); \
|
|
+}
|
|
+
|
|
+test (ceil, f)
|
|
+test (ceil, )
|
|
+test (floor, f)
|
|
+test (floor, )
|
|
+test (trunc, f)
|
|
+test (trunc, )
|
|
+test (roundeven, f)
|
|
+test (roundeven, )
|
|
+test (nearbyint, f)
|
|
+test (nearbyint, )
|
|
+test (rint, f)
|
|
+test (rint, )
|
|
+
|
|
+/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */
|
|
+/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */
|
|
+
|
|
+/* must do vreplvei first */
|
|
+/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */
|
|
+/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */
|
|
+
|
|
+/* nearbyint has not been allowed to raise FE_INEXACT for decades */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
|
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
|
|
+
|
|
+/* rint should just use basic FP operation */
|
|
+/* { dg-final { scan-assembler "\tfrint\.s" } } */
|
|
+/* { dg-final { scan-assembler "\tfrint\.d" } } */
|
|
--
|
|
2.43.0
|
|
|