gcc/0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
Peng Fan f653243538 LoongArch: Sync to upstream
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
2024-11-12 08:44:01 +08:00

731 lines
28 KiB
Diff
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001
From: Jiahao Xu <xujiahao@loongson.cn>
Date: Wed, 6 Dec 2023 15:04:49 +0800
Subject: [PATCH 059/188] LoongArch: Add support for LoongArch V1.1 approximate
instructions.
This patch adds define_insn/builtins/intrinsics for these instructions, and add option
-mfrecipe to control instruction generation.
gcc/ChangeLog:
* config/loongarch/genopts/isa-evolution.in (fecipe): Add.
* config/loongarch/larchintrin.h (__frecipe_s): New intrinsic.
(__frecipe_d): Ditto.
(__frsqrte_s): Ditto.
(__frsqrte_d): Ditto.
* config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern.
(lasx_xvfrsqrte_<flasxfmt>): Ditto.
* config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic.
(__lasx_xvfrecipe_d): Ditto.
(__lasx_xvfrsqrte_s): Ditto.
(__lasx_xvfrsqrte_d): Ditto.
* config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates.
(LSX_EXT_BUILTIN): New macro.
(LASX_EXT_BUILTIN): Ditto.
* config/loongarch/loongarch-cpucfg-map.h: Regenerate.
* config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe".
* config/loongarch/loongarch-def.cc: Regenerate.
* config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate.
* config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE.
* config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern.
(loongarch_frsqrte_<fmt>): Ditto.
* config/loongarch/loongarch.opt: Regenerate.
* config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern.
(lsx_vfrsqrte_<flsxfmt>): Ditto.
* config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic.
(__lsx_vfrecipe_d): Ditto.
(__lsx_vfrsqrte_s): Ditto.
(__lsx_vfrsqrte_d): Ditto.
* doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/larch-frecipe-builtin.c: New test.
* gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test.
* gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test.
---
gcc/config/loongarch/genopts/isa-evolution.in | 1 +
gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++
gcc/config/loongarch/lasx.md | 24 +++++++++++
gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++
gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++
gcc/config/loongarch/loongarch-c.cc | 3 ++
gcc/config/loongarch/loongarch-cpucfg-map.h | 1 +
gcc/config/loongarch/loongarch-def.cc | 3 +-
gcc/config/loongarch/loongarch-str.h | 1 +
gcc/config/loongarch/loongarch.cc | 1 +
gcc/config/loongarch/loongarch.md | 35 +++++++++++++++-
gcc/config/loongarch/loongarch.opt | 4 ++
gcc/config/loongarch/lsx.md | 24 +++++++++++
gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++
gcc/doc/extend.texi | 35 ++++++++++++++++
.../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++
.../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++
.../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++
18 files changed, 365 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
index a6bc3f87f..11a198b64 100644
--- a/gcc/config/loongarch/genopts/isa-evolution.in
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -1,3 +1,4 @@
+2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions.
2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions.
2 28 lamcas Support amcas[_db].{b/h/w/d} instructions.
diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
index 2833f1487..22035e767 100644
--- a/gcc/config/loongarch/larchintrin.h
+++ b/gcc/config/loongarch/larchintrin.h
@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2)
}
#endif
+#ifdef __loongarch_frecipe
+/* Assembly instruction format: fd, fj. */
+/* Data types in instruction templates: SF, SF. */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__frecipe_s (float _1)
+{
+ __builtin_loongarch_frecipe_s ((float) _1);
+}
+
+/* Assembly instruction format: fd, fj. */
+/* Data types in instruction templates: DF, DF. */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__frecipe_d (double _1)
+{
+ __builtin_loongarch_frecipe_d ((double) _1);
+}
+
+/* Assembly instruction format: fd, fj. */
+/* Data types in instruction templates: SF, SF. */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__frsqrte_s (float _1)
+{
+ __builtin_loongarch_frsqrte_s ((float) _1);
+}
+
+/* Assembly instruction format: fd, fj. */
+/* Data types in instruction templates: DF, DF. */
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__frsqrte_d (double _1)
+{
+ __builtin_loongarch_frsqrte_d ((double) _1);
+}
+#endif
+
/* Assembly instruction format: ui15. */
/* Data types in instruction templates: USI. */
#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1))
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
index de7c88f14..b1416f6c3 100644
--- a/gcc/config/loongarch/lasx.md
+++ b/gcc/config/loongarch/lasx.md
@@ -40,8 +40,10 @@
UNSPEC_LASX_XVFCVTL
UNSPEC_LASX_XVFLOGB
UNSPEC_LASX_XVFRECIP
+ UNSPEC_LASX_XVFRECIPE
UNSPEC_LASX_XVFRINT
UNSPEC_LASX_XVFRSQRT
+ UNSPEC_LASX_XVFRSQRTE
UNSPEC_LASX_XVFCMP_SAF
UNSPEC_LASX_XVFCMP_SEQ
UNSPEC_LASX_XVFCMP_SLE
@@ -1633,6 +1635,17 @@
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Instructions.
+
+(define_insn "lasx_xvfrecipe_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRECIPE))]
+ "ISA_HAS_LASX && TARGET_FRECIPE"
+ "xvfrecipe.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lasx_xvfrsqrt_<flasxfmt>"
[(set (match_operand:FLASX 0 "register_operand" "=f")
(unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
@@ -1642,6 +1655,17 @@
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Square Root Instructions.
+
+(define_insn "lasx_xvfrsqrte_<flasxfmt>"
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
+ UNSPEC_LASX_XVFRSQRTE))]
+ "ISA_HAS_LASX && TARGET_FRECIPE"
+ "xvfrsqrte.<flasxfmt>\t%u0,%u1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>"
[(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h
index 7bce2c757..5e65e76e7 100644
--- a/gcc/config/loongarch/lasxintrin.h
+++ b/gcc/config/loongarch/lasxintrin.h
@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1)
return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1);
}
+#if defined(__loongarch_frecipe)
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V8SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_xvfrecipe_s (__m256 _1)
+{
+ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V4DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_xvfrecipe_d (__m256d _1)
+{
+ return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V8SF, V8SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256 __lasx_xvfrsqrte_s (__m256 _1)
+{
+ return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1);
+}
+
+/* Assembly instruction format: xd, xj. */
+/* Data types in instruction templates: V4DF, V4DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m256d __lasx_xvfrsqrte_d (__m256d _1)
+{
+ return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1);
+}
+#endif
+
/* Assembly instruction format: xd, xj. */
/* Data types in instruction templates: V8SF, V8SF. */
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
index f4523c8bf..bc156bd36 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -120,6 +120,9 @@ struct loongarch_builtin_description
AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI)
AVAIL_ALL (lsx, ISA_HAS_LSX)
AVAIL_ALL (lasx, ISA_HAS_LASX)
+AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI)
+AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE)
+AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
/* Construct a loongarch_builtin_description from the given arguments.
@@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
"__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \
FUNCTION_TYPE, loongarch_builtin_avail_lsx }
+ /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_<INSN>
+ for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description
+ field. AVAIL is the name of the availability predicate, without the leading
+ loongarch_builtin_avail_. */
+#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
+ { CODE_FOR_lsx_ ## INSN, \
+ "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \
+ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL }
+
/* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_<INSN>
for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description
@@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
"__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \
FUNCTION_TYPE, loongarch_builtin_avail_lasx }
+/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_<INSN>
+ for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description
+ field. AVAIL is the name of the availability predicate, without the leading
+ loongarch_builtin_avail_. */
+#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
+ { CODE_FOR_lasx_ ## INSN, \
+ "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \
+ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL }
+
/* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_<INSN>
for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description
field. */
@@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default),
DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default),
+ /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}. */
+
+ DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe),
+ DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe),
+ DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe),
+ DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe),
+
+ /* Built-in functions for new LSX instructions. */
+
+ LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe),
+ LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe),
+ LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe),
+ LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe),
+
+ /* Built-in functions for new LASX instructions. */
+
+ LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe),
+ LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe),
+ LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe),
+ LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe),
+
/* Built-in functions for LSX. */
LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI),
LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI),
diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
index 76c8ea8db..a89477a74 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
else
builtin_define ("__loongarch_frlen=0");
+ if (TARGET_HARD_FLOAT && TARGET_FRECIPE)
+ builtin_define ("__loongarch_frecipe");
+
if (ISA_HAS_LSX)
{
builtin_define ("__loongarch_simd");
diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
index 02ff16712..148333c24 100644
--- a/gcc/config/loongarch/loongarch-cpucfg-map.h
+++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
@@ -29,6 +29,7 @@ static constexpr struct {
unsigned int cpucfg_bit;
HOST_WIDE_INT isa_evolution_bit;
} cpucfg_map[] = {
+ { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE },
{ 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
{ 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
{ 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
index bc6997e45..c41804a18 100644
--- a/gcc/config/loongarch/loongarch-def.cc
+++ b/gcc/config/loongarch/loongarch-def.cc
@@ -60,7 +60,8 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
.fpu_ (ISA_EXT_FPU64)
.simd_ (ISA_EXT_SIMD_LASX)
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS));
+ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
+ | OPTION_MASK_ISA_FRECIPE));
static inline loongarch_cache la464_cache ()
{
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
index 7144bbe28..a8821acb0 100644
--- a/gcc/config/loongarch/loongarch-str.h
+++ b/gcc/config/loongarch/loongarch-str.h
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see
#define STR_EXPLICIT_RELOCS_NONE "none"
#define STR_EXPLICIT_RELOCS_ALWAYS "always"
+#define OPTSTR_FRECIPE "frecipe"
#define OPTSTR_DIV32 "div32"
#define OPTSTR_LAM_BH "lam-bh"
#define OPTSTR_LAMCAS "lamcas"
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 3c8ae9a42..ce1c0a8bd 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -11503,6 +11503,7 @@ loongarch_asm_code_end (void)
loongarch_cpu_strings [la_target.cpu_tune]);
fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
loongarch_isa_base_strings [la_target.isa.base]);
+ DUMP_FEATURE (TARGET_FRECIPE);
DUMP_FEATURE (TARGET_DIV32);
DUMP_FEATURE (TARGET_LAM_BH);
DUMP_FEATURE (TARGET_LAMCAS);
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index afc3c591f..9080cec1c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -59,6 +59,12 @@
;; Stack tie
UNSPEC_TIE
+ ;; RSQRT
+ UNSPEC_RSQRTE
+
+ ;; RECIP
+ UNSPEC_RECIPE
+
;; CRC
UNSPEC_CRC
UNSPEC_CRCC
@@ -220,6 +226,7 @@
;; fmadd floating point multiply-add
;; fdiv floating point divide
;; frdiv floating point reciprocal divide
+;; frecipe floating point approximate reciprocal
;; fabs floating point absolute value
;; flogb floating point exponent extract
;; fneg floating point negation
@@ -229,6 +236,7 @@
;; fscaleb floating point scale
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
+;; frsqrte floating point approximate reciprocal square root
;; multi multiword sequence (or user asm statements)
;; atomic atomic memory update instruction
;; syncloop memory atomic operation implemented as a sync loop
@@ -238,8 +246,8 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
- fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost,
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
+ fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost,
simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd,
simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp,
simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill,
@@ -908,6 +916,18 @@
[(set_attr "type" "frdiv")
(set_attr "mode" "<UNITMODE>")])
+;; Approximate Reciprocal Instructions.
+
+(define_insn "loongarch_frecipe_<fmt>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_RECIPE))]
+ "TARGET_FRECIPE"
+ "frecipe.<fmt>\t%0,%1"
+ [(set_attr "type" "frecipe")
+ (set_attr "mode" "<UNITMODE>")
+ (set_attr "insn_count" "1")])
+
;; Integer division and modulus.
(define_expand "<optab><mode>3"
[(set (match_operand:GPR 0 "register_operand")
@@ -1133,6 +1153,17 @@
[(set_attr "type" "frsqrt")
(set_attr "mode" "<UNITMODE>")
(set_attr "insn_count" "1")])
+
+;; Approximate Reciprocal Square Root Instructions.
+
+(define_insn "loongarch_frsqrte_<fmt>"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_RSQRTE))]
+ "TARGET_FRECIPE"
+ "frsqrte.<fmt>\t%0,%1"
+ [(set_attr "type" "frsqrte")
+ (set_attr "mode" "<UNITMODE>")])
;;
;; ....................
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 7fe36feb9..e7bc8bed4 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -260,6 +260,10 @@ default value is 4.
Variable
HOST_WIDE_INT isa_evolution = 0
+mfrecipe
+Target Mask(ISA_FRECIPE) Var(isa_evolution)
+Support frecipe.{s/d} and frsqrte.{s/d} instructions.
+
mdiv32
Target Mask(ISA_DIV32) Var(isa_evolution)
Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
index ce6ec6d69..37bdc6910 100644
--- a/gcc/config/loongarch/lsx.md
+++ b/gcc/config/loongarch/lsx.md
@@ -42,8 +42,10 @@
UNSPEC_LSX_VFCVTL
UNSPEC_LSX_VFLOGB
UNSPEC_LSX_VFRECIP
+ UNSPEC_LSX_VFRECIPE
UNSPEC_LSX_VFRINT
UNSPEC_LSX_VFRSQRT
+ UNSPEC_LSX_VFRSQRTE
UNSPEC_LSX_VFCMP_SAF
UNSPEC_LSX_VFCMP_SEQ
UNSPEC_LSX_VFCMP_SLE
@@ -1546,6 +1548,17 @@
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Instructions.
+
+(define_insn "lsx_vfrecipe_<flsxfmt>"
+ [(set (match_operand:FLSX 0 "register_operand" "=f")
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
+ UNSPEC_LSX_VFRECIPE))]
+ "ISA_HAS_LSX && TARGET_FRECIPE"
+ "vfrecipe.<flsxfmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lsx_vfrsqrt_<flsxfmt>"
[(set (match_operand:FLSX 0 "register_operand" "=f")
(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
@@ -1555,6 +1568,17 @@
[(set_attr "type" "simd_fdiv")
(set_attr "mode" "<MODE>")])
+;; Approximate Reciprocal Square Root Instructions.
+
+(define_insn "lsx_vfrsqrte_<flsxfmt>"
+ [(set (match_operand:FLSX 0 "register_operand" "=f")
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
+ UNSPEC_LSX_VFRSQRTE))]
+ "ISA_HAS_LSX && TARGET_FRECIPE"
+ "vfrsqrte.<flsxfmt>\t%w0,%w1"
+ [(set_attr "type" "simd_fdiv")
+ (set_attr "mode" "<MODE>")])
+
(define_insn "lsx_vftint_u_<ilsxfmt_u>_<flsxfmt>"
[(set (match_operand:<VIMODE> 0 "register_operand" "=f")
(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")]
diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h
index 29553c093..57a6fc40a 100644
--- a/gcc/config/loongarch/lsxintrin.h
+++ b/gcc/config/loongarch/lsxintrin.h
@@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1)
return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1);
}
+#if defined(__loongarch_frecipe)
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V4SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lsx_vfrecipe_s (__m128 _1)
+{
+ return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1);
+}
+
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V2DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lsx_vfrecipe_d (__m128d _1)
+{
+ return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1);
+}
+
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V4SF, V4SF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128 __lsx_vfrsqrte_s (__m128 _1)
+{
+ return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1);
+}
+
+/* Assembly instruction format: vd, vj. */
+/* Data types in instruction templates: V2DF, V2DF. */
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__m128d __lsx_vfrsqrte_d (__m128d _1)
+{
+ return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1);
+}
+#endif
+
/* Assembly instruction format: vd, vj. */
/* Data types in instruction templates: V4SF, V4SF. */
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7edd3974d..bb042ae78 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -16187,6 +16187,14 @@ The intrinsics provided are listed below:
void __builtin_loongarch_break (imm0_32767)
@end smallexample
+These instrisic functions are available by using @option{-mfrecipe}.
+@smallexample
+ float __builtin_loongarch_frecipe_s (float);
+ double __builtin_loongarch_frecipe_d (double);
+ float __builtin_loongarch_frsqrte_s (float);
+ double __builtin_loongarch_frsqrte_d (double);
+@end smallexample
+
@emph{Note:}Since the control register is divided into 32-bit and 64-bit,
but the access instruction is not distinguished. So GCC renames the control
instructions when implementing intrinsics.
@@ -16259,6 +16267,15 @@ function you need to include @code{larchintrin.h}.
void __break (imm0_32767)
@end smallexample
+These instrisic functions are available by including @code{larchintrin.h} and
+using @option{-mfrecipe}.
+@smallexample
+ float __frecipe_s (float);
+ double __frecipe_d (double);
+ float __frsqrte_s (float);
+ double __frsqrte_d (double);
+@end smallexample
+
Returns the value that is currently set in the @samp{tp} register.
@smallexample
void * __builtin_thread_pointer (void)
@@ -17085,6 +17102,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255);
__m128i __lsx_vxor_v (__m128i, __m128i);
@end smallexample
+These instrisic functions are available by including @code{lsxintrin.h} and
+using @option{-mfrecipe} and @option{-mlsx}.
+@smallexample
+__m128d __lsx_vfrecipe_d (__m128d);
+__m128 __lsx_vfrecipe_s (__m128);
+__m128d __lsx_vfrsqrte_d (__m128d);
+__m128 __lsx_vfrsqrte_s (__m128);
+@end smallexample
+
@node LoongArch ASX Vector Intrinsics
@subsection LoongArch ASX Vector Intrinsics
@@ -17924,6 +17950,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255);
__m256i __lasx_xvxor_v (__m256i, __m256i);
@end smallexample
+These instrisic functions are available by including @code{lasxintrin.h} and
+using @option{-mfrecipe} and @option{-mlasx}.
+@smallexample
+__m256d __lasx_xvfrecipe_d (__m256d);
+__m256 __lasx_xvfrecipe_s (__m256);
+__m256d __lasx_xvfrsqrte_d (__m256d);
+__m256 __lasx_xvfrsqrte_s (__m256);
+@end smallexample
+
@node MIPS DSP Built-in Functions
@subsection MIPS DSP Built-in Functions
diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
new file mode 100644
index 000000000..b9329f346
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
@@ -0,0 +1,28 @@
+/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */
+/* { dg-do compile } */
+/* { dg-options "-mfrecipe" } */
+/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */
+/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */
+/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */
+/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */
+
+float
+test_frecipe_s (float _1)
+{
+ return __builtin_loongarch_frecipe_s (_1);
+}
+double
+test_frecipe_d (double _1)
+{
+ return __builtin_loongarch_frecipe_d (_1);
+}
+float
+test_frsqrte_s (float _1)
+{
+ return __builtin_loongarch_frsqrte_s (_1);
+}
+double
+test_frsqrte_d (double _1)
+{
+ return __builtin_loongarch_frsqrte_d (_1);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
new file mode 100644
index 000000000..522535b45
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
@@ -0,0 +1,30 @@
+/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */
+/* { dg-do compile } */
+/* { dg-options "-mlasx -mfrecipe" } */
+/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */
+/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */
+
+#include <lasxintrin.h>
+
+v8f32
+__lasx_xvfrecipe_s (v8f32 _1)
+{
+ return __builtin_lasx_xvfrecipe_s (_1);
+}
+v4f64
+__lasx_xvfrecipe_d (v4f64 _1)
+{
+ return __builtin_lasx_xvfrecipe_d (_1);
+}
+v8f32
+__lasx_xvfrsqrte_s (v8f32 _1)
+{
+ return __builtin_lasx_xvfrsqrte_s (_1);
+}
+v4f64
+__lasx_xvfrsqrte_d (v4f64 _1)
+{
+ return __builtin_lasx_xvfrsqrte_d (_1);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
new file mode 100644
index 000000000..4ad0cb0ff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
@@ -0,0 +1,30 @@
+/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */
+/* { dg-do compile } */
+/* { dg-options "-mlsx -mfrecipe" } */
+/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */
+/* { dg-final { scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */
+
+#include <lsxintrin.h>
+
+v4f32
+__lsx_vfrecipe_s (v4f32 _1)
+{
+ return __builtin_lsx_vfrecipe_s (_1);
+}
+v2f64
+__lsx_vfrecipe_d (v2f64 _1)
+{
+ return __builtin_lsx_vfrecipe_d (_1);
+}
+v4f32
+__lsx_vfrsqrte_s (v4f32 _1)
+{
+ return __builtin_lsx_vfrsqrte_s (_1);
+}
+v2f64
+__lsx_vfrsqrte_d (v2f64 _1)
+{
+ return __builtin_lsx_vfrsqrte_d (_1);
+}
--
2.43.0