!496 LoongArch: Sync from gcc upstream
From: @ticat-fp Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
This commit is contained in:
commit
41aeeefa83
464
0001-LoongArch-Reimplement-multilib-build-option-handling.patch
Normal file
464
0001-LoongArch-Reimplement-multilib-build-option-handling.patch
Normal file
@ -0,0 +1,464 @@
|
|||||||
|
From d394a9ac68674b40e0d2b436c09e23dd29d8b5d0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yang Yujie <yangyujie@loongson.cn>
|
||||||
|
Date: Wed, 13 Sep 2023 17:52:14 +0800
|
||||||
|
Subject: [PATCH 001/188] LoongArch: Reimplement multilib build option
|
||||||
|
handling.
|
||||||
|
|
||||||
|
Library build options from --with-multilib-list used to be processed with
|
||||||
|
*self_spec, which missed the driver's initial canonicalization. This
|
||||||
|
caused limitations on CFLAGS override and the use of driver-only options
|
||||||
|
like -m[no]-lsx.
|
||||||
|
|
||||||
|
The problem is solved by promoting the injection rules of --with-multilib-list
|
||||||
|
options to the first element of DRIVER_SELF_SPECS, to make them execute before
|
||||||
|
the canonicalization. The library-build options are also hard-coded in
|
||||||
|
the driver and can be used conveniently by the builders of other non-gcc
|
||||||
|
libraries via the use of -fmultiflags.
|
||||||
|
|
||||||
|
Bootstrapped and tested on loongarch64-linux-gnu.
|
||||||
|
|
||||||
|
ChangeLog:
|
||||||
|
|
||||||
|
* config-ml.in: Remove unneeded loongarch clause.
|
||||||
|
* configure.ac: Register custom makefile fragments mt-loongarch-*
|
||||||
|
for loongarch targets.
|
||||||
|
* configure: Regenerate.
|
||||||
|
|
||||||
|
config/ChangeLog:
|
||||||
|
|
||||||
|
* mt-loongarch-mlib: New file. Pass -fmultiflags when building
|
||||||
|
target libraries (FLAGS_FOR_TARGET).
|
||||||
|
* mt-loongarch-elf: New file.
|
||||||
|
* mt-loongarch-gnu: New file.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.gcc: Pass the default ABI via TM_MULTILIB_CONFIG.
|
||||||
|
* config/loongarch/loongarch-driver.h: Invoke MLIB_SELF_SPECS
|
||||||
|
before the driver canonicalization routines.
|
||||||
|
* config/loongarch/loongarch.h: Move definitions of CC1_SPEC etc.
|
||||||
|
to loongarch-driver.h.
|
||||||
|
* config/loongarch/t-linux: Move multilib-related definitions to
|
||||||
|
t-multilib.
|
||||||
|
* config/loongarch/t-multilib: New file. Inject library build
|
||||||
|
options obtained from --with-multilib-list.
|
||||||
|
* config/loongarch/t-loongarch: Same.
|
||||||
|
---
|
||||||
|
config-ml.in | 10 ----
|
||||||
|
config/mt-loongarch-elf | 1 +
|
||||||
|
config/mt-loongarch-gnu | 2 +
|
||||||
|
config/mt-loongarch-mlib | 1 +
|
||||||
|
configure | 6 +++
|
||||||
|
configure.ac | 6 +++
|
||||||
|
gcc/config.gcc | 6 +--
|
||||||
|
gcc/config/loongarch/loongarch-driver.h | 42 +++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch.h | 50 ------------------
|
||||||
|
gcc/config/loongarch/t-linux | 66 +++---------------------
|
||||||
|
gcc/config/loongarch/t-loongarch | 2 +-
|
||||||
|
gcc/config/loongarch/t-multilib | 68 +++++++++++++++++++++++++
|
||||||
|
12 files changed, 137 insertions(+), 123 deletions(-)
|
||||||
|
create mode 100644 config/mt-loongarch-elf
|
||||||
|
create mode 100644 config/mt-loongarch-gnu
|
||||||
|
create mode 100644 config/mt-loongarch-mlib
|
||||||
|
create mode 100644 gcc/config/loongarch/t-multilib
|
||||||
|
|
||||||
|
diff --git a/config-ml.in b/config-ml.in
|
||||||
|
index ad0db0781..68854a4f1 100644
|
||||||
|
--- a/config-ml.in
|
||||||
|
+++ b/config-ml.in
|
||||||
|
@@ -301,16 +301,6 @@ arm-*-*)
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
;;
|
||||||
|
-loongarch*-*)
|
||||||
|
- old_multidirs="${multidirs}"
|
||||||
|
- multidirs=""
|
||||||
|
- for x in ${old_multidirs}; do
|
||||||
|
- case "$x" in
|
||||||
|
- `${CC-gcc} --print-multi-directory`) : ;;
|
||||||
|
- *) multidirs="${multidirs} ${x}" ;;
|
||||||
|
- esac
|
||||||
|
- done
|
||||||
|
- ;;
|
||||||
|
m68*-*-*)
|
||||||
|
if [ x$enable_softfloat = xno ]
|
||||||
|
then
|
||||||
|
diff --git a/config/mt-loongarch-elf b/config/mt-loongarch-elf
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..bbf29bb57
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/config/mt-loongarch-elf
|
||||||
|
@@ -0,0 +1 @@
|
||||||
|
+include $(srcdir)/config/mt-loongarch-mlib
|
||||||
|
diff --git a/config/mt-loongarch-gnu b/config/mt-loongarch-gnu
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..dfefb44ed
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/config/mt-loongarch-gnu
|
||||||
|
@@ -0,0 +1,2 @@
|
||||||
|
+include $(srcdir)/config/mt-gnu
|
||||||
|
+include $(srcdir)/config/mt-loongarch-mlib
|
||||||
|
diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..4cfe568f1
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/config/mt-loongarch-mlib
|
||||||
|
@@ -0,0 +1 @@
|
||||||
|
+FLAGS_FOR_TARGET += -fmultiflags
|
||||||
|
diff --git a/configure b/configure
|
||||||
|
index aff62c464..81b4a3cec 100755
|
||||||
|
--- a/configure
|
||||||
|
+++ b/configure
|
||||||
|
@@ -9548,6 +9548,12 @@ case "${target}" in
|
||||||
|
spu-*-*)
|
||||||
|
target_makefile_frag="config/mt-spu"
|
||||||
|
;;
|
||||||
|
+ loongarch*-*linux* | loongarch*-*gnu*)
|
||||||
|
+ target_makefile_frag="config/mt-loongarch-gnu"
|
||||||
|
+ ;;
|
||||||
|
+ loongarch*-*elf*)
|
||||||
|
+ target_makefile_frag="config/mt-loongarch-elf"
|
||||||
|
+ ;;
|
||||||
|
mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
|
||||||
|
target_makefile_frag="config/mt-sde"
|
||||||
|
;;
|
||||||
|
diff --git a/configure.ac b/configure.ac
|
||||||
|
index f310d75ca..9f8dbd319 100644
|
||||||
|
--- a/configure.ac
|
||||||
|
+++ b/configure.ac
|
||||||
|
@@ -2729,6 +2729,12 @@ case "${target}" in
|
||||||
|
spu-*-*)
|
||||||
|
target_makefile_frag="config/mt-spu"
|
||||||
|
;;
|
||||||
|
+ loongarch*-*linux* | loongarch*-*gnu*)
|
||||||
|
+ target_makefile_frag="config/mt-loongarch-gnu"
|
||||||
|
+ ;;
|
||||||
|
+ loongarch*-*elf*)
|
||||||
|
+ target_makefile_frag="config/mt-loongarch-elf"
|
||||||
|
+ ;;
|
||||||
|
mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
|
||||||
|
target_makefile_frag="config/mt-sde"
|
||||||
|
;;
|
||||||
|
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||||
|
index 3f870e966..e34a5fbb9 100644
|
||||||
|
--- a/gcc/config.gcc
|
||||||
|
+++ b/gcc/config.gcc
|
||||||
|
@@ -2510,7 +2510,7 @@ loongarch*-*-linux*)
|
||||||
|
tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
|
||||||
|
tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
|
||||||
|
extra_options="${extra_options} linux-android.opt"
|
||||||
|
- tmake_file="${tmake_file} loongarch/t-linux"
|
||||||
|
+ tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
|
||||||
|
gnu_ld=yes
|
||||||
|
gas=yes
|
||||||
|
|
||||||
|
@@ -2522,7 +2522,7 @@ loongarch*-*-linux*)
|
||||||
|
loongarch*-*-elf*)
|
||||||
|
tm_file="elfos.h newlib-stdint.h ${tm_file}"
|
||||||
|
tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
|
||||||
|
- tmake_file="${tmake_file} loongarch/t-linux"
|
||||||
|
+ tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
|
||||||
|
gnu_ld=yes
|
||||||
|
gas=yes
|
||||||
|
|
||||||
|
@@ -5241,7 +5241,7 @@ case "${target}" in
|
||||||
|
loongarch_multilib_list_sane=no
|
||||||
|
|
||||||
|
# This one goes to TM_MULTILIB_CONFIG, for use in t-linux.
|
||||||
|
- loongarch_multilib_list_make=""
|
||||||
|
+ loongarch_multilib_list_make="${abi_base},"
|
||||||
|
|
||||||
|
# This one goes to tm_defines, for use in loongarch-driver.c.
|
||||||
|
loongarch_multilib_list_c=""
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
index 6cfe0efb5..e7d083677 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
@@ -23,6 +23,39 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
#include "loongarch-str.h"
|
||||||
|
|
||||||
|
+#ifndef SUBTARGET_CPP_SPEC
|
||||||
|
+#define SUBTARGET_CPP_SPEC ""
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef SUBTARGET_CC1_SPEC
|
||||||
|
+#define SUBTARGET_CC1_SPEC ""
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef SUBTARGET_ASM_SPEC
|
||||||
|
+#define SUBTARGET_ASM_SPEC ""
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#define EXTRA_SPECS \
|
||||||
|
+ {"early_self_spec", ""}, \
|
||||||
|
+ {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
|
||||||
|
+ {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
|
||||||
|
+ {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+#undef CPP_SPEC
|
||||||
|
+#define CPP_SPEC \
|
||||||
|
+ "%(subtarget_cpp_spec)"
|
||||||
|
+
|
||||||
|
+#undef CC1_SPEC
|
||||||
|
+#define CC1_SPEC \
|
||||||
|
+ "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \
|
||||||
|
+ "%(subtarget_cc1_spec)"
|
||||||
|
+
|
||||||
|
+#undef ASM_SPEC
|
||||||
|
+#define ASM_SPEC \
|
||||||
|
+ "%{mabi=*} %(subtarget_asm_spec)"
|
||||||
|
+
|
||||||
|
+
|
||||||
|
extern const char*
|
||||||
|
la_driver_init (int argc, const char **argv);
|
||||||
|
|
||||||
|
@@ -45,7 +78,16 @@ driver_get_normalized_m_opts (int argc, const char **argv);
|
||||||
|
#define LA_SET_PARM_SPEC(NAME) \
|
||||||
|
" %{m" OPTSTR_##NAME "=*: %:set_m_parm(" OPTSTR_##NAME " %*)}" \
|
||||||
|
|
||||||
|
+/* For MLIB_SELF_SPECS. */
|
||||||
|
+#include "loongarch-multilib.h"
|
||||||
|
+
|
||||||
|
+#ifndef MLIB_SELF_SPECS
|
||||||
|
+#define MLIB_SELF_SPECS ""
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#define DRIVER_HANDLE_MACHINE_OPTIONS \
|
||||||
|
+ " %(early_self_spec)", \
|
||||||
|
+ MLIB_SELF_SPECS \
|
||||||
|
" %:driver_init()" \
|
||||||
|
" %{c|S|E|nostdlib: %:set_no_link()}" \
|
||||||
|
" %{nostartfiles: %{nodefaultlibs: %:set_no_link()}}" \
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index c7e91a06d..a443a6427 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -64,56 +64,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define NM_FLAGS "-Bn"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
-/* SUBTARGET_ASM_SPEC is always passed to the assembler. It may be
|
||||||
|
- overridden by subtargets. */
|
||||||
|
-
|
||||||
|
-#ifndef SUBTARGET_ASM_SPEC
|
||||||
|
-#define SUBTARGET_ASM_SPEC ""
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
-#undef ASM_SPEC
|
||||||
|
-#define ASM_SPEC "%{mabi=*} %{subtarget_asm_spec}"
|
||||||
|
-
|
||||||
|
-/* Extra switches sometimes passed to the linker. */
|
||||||
|
-
|
||||||
|
-#ifndef LINK_SPEC
|
||||||
|
-#define LINK_SPEC ""
|
||||||
|
-#endif /* LINK_SPEC defined */
|
||||||
|
-
|
||||||
|
-/* Specs for the compiler proper. */
|
||||||
|
-
|
||||||
|
-/* CC1_SPEC is the set of arguments to pass to the compiler proper. */
|
||||||
|
-
|
||||||
|
-#undef CC1_SPEC
|
||||||
|
-#define CC1_SPEC "%{,ada:-gnatea} %{m*} \
|
||||||
|
-%{G*} \
|
||||||
|
-%(subtarget_cc1_spec) %{,ada:-gnatez}"
|
||||||
|
-
|
||||||
|
-/* Preprocessor specs. */
|
||||||
|
-
|
||||||
|
-/* SUBTARGET_CPP_SPEC is passed to the preprocessor. It may be
|
||||||
|
- overridden by subtargets. */
|
||||||
|
-#ifndef SUBTARGET_CPP_SPEC
|
||||||
|
-#define SUBTARGET_CPP_SPEC ""
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
-#define CPP_SPEC "%(subtarget_cpp_spec)"
|
||||||
|
-
|
||||||
|
-/* This macro defines names of additional specifications to put in the specs
|
||||||
|
- that can be used in various specifications like CC1_SPEC. Its definition
|
||||||
|
- is an initializer with a subgrouping for each command option.
|
||||||
|
-
|
||||||
|
- Each subgrouping contains a string constant, that defines the
|
||||||
|
- specification name, and a string constant that used by the GCC driver
|
||||||
|
- program.
|
||||||
|
-
|
||||||
|
- Do not define this macro if it does not need to do anything. */
|
||||||
|
-
|
||||||
|
-#define EXTRA_SPECS \
|
||||||
|
- {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
|
||||||
|
- {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
|
||||||
|
- {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
|
||||||
|
-
|
||||||
|
/* Registers may have a prefix which can be ignored when matching
|
||||||
|
user asm and register definitions. */
|
||||||
|
#ifndef REGISTER_PREFIX
|
||||||
|
diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
|
||||||
|
index 62a870b66..7cd7cde25 100644
|
||||||
|
--- a/gcc/config/loongarch/t-linux
|
||||||
|
+++ b/gcc/config/loongarch/t-linux
|
||||||
|
@@ -16,68 +16,16 @@
|
||||||
|
# along with GCC; see the file COPYING3. If not see
|
||||||
|
# <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
-# Multilib
|
||||||
|
-MULTILIB_OPTIONS = mabi=lp64d/mabi=lp64f/mabi=lp64s
|
||||||
|
-MULTILIB_DIRNAMES = base/lp64d base/lp64f base/lp64s
|
||||||
|
-
|
||||||
|
-# The GCC driver always gets all abi-related options on the command line.
|
||||||
|
-# (see loongarch-driver.c:driver_get_normalized_m_opts)
|
||||||
|
-comma=,
|
||||||
|
-MULTILIB_REQUIRED = $(foreach mlib,$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\
|
||||||
|
- $(firstword $(subst /, ,$(mlib))))
|
||||||
|
-
|
||||||
|
-SPECS = specs.install
|
||||||
|
-
|
||||||
|
-# temporary self_spec when building libraries (e.g. libgcc)
|
||||||
|
-gen_mlib_spec = $(if $(word 2,$1),\
|
||||||
|
- %{$(firstword $1):$(patsubst %,-%,$(wordlist 2,$(words $1),$1))})
|
||||||
|
-
|
||||||
|
-# clean up the result of DRIVER_SELF_SPEC to avoid conflict
|
||||||
|
-lib_build_self_spec = %<march=* %<mtune=* %<mcmodel=* %<mfpu=* %<msimd=*
|
||||||
|
-
|
||||||
|
-# append user-specified build options from --with-multilib-list
|
||||||
|
-lib_build_self_spec += $(foreach mlib,\
|
||||||
|
- $(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\
|
||||||
|
- $(call gen_mlib_spec,$(subst /, ,$(mlib))))
|
||||||
|
-
|
||||||
|
-specs: specs.install
|
||||||
|
- sed '/^*self_spec:$$/{ n;s/^$$/$(lib_build_self_spec)/g; }' $< > $@
|
||||||
|
-
|
||||||
|
-# Do some preparation before regression tests:
|
||||||
|
-# remove lib-build-specs / make symlinks for the toplevel multilib variant
|
||||||
|
-
|
||||||
|
-LA_DEFAULT_MULTISUBDIR = $(shell $(GCC_FOR_TARGET) --print-multi-dir)
|
||||||
|
-.PHONY: remove-lib-specs
|
||||||
|
-check check-host check-target $(CHECK_TARGETS) $(lang_checks): remove-lib-specs
|
||||||
|
-remove-lib-specs:
|
||||||
|
- -mv -f specs.install specs 2>/dev/null
|
||||||
|
- -mv $(LA_DEFAULT_MULTISUBDIR)/* ./
|
||||||
|
- -mkdir -p ../$(target_noncanonical)/`dirname $(LA_DEFAULT_MULTISUBDIR)`
|
||||||
|
- -$(LN_S) .. ../$(target_noncanonical)/$(LA_DEFAULT_MULTISUBDIR)
|
||||||
|
-
|
||||||
|
-# Multiarch
|
||||||
|
-ifneq ($(call if_multiarch,yes),yes)
|
||||||
|
- # Define LA_DISABLE_MULTIARCH if multiarch is disabled.
|
||||||
|
- tm_defines += LA_DISABLE_MULTIARCH
|
||||||
|
-else
|
||||||
|
- # Only define MULTIARCH_DIRNAME when multiarch is enabled,
|
||||||
|
- # or it would always introduce ${target} into the search path.
|
||||||
|
- MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET)
|
||||||
|
-endif
|
||||||
|
+MULTIOSDIR_lp64d := ../lib64$(call if_multiarch,:loongarch64-linux-gnu)
|
||||||
|
+MULTIOSDIR_lp64f := ../lib64/f32$(call if_multiarch,:loongarch64-linux-gnuf32)
|
||||||
|
+MULTIOSDIR_lp64s := ../lib64/sf$(call if_multiarch,:loongarch64-linux-gnusf)
|
||||||
|
|
||||||
|
# Don't define MULTILIB_OSDIRNAMES if multilib is disabled.
|
||||||
|
ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
|
||||||
|
|
||||||
|
- MULTILIB_OSDIRNAMES = \
|
||||||
|
- mabi.lp64d=../lib64$\
|
||||||
|
- $(call if_multiarch,:loongarch64-linux-gnu)
|
||||||
|
-
|
||||||
|
- MULTILIB_OSDIRNAMES += \
|
||||||
|
- mabi.lp64f=../lib64/f32$\
|
||||||
|
- $(call if_multiarch,:loongarch64-linux-gnuf32)
|
||||||
|
-
|
||||||
|
- MULTILIB_OSDIRNAMES += \
|
||||||
|
- mabi.lp64s=../lib64/sf$\
|
||||||
|
- $(call if_multiarch,:loongarch64-linux-gnusf)
|
||||||
|
+ MULTILIB_OSDIRNAMES = .=$(MULTIOSDIR_$(mlib_default))
|
||||||
|
+ MULTILIB_OSDIRNAMES += mabi.lp64d=$(MULTIOSDIR_lp64d)
|
||||||
|
+ MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f)
|
||||||
|
+ MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s)
|
||||||
|
|
||||||
|
endif
|
||||||
|
diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
|
||||||
|
index e73f4f437..28cfb49df 100644
|
||||||
|
--- a/gcc/config/loongarch/t-loongarch
|
||||||
|
+++ b/gcc/config/loongarch/t-loongarch
|
||||||
|
@@ -16,7 +16,7 @@
|
||||||
|
# along with GCC; see the file COPYING3. If not see
|
||||||
|
# <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
-TM_H += $(srcdir)/config/loongarch/loongarch-driver.h
|
||||||
|
+TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
|
||||||
|
OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
|
||||||
|
$(srcdir)/config/loongarch/loongarch-tune.h
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/t-multilib b/gcc/config/loongarch/t-multilib
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..bf6c18298
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/t-multilib
|
||||||
|
@@ -0,0 +1,68 @@
|
||||||
|
+# Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+#
|
||||||
|
+# This file is part of GCC.
|
||||||
|
+#
|
||||||
|
+# GCC is free software; you can redistribute it and/or modify
|
||||||
|
+# it under the terms of the GNU General Public License as published by
|
||||||
|
+# the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+# any later version.
|
||||||
|
+#
|
||||||
|
+# GCC is distributed in the hope that it will be useful,
|
||||||
|
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+# GNU General Public License for more details.
|
||||||
|
+#
|
||||||
|
+# You should have received a copy of the GNU General Public License
|
||||||
|
+# along with GCC; see the file COPYING3. If not see
|
||||||
|
+# <http://www.gnu.org/licenses/>.
|
||||||
|
+
|
||||||
|
+# Helper definitions
|
||||||
|
+comma=,
|
||||||
|
+null :=
|
||||||
|
+space := $(null) #
|
||||||
|
+exclude_1st = $(wordlist 2,$(words $1),$1)
|
||||||
|
+
|
||||||
|
+# Common definitions
|
||||||
|
+mlib_all := lp64d lp64f lp64s
|
||||||
|
+$(foreach i,$(mlib_all),$(eval MULTISUBDIR_$i := base/$i))
|
||||||
|
+
|
||||||
|
+mlib_default := $(firstword $(subst $(comma), ,$(TM_MULTILIB_CONFIG)))
|
||||||
|
+mlib_all := $(filter-out $(mlib_default),$(mlib_all))
|
||||||
|
+
|
||||||
|
+MULTILIB_OPTIONS := $(subst $(space),/,$(foreach i,$(mlib_all),mabi=$(i)))
|
||||||
|
+MULTILIB_DIRNAMES := $(foreach i,$(mlib_all),$(MULTISUBDIR_$(i)))
|
||||||
|
+
|
||||||
|
+# Customize builds with --with-multilib-list
|
||||||
|
+MULTILIB_REQUIRED := $(foreach i,$(call exclude_1st,\
|
||||||
|
+ $(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\
|
||||||
|
+ $(firstword $(subst /, ,$(i))))
|
||||||
|
+
|
||||||
|
+## spec rules for building libraries, triggered by -fmultiflags
|
||||||
|
+gen_mlib_spec = $(if $(word 2,$1),\
|
||||||
|
+ %{$(firstword $1):$(patsubst %,-%,$(call exclude_1st,$1)}))
|
||||||
|
+
|
||||||
|
+lib_build_spec = $(foreach mlib,\
|
||||||
|
+ $(call exclude_1st,$(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\
|
||||||
|
+ $(call gen_mlib_spec,$(subst /, ,$(mlib))))
|
||||||
|
+
|
||||||
|
+default_mlib_spec := %{fmultiflags:%{!mabi=*:-mabi=$(mlib_default)}}
|
||||||
|
+lib_build_spec := %{fmultiflags:$(lib_build_spec)}
|
||||||
|
+
|
||||||
|
+ifneq ($(TM_MULTILIB_CONFIG),)
|
||||||
|
+loongarch-multilib.h:
|
||||||
|
+ @echo "#define MLIB_SELF_SPECS" \
|
||||||
|
+ "\"$(default_mlib_spec)\"," \
|
||||||
|
+ "\"$(lib_build_spec)\"," > $@
|
||||||
|
+else
|
||||||
|
+loongarch-multilib.h: ; @touch $@
|
||||||
|
+endif
|
||||||
|
+
|
||||||
|
+# Multiarch
|
||||||
|
+ifneq ($(call if_multiarch,yes),yes)
|
||||||
|
+ # Define LA_DISABLE_MULTIARCH if multiarch is disabled.
|
||||||
|
+ tm_defines += LA_DISABLE_MULTIARCH
|
||||||
|
+else
|
||||||
|
+ # Only define MULTIARCH_DIRNAME when multiarch is enabled,
|
||||||
|
+ # or it would always introduce ${target} into the search path.
|
||||||
|
+ MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET)
|
||||||
|
+endif
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
192
0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
Normal file
192
0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
Normal file
@ -0,0 +1,192 @@
|
|||||||
|
From 13c33536900709bf1f33171d5ae2b2af97789601 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Fri, 15 Sep 2023 10:22:49 +0800
|
||||||
|
Subject: [PATCH 002/188] LoongArch: Check whether binutils supports the relax
|
||||||
|
function. If supported, explicit relocs are turned off by default.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.in: Regenerate.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in: Add compilation option
|
||||||
|
mrelax. And set the initial value of explicit-relocs according to the
|
||||||
|
detection status.
|
||||||
|
* config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
|
||||||
|
--no-relax option to the linker.
|
||||||
|
* config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
|
||||||
|
-mno-relax, pass the -mno-relax option to the assembler.
|
||||||
|
* config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define macro.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* configure: Regenerate.
|
||||||
|
* configure.ac: Add detection of support for binutils relax function.
|
||||||
|
---
|
||||||
|
gcc/config.in | 6 ++++
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++++-
|
||||||
|
gcc/config/loongarch/gnu-user.h | 3 +-
|
||||||
|
gcc/config/loongarch/loongarch-driver.h | 2 +-
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 +++
|
||||||
|
gcc/config/loongarch/loongarch.opt | 7 ++++-
|
||||||
|
gcc/configure | 31 +++++++++++++++++++
|
||||||
|
gcc/configure.ac | 4 +++
|
||||||
|
8 files changed, 60 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config.in b/gcc/config.in
|
||||||
|
index 0dff36199..0c55e67e7 100644
|
||||||
|
--- a/gcc/config.in
|
||||||
|
+++ b/gcc/config.in
|
||||||
|
@@ -637,6 +637,12 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define if your assembler supports -mrelax option. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_AS_MRELAX_OPTION
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define if your assembler supports .mspabi_attribute. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef HAVE_AS_MSPABI_ATTRIBUTE
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index 2ef1b1e3b..f18733c24 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -181,7 +181,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
|
||||||
|
-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024.
|
||||||
|
|
||||||
|
mexplicit-relocs
|
||||||
|
-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
|
||||||
|
+Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
|
||||||
|
Use %reloc() assembly operators.
|
||||||
|
|
||||||
|
; The code model option names for -mcmodel.
|
||||||
|
@@ -214,3 +214,8 @@ Specify the code model.
|
||||||
|
mdirect-extern-access
|
||||||
|
Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
|
||||||
|
Avoid using the GOT to access external symbols.
|
||||||
|
+
|
||||||
|
+mrelax
|
||||||
|
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Take advantage of linker relaxations to reduce the number of instructions
|
||||||
|
+required to materialize symbol addresses.
|
||||||
|
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
|
||||||
|
index 44e4f2575..60ef75601 100644
|
||||||
|
--- a/gcc/config/loongarch/gnu-user.h
|
||||||
|
+++ b/gcc/config/loongarch/gnu-user.h
|
||||||
|
@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
"%{!shared: %{static} " \
|
||||||
|
"%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \
|
||||||
|
"-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \
|
||||||
|
- "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
|
||||||
|
+ "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \
|
||||||
|
+ "%{mno-relax: --no-relax}"
|
||||||
|
|
||||||
|
|
||||||
|
/* Similar to standard Linux, but adding -ffast-math support. */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
index e7d083677..59fa3263d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
@@ -53,7 +53,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
#undef ASM_SPEC
|
||||||
|
#define ASM_SPEC \
|
||||||
|
- "%{mabi=*} %(subtarget_asm_spec)"
|
||||||
|
+ "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
|
||||||
|
|
||||||
|
|
||||||
|
extern const char*
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 624e246bb..f2b59abe6 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -99,4 +99,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
#define HAVE_AS_EXPLICIT_RELOCS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef HAVE_AS_MRELAX_OPTION
|
||||||
|
+#define HAVE_AS_MRELAX_OPTION 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#endif /* LOONGARCH_OPTS_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index f2d21c9f3..78f2baf3a 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -188,7 +188,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
|
||||||
|
-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024.
|
||||||
|
|
||||||
|
mexplicit-relocs
|
||||||
|
-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
|
||||||
|
+Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
|
||||||
|
Use %reloc() assembly operators.
|
||||||
|
|
||||||
|
; The code model option names for -mcmodel.
|
||||||
|
@@ -221,3 +221,8 @@ Specify the code model.
|
||||||
|
mdirect-extern-access
|
||||||
|
Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
|
||||||
|
Avoid using the GOT to access external symbols.
|
||||||
|
+
|
||||||
|
+mrelax
|
||||||
|
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Take advantage of linker relaxations to reduce the number of instructions
|
||||||
|
+required to materialize symbol addresses.
|
||||||
|
diff --git a/gcc/configure b/gcc/configure
|
||||||
|
index 2a5d3aaf3..8ae8a924a 100755
|
||||||
|
--- a/gcc/configure
|
||||||
|
+++ b/gcc/configure
|
||||||
|
@@ -28830,6 +28830,37 @@ if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
|
||||||
|
|
||||||
|
$as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
|
||||||
|
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax option" >&5
|
||||||
|
+$as_echo_n "checking assembler for -mrelax option... " >&6; }
|
||||||
|
+if ${gcc_cv_as_loongarch_relax+:} false; then :
|
||||||
|
+ $as_echo_n "(cached) " >&6
|
||||||
|
+else
|
||||||
|
+ gcc_cv_as_loongarch_relax=no
|
||||||
|
+ if test x$gcc_cv_as != x; then
|
||||||
|
+ $as_echo '.text' > conftest.s
|
||||||
|
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mrelax -o conftest.o conftest.s >&5'
|
||||||
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
+ (eval $ac_try) 2>&5
|
||||||
|
+ ac_status=$?
|
||||||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
+ test $ac_status = 0; }; }
|
||||||
|
+ then
|
||||||
|
+ gcc_cv_as_loongarch_relax=yes
|
||||||
|
+ else
|
||||||
|
+ echo "configure: failed program was" >&5
|
||||||
|
+ cat conftest.s >&5
|
||||||
|
+ fi
|
||||||
|
+ rm -f conftest.o conftest.s
|
||||||
|
+ fi
|
||||||
|
+fi
|
||||||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_relax" >&5
|
||||||
|
+$as_echo "$gcc_cv_as_loongarch_relax" >&6; }
|
||||||
|
+if test $gcc_cv_as_loongarch_relax = yes; then
|
||||||
|
+
|
||||||
|
+$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
|
||||||
|
+
|
||||||
|
fi
|
||||||
|
|
||||||
|
;;
|
||||||
|
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||||
|
index ba2bf1ffc..f7161e66e 100644
|
||||||
|
--- a/gcc/configure.ac
|
||||||
|
+++ b/gcc/configure.ac
|
||||||
|
@@ -5322,6 +5322,10 @@ x:
|
||||||
|
.cfi_endproc],,
|
||||||
|
[AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
|
||||||
|
[Define if your assembler supports eh_frame pcrel encoding.])])
|
||||||
|
+ gcc_GAS_CHECK_FEATURE([-mrelax option], gcc_cv_as_loongarch_relax,
|
||||||
|
+ [-mrelax], [.text],,
|
||||||
|
+ [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
|
||||||
|
+ [Define if your assembler supports -mrelax option.])])
|
||||||
|
;;
|
||||||
|
s390*-*-*)
|
||||||
|
gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
115
0003-Modify-gas-uleb128-support-test.patch
Normal file
115
0003-Modify-gas-uleb128-support-test.patch
Normal file
@ -0,0 +1,115 @@
|
|||||||
|
From 38c338555e64da83fd35c608a1a89d738e1ca356 Mon Sep 17 00:00:00 2001
|
||||||
|
From: mengqinggang <mengqinggang@loongson.cn>
|
||||||
|
Date: Fri, 15 Sep 2023 12:04:04 +0800
|
||||||
|
Subject: [PATCH 003/188] Modify gas uleb128 support test
|
||||||
|
|
||||||
|
Some assemblers (GNU as for LoongArch) generate relocations for leb128
|
||||||
|
symbol arithmetic for relaxation, so we need to disable relaxation when
|
||||||
|
probing leb128 support.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* configure: Regenerate.
|
||||||
|
* configure.ac: Checking assembler for -mno-relax support.
|
||||||
|
Disable relaxation when probing leb128 support.
|
||||||
|
|
||||||
|
co-authored-by: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
---
|
||||||
|
gcc/configure | 42 +++++++++++++++++++++++++++++++++++++++++-
|
||||||
|
gcc/configure.ac | 17 ++++++++++++++++-
|
||||||
|
2 files changed, 57 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/configure b/gcc/configure
|
||||||
|
index 8ae8a924a..430d44dc3 100755
|
||||||
|
--- a/gcc/configure
|
||||||
|
+++ b/gcc/configure
|
||||||
|
@@ -24441,6 +24441,46 @@ _ACEOF
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
+# Some assemblers (GNU as for LoongArch) generates relocations for
|
||||||
|
+# leb128 symbol arithmetic for relaxation, we need to disable relaxation
|
||||||
|
+# probing leb128 support then.
|
||||||
|
+case $target in
|
||||||
|
+ loongarch*-*-*)
|
||||||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mno-relax support" >&5
|
||||||
|
+$as_echo_n "checking assembler for -mno-relax support... " >&6; }
|
||||||
|
+if ${gcc_cv_as_mno_relax+:} false; then :
|
||||||
|
+ $as_echo_n "(cached) " >&6
|
||||||
|
+else
|
||||||
|
+ gcc_cv_as_mno_relax=no
|
||||||
|
+ if test x$gcc_cv_as != x; then
|
||||||
|
+ $as_echo '.text' > conftest.s
|
||||||
|
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mno-relax -o conftest.o conftest.s >&5'
|
||||||
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
+ (eval $ac_try) 2>&5
|
||||||
|
+ ac_status=$?
|
||||||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
+ test $ac_status = 0; }; }
|
||||||
|
+ then
|
||||||
|
+ gcc_cv_as_mno_relax=yes
|
||||||
|
+ else
|
||||||
|
+ echo "configure: failed program was" >&5
|
||||||
|
+ cat conftest.s >&5
|
||||||
|
+ fi
|
||||||
|
+ rm -f conftest.o conftest.s
|
||||||
|
+ fi
|
||||||
|
+fi
|
||||||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mno_relax" >&5
|
||||||
|
+$as_echo "$gcc_cv_as_mno_relax" >&6; }
|
||||||
|
+if test $gcc_cv_as_mno_relax = yes; then
|
||||||
|
+ check_leb128_asflags=-mno-relax
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+ ;;
|
||||||
|
+ *)
|
||||||
|
+ check_leb128_asflags=
|
||||||
|
+ ;;
|
||||||
|
+esac
|
||||||
|
+
|
||||||
|
# Check if we have .[us]leb128, and support symbol arithmetic with it.
|
||||||
|
# Older versions of GAS and some non-GNU assemblers, have a bugs handling
|
||||||
|
# these directives, even when they appear to accept them.
|
||||||
|
@@ -24459,7 +24499,7 @@ L1:
|
||||||
|
L2:
|
||||||
|
.uleb128 0x8000000000000000
|
||||||
|
' > conftest.s
|
||||||
|
- if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
|
||||||
|
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags $check_leb128_asflags -o conftest.o conftest.s >&5'
|
||||||
|
{ { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
(eval $ac_try) 2>&5
|
||||||
|
ac_status=$?
|
||||||
|
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||||
|
index f7161e66e..4b24db190 100644
|
||||||
|
--- a/gcc/configure.ac
|
||||||
|
+++ b/gcc/configure.ac
|
||||||
|
@@ -3185,10 +3185,25 @@ AC_MSG_RESULT($gcc_cv_ld_ro_rw_mix)
|
||||||
|
|
||||||
|
gcc_AC_INITFINI_ARRAY
|
||||||
|
|
||||||
|
+# Some assemblers (GNU as for LoongArch) generates relocations for
|
||||||
|
+# leb128 symbol arithmetic for relaxation, we need to disable relaxation
|
||||||
|
+# probing leb128 support then.
|
||||||
|
+case $target in
|
||||||
|
+ loongarch*-*-*)
|
||||||
|
+ gcc_GAS_CHECK_FEATURE([-mno-relax support],
|
||||||
|
+ gcc_cv_as_mno_relax,[-mno-relax],[.text],,
|
||||||
|
+ [check_leb128_asflags=-mno-relax])
|
||||||
|
+ ;;
|
||||||
|
+ *)
|
||||||
|
+ check_leb128_asflags=
|
||||||
|
+ ;;
|
||||||
|
+esac
|
||||||
|
+
|
||||||
|
# Check if we have .[us]leb128, and support symbol arithmetic with it.
|
||||||
|
# Older versions of GAS and some non-GNU assemblers, have a bugs handling
|
||||||
|
# these directives, even when they appear to accept them.
|
||||||
|
-gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,,
|
||||||
|
+gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,
|
||||||
|
+[$check_leb128_asflags],
|
||||||
|
[ .data
|
||||||
|
.uleb128 L2 - L1
|
||||||
|
L1:
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
1310
0004-LoongArch-Optimizations-of-vector-construction.patch
Normal file
1310
0004-LoongArch-Optimizations-of-vector-construction.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,51 @@
|
|||||||
|
From 9b2cbf361e38ea1ad672c2b8c8cf1dda4f6f7d72 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 2 Oct 2023 18:51:00 +0800
|
||||||
|
Subject: [PATCH 005/188] LoongArch: Replace UNSPEC_FCOPYSIGN with copysign RTL
|
||||||
|
|
||||||
|
When I added copysign support for LoongArch (r13-3702), we did not have
|
||||||
|
a copysign RTL insn, so I had to use UNSPEC to represent the copysign
|
||||||
|
instruction. Now the copysign RTX code has been added in r14-1586, so
|
||||||
|
this patch removes those UNSPECs, and it uses the native RTL copysign
|
||||||
|
insn.
|
||||||
|
|
||||||
|
Inspired by rs6000 patch "Cleanup: Replace UNSPEC_COPYSIGN with copysign
|
||||||
|
RTL" [1] from Michael Meissner.
|
||||||
|
|
||||||
|
[1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631701.html
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): Delete.
|
||||||
|
(copysign<mode>3): Use copysign RTL instead of UNSPEC.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 6 ++----
|
||||||
|
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 63ff32e75..73e2cbe0b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -37,7 +37,6 @@
|
||||||
|
UNSPEC_FCLASS
|
||||||
|
UNSPEC_FMAX
|
||||||
|
UNSPEC_FMIN
|
||||||
|
- UNSPEC_FCOPYSIGN
|
||||||
|
UNSPEC_FTINT
|
||||||
|
UNSPEC_FTINTRM
|
||||||
|
UNSPEC_FTINTRP
|
||||||
|
@@ -1129,9 +1128,8 @@
|
||||||
|
|
||||||
|
(define_insn "copysign<mode>3"
|
||||||
|
[(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||||
|
- (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
|
||||||
|
- (match_operand:ANYF 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_FCOPYSIGN))]
|
||||||
|
+ (copysign:ANYF (match_operand:ANYF 1 "register_operand" "f")
|
||||||
|
+ (match_operand:ANYF 2 "register_operand" "f")))]
|
||||||
|
"TARGET_HARD_FLOAT"
|
||||||
|
"fcopysign.<fmt>\t%0,%1,%2"
|
||||||
|
[(set_attr "type" "fcopysign")
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,71 @@
|
|||||||
|
From 746109cb61d6f3db4c25a9a107f30996c17f11db Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yang Yujie <yangyujie@loongson.cn>
|
||||||
|
Date: Wed, 11 Oct 2023 17:59:53 +0800
|
||||||
|
Subject: [PATCH 006/188] LoongArch: Adjust makefile dependency for loongarch
|
||||||
|
headers.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.gcc: Add loongarch-driver.h to tm_files.
|
||||||
|
* config/loongarch/loongarch.h: Do not include loongarch-driver.h.
|
||||||
|
* config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H)
|
||||||
|
instead of $(TM_H) for building generator programs.
|
||||||
|
---
|
||||||
|
gcc/config.gcc | 4 ++--
|
||||||
|
gcc/config/loongarch/loongarch.h | 3 ---
|
||||||
|
gcc/config/loongarch/t-loongarch | 3 ++-
|
||||||
|
3 files changed, 4 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||||
|
index e34a5fbb9..11ab620d0 100644
|
||||||
|
--- a/gcc/config.gcc
|
||||||
|
+++ b/gcc/config.gcc
|
||||||
|
@@ -2508,7 +2508,7 @@ riscv*-*-freebsd*)
|
||||||
|
|
||||||
|
loongarch*-*-linux*)
|
||||||
|
tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
|
||||||
|
- tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
|
||||||
|
+ tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h"
|
||||||
|
extra_options="${extra_options} linux-android.opt"
|
||||||
|
tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
|
||||||
|
gnu_ld=yes
|
||||||
|
@@ -2521,7 +2521,7 @@ loongarch*-*-linux*)
|
||||||
|
|
||||||
|
loongarch*-*-elf*)
|
||||||
|
tm_file="elfos.h newlib-stdint.h ${tm_file}"
|
||||||
|
- tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
|
||||||
|
+ tm_file="${tm_file} loongarch/elf.h loongarch/linux.h loongarch/loongarch-driver.h"
|
||||||
|
tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
|
||||||
|
gnu_ld=yes
|
||||||
|
gas=yes
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index a443a6427..a2dc4ba8c 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -49,9 +49,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
|
||||||
|
#define TARGET_LIBGCC_SDATA_SECTION ".sdata"
|
||||||
|
|
||||||
|
-/* Driver native functions for SPEC processing in the GCC driver. */
|
||||||
|
-#include "loongarch-driver.h"
|
||||||
|
-
|
||||||
|
/* This definition replaces the formerly used 'm' constraint with a
|
||||||
|
different constraint letter in order to avoid changing semantics of
|
||||||
|
the 'm' constraint when accepting new address formats in
|
||||||
|
diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
|
||||||
|
index 28cfb49df..12734c37b 100644
|
||||||
|
--- a/gcc/config/loongarch/t-loongarch
|
||||||
|
+++ b/gcc/config/loongarch/t-loongarch
|
||||||
|
@@ -16,7 +16,8 @@
|
||||||
|
# along with GCC; see the file COPYING3. If not see
|
||||||
|
# <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
-TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
|
||||||
|
+
|
||||||
|
+GTM_H += loongarch-multilib.h
|
||||||
|
OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
|
||||||
|
$(srcdir)/config/loongarch/loongarch-tune.h
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
65
0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
Normal file
65
0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
Normal file
@ -0,0 +1,65 @@
|
|||||||
|
From b75f00086e863ac7e9e1ee37f8107b199cf62550 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chenghui Pan <panchenghui@loongson.cn>
|
||||||
|
Date: Fri, 25 Oct 2024 00:58:01 +0000
|
||||||
|
Subject: [PATCH 007/188] LoongArch: Enable vect.exp for LoongArch. [PR111424]
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/111424
|
||||||
|
* lib/target-supports.exp: Enable vect.exp for LoongArch.
|
||||||
|
---
|
||||||
|
gcc/testsuite/lib/target-supports.exp | 31 +++++++++++++++++++++++++++
|
||||||
|
1 file changed, 31 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||||
|
index 192e0aded..bbe145c1c 100644
|
||||||
|
--- a/gcc/testsuite/lib/target-supports.exp
|
||||||
|
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||||
|
@@ -10535,6 +10535,13 @@ proc check_vect_support_and_set_flags { } {
|
||||||
|
}
|
||||||
|
} elseif [istarget amdgcn-*-*] {
|
||||||
|
set dg-do-what-default run
|
||||||
|
+ } elseif [istarget loongarch*-*-*] {
|
||||||
|
+ lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
|
||||||
|
+ if [check_effective_target_loongarch_asx_hw] {
|
||||||
|
+ set dg-do-what-default run
|
||||||
|
+ } else {
|
||||||
|
+ set dg-do-what-default compile
|
||||||
|
+ }
|
||||||
|
} else {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
@@ -10542,6 +10549,30 @@ proc check_vect_support_and_set_flags { } {
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
+proc check_effective_target_loongarch_sx_hw { } {
|
||||||
|
+ return [check_runtime loongarch_sx_hw {
|
||||||
|
+ #include <lsxintrin.h>
|
||||||
|
+ int main (void)
|
||||||
|
+ {
|
||||||
|
+ __m128i a, b, c;
|
||||||
|
+ c = __lsx_vand_v (a, b);
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+ } "-mlsx"]
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+proc check_effective_target_loongarch_asx_hw { } {
|
||||||
|
+ return [check_runtime loongarch_asx_hw {
|
||||||
|
+ #include <lasxintrin.h>
|
||||||
|
+ int main (void)
|
||||||
|
+ {
|
||||||
|
+ __m256i a, b, c;
|
||||||
|
+ c = __lasx_xvand_v (a, b);
|
||||||
|
+ return 0;
|
||||||
|
+ }
|
||||||
|
+ } "-mlasx"]
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
# Return 1 if the target does *not* require strict alignment.
|
||||||
|
|
||||||
|
proc check_effective_target_non_strict_align {} {
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,48 @@
|
|||||||
|
From 3829ad1963a92526201b42233d2bb4facf7ba8d4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Fri, 15 Sep 2023 11:56:01 +0800
|
||||||
|
Subject: [PATCH 008/188] LoongArch: Delete macro definition
|
||||||
|
ASM_OUTPUT_ALIGN_WITH_NOP.
|
||||||
|
|
||||||
|
There are two reasons for removing this macro definition:
|
||||||
|
1. The default in the assembler is to use the nop instruction for filling.
|
||||||
|
2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]]
|
||||||
|
The third expression is the maximum number of bytes that should be
|
||||||
|
skipped by this alignment directive.
|
||||||
|
Therefore, it will affect the display of the specified alignment rules
|
||||||
|
and affect the operating efficiency.
|
||||||
|
|
||||||
|
This modification relies on binutils commit 1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
|
||||||
|
(Since the assembler will add nop based on the .align information when doing relax,
|
||||||
|
it will cause the conditional branch to go out of bounds during the assembly process.
|
||||||
|
This binutils commit solves this problem.)
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
|
||||||
|
Delete.
|
||||||
|
|
||||||
|
Co-authored-by: Chenghua Xu <xuchenghua@loongson.cn>
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.h | 5 -----
|
||||||
|
1 file changed, 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index a2dc4ba8c..572b538be 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -1058,11 +1058,6 @@ typedef struct {
|
||||||
|
|
||||||
|
#define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
|
||||||
|
|
||||||
|
-/* "nop" instruction 54525952 (andi $r0,$r0,0) is
|
||||||
|
- used for padding. */
|
||||||
|
-#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
|
||||||
|
- fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
|
||||||
|
-
|
||||||
|
/* This is how to output an assembler line to advance the location
|
||||||
|
counter by SIZE bytes. */
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
105
0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
Normal file
105
0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
Normal file
@ -0,0 +1,105 @@
|
|||||||
|
From aa947bf395b5722a23f2edd9d6302e220473d900 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chenghui Pan <panchenghui@loongson.cn>
|
||||||
|
Date: Wed, 11 Oct 2023 16:41:25 +0800
|
||||||
|
Subject: [PATCH 009/188] LoongArch: Fix vec_initv32qiv16qi template to avoid
|
||||||
|
ICE.
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
The following test code triggers an unrecognized insn ICE on LoongArch targets
|
||||||
|
with "-O3 -mlasx":
|
||||||
|
|
||||||
|
void
|
||||||
|
foo (unsigned char *dst, unsigned char *src)
|
||||||
|
{
|
||||||
|
for (int y = 0; y < 16; y++)
|
||||||
|
{
|
||||||
|
for (int x = 0; x < 16; x++)
|
||||||
|
dst[x] = src[x] + 1;
|
||||||
|
dst += 32;
|
||||||
|
src += 32;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ICE info:
|
||||||
|
./test.c: In function ‘foo’:
|
||||||
|
./test.c:8:1: error: unrecognizable insn:
|
||||||
|
8 | }
|
||||||
|
| ^
|
||||||
|
(insn 15 14 16 4 (set (reg:V32QI 185 [ vect__24.7 ])
|
||||||
|
(vec_concat:V32QI (reg:V16QI 186)
|
||||||
|
(const_vector:V16QI [
|
||||||
|
(const_int 0 [0]) repeated x16
|
||||||
|
]))) "./test.c":4:19 -1
|
||||||
|
(nil))
|
||||||
|
during RTL pass: vregs
|
||||||
|
./test.c:8:1: internal compiler error: in extract_insn, at recog.cc:2791
|
||||||
|
0x12028023b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
|
||||||
|
/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:108
|
||||||
|
0x12028026f _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
|
||||||
|
/home/panchenghui/upstream/gcc/gcc/rtl-error.cc:116
|
||||||
|
0x120a03c5b extract_insn(rtx_insn*)
|
||||||
|
/home/panchenghui/upstream/gcc/gcc/recog.cc:2791
|
||||||
|
0x12067ff73 instantiate_virtual_regs_in_insn
|
||||||
|
/home/panchenghui/upstream/gcc/gcc/function.cc:1610
|
||||||
|
0x12067ff73 instantiate_virtual_regs
|
||||||
|
/home/panchenghui/upstream/gcc/gcc/function.cc:1983
|
||||||
|
0x12067ff73 execute
|
||||||
|
/home/panchenghui/upstream/gcc/gcc/function.cc:2030
|
||||||
|
|
||||||
|
This RTL is generated inside loongarch_expand_vector_group_init function (related
|
||||||
|
to vec_initv32qiv16qi template). Original impl doesn't ensure all vec_concat arguments
|
||||||
|
are register type. This patch adds force_reg() to the vec_concat argument generation.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_expand_vector_group_init):
|
||||||
|
fix impl related to vec_initv32qiv16qi template to avoid ICE.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 3 ++-
|
||||||
|
.../loongarch/vector/lasx/lasx-vec-init-1.c | 14 ++++++++++++++
|
||||||
|
2 files changed, 16 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 760b12268..9a629a999 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -10188,7 +10188,8 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
|
||||||
|
void
|
||||||
|
loongarch_expand_vector_group_init (rtx target, rtx vals)
|
||||||
|
{
|
||||||
|
- rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
|
||||||
|
+ rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
|
||||||
|
+ force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
|
||||||
|
emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
|
||||||
|
ops[1])));
|
||||||
|
}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..28be32982
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3" } */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo (unsigned char *dst, unsigned char *src)
|
||||||
|
+{
|
||||||
|
+ for (int y = 0; y < 16; y++)
|
||||||
|
+ {
|
||||||
|
+ for (int x = 0; x < 16; x++)
|
||||||
|
+ dst[x] = src[x] + 1;
|
||||||
|
+ dst += 32;
|
||||||
|
+ src += 32;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,35 @@
|
|||||||
|
From 35bce671a97b27a41c425109ba92b24ab87ff35b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 17 Oct 2023 21:55:05 +0800
|
||||||
|
Subject: [PATCH 010/188] LoongArch: Use fcmp.caf.s instead of movgr2cf for
|
||||||
|
zeroing a fcc
|
||||||
|
|
||||||
|
During the review of an LLVM change [1], on LA464 we found that zeroing
|
||||||
|
an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0.
|
||||||
|
|
||||||
|
[1]: https://github.com/llvm/llvm-project/pull/69300
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for
|
||||||
|
zeroing a fcc.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 73e2cbe0b..5f9e63d66 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2150,7 +2150,7 @@
|
||||||
|
[(set (match_operand:FCC 0 "register_operand" "=z")
|
||||||
|
(const_int 0))]
|
||||||
|
""
|
||||||
|
- "movgr2cf\t%0,$r0")
|
||||||
|
+ "fcmp.caf.s\t%0,$f0,$f0")
|
||||||
|
|
||||||
|
;; Conditional move instructions.
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
389
0011-LoongArch-Implement-avg-and-sad-standard-names.patch
Normal file
389
0011-LoongArch-Implement-avg-and-sad-standard-names.patch
Normal file
@ -0,0 +1,389 @@
|
|||||||
|
From 159dd069968fae895f1f663ebda6f53970ec34b1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 18 Oct 2023 17:36:12 +0800
|
||||||
|
Subject: [PATCH 011/188] LoongArch: Implement avg and sad standard names.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md
|
||||||
|
(avg<mode>3_ceil): New patterns.
|
||||||
|
(uavg<mode>3_ceil): Ditto.
|
||||||
|
(avg<mode>3_floor): Ditto.
|
||||||
|
(uavg<mode>3_floor): Ditto.
|
||||||
|
(usadv32qi): Ditto.
|
||||||
|
(ssadv32qi): Ditto.
|
||||||
|
* config/loongarch/lsx.md
|
||||||
|
(avg<mode>3_ceil): New patterns.
|
||||||
|
(uavg<mode>3_ceil): Ditto.
|
||||||
|
(avg<mode>3_floor): Ditto.
|
||||||
|
(uavg<mode>3_floor): Ditto.
|
||||||
|
(usadv16qi): Ditto.
|
||||||
|
(ssadv16qi): Ditto.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/avg-ceil-lasx.c: New test.
|
||||||
|
* gcc.target/loongarch/avg-ceil-lsx.c: New test.
|
||||||
|
* gcc.target/loongarch/avg-floor-lasx.c: New test.
|
||||||
|
* gcc.target/loongarch/avg-floor-lsx.c: New test.
|
||||||
|
* gcc.target/loongarch/sad-lasx.c: New test.
|
||||||
|
* gcc.target/loongarch/sad-lsx.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 78 +++++++++++++++++++
|
||||||
|
gcc/config/loongarch/lsx.md | 78 +++++++++++++++++++
|
||||||
|
.../gcc.target/loongarch/avg-ceil-lasx.c | 22 ++++++
|
||||||
|
.../gcc.target/loongarch/avg-ceil-lsx.c | 22 ++++++
|
||||||
|
.../gcc.target/loongarch/avg-floor-lasx.c | 22 ++++++
|
||||||
|
.../gcc.target/loongarch/avg-floor-lsx.c | 22 ++++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/sad-lasx.c | 20 +++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/sad-lsx.c | 20 +++++
|
||||||
|
8 files changed, 284 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lasx.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lsx.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 2bc5d47ed..c7496d68a 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -5171,3 +5171,81 @@
|
||||||
|
const0_rtx));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
+
|
||||||
|
+(define_expand "avg<mode>3_ceil"
|
||||||
|
+ [(match_operand:ILASX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lasx_xvavgr_s_<lasxfmt> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "uavg<mode>3_ceil"
|
||||||
|
+ [(match_operand:ILASX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lasx_xvavgr_u_<lasxfmt_u> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "avg<mode>3_floor"
|
||||||
|
+ [(match_operand:ILASX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lasx_xvavg_s_<lasxfmt> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "uavg<mode>3_floor"
|
||||||
|
+ [(match_operand:ILASX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILASX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lasx_xvavg_u_<lasxfmt_u> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "usadv32qi"
|
||||||
|
+ [(match_operand:V8SI 0 "register_operand")
|
||||||
|
+ (match_operand:V32QI 1 "register_operand")
|
||||||
|
+ (match_operand:V32QI 2 "register_operand")
|
||||||
|
+ (match_operand:V8SI 3 "register_operand")]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ rtx t1 = gen_reg_rtx (V32QImode);
|
||||||
|
+ rtx t2 = gen_reg_rtx (V16HImode);
|
||||||
|
+ rtx t3 = gen_reg_rtx (V8SImode);
|
||||||
|
+ emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "ssadv32qi"
|
||||||
|
+ [(match_operand:V8SI 0 "register_operand")
|
||||||
|
+ (match_operand:V32QI 1 "register_operand")
|
||||||
|
+ (match_operand:V32QI 2 "register_operand")
|
||||||
|
+ (match_operand:V8SI 3 "register_operand")]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ rtx t1 = gen_reg_rtx (V32QImode);
|
||||||
|
+ rtx t2 = gen_reg_rtx (V16HImode);
|
||||||
|
+ rtx t3 = gen_reg_rtx (V8SImode);
|
||||||
|
+ emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 075f6ba56..b4e92ae9c 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -3581,6 +3581,84 @@
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
+(define_expand "avg<mode>3_ceil"
|
||||||
|
+ [(match_operand:ILSX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lsx_vavgr_s_<lsxfmt> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "uavg<mode>3_ceil"
|
||||||
|
+ [(match_operand:ILSX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lsx_vavgr_u_<lsxfmt_u> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "avg<mode>3_floor"
|
||||||
|
+ [(match_operand:ILSX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lsx_vavg_s_<lsxfmt> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "uavg<mode>3_floor"
|
||||||
|
+ [(match_operand:ILSX_WHB 0 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 1 "register_operand")
|
||||||
|
+ (match_operand:ILSX_WHB 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ emit_insn (gen_lsx_vavg_u_<lsxfmt_u> (operands[0],
|
||||||
|
+ operands[1], operands[2]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "usadv16qi"
|
||||||
|
+ [(match_operand:V4SI 0 "register_operand")
|
||||||
|
+ (match_operand:V16QI 1 "register_operand")
|
||||||
|
+ (match_operand:V16QI 2 "register_operand")
|
||||||
|
+ (match_operand:V4SI 3 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ rtx t1 = gen_reg_rtx (V16QImode);
|
||||||
|
+ rtx t2 = gen_reg_rtx (V8HImode);
|
||||||
|
+ rtx t3 = gen_reg_rtx (V4SImode);
|
||||||
|
+ emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "ssadv16qi"
|
||||||
|
+ [(match_operand:V4SI 0 "register_operand")
|
||||||
|
+ (match_operand:V16QI 1 "register_operand")
|
||||||
|
+ (match_operand:V16QI 2 "register_operand")
|
||||||
|
+ (match_operand:V4SI 3 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ rtx t1 = gen_reg_rtx (V16QImode);
|
||||||
|
+ rtx t2 = gen_reg_rtx (V8HImode);
|
||||||
|
+ rtx t3 = gen_reg_rtx (V4SImode);
|
||||||
|
+ emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
(define_insn "lsx_v<optab>wev_d_w<u>"
|
||||||
|
[(set (match_operand:V2DI 0 "register_operand" "=f")
|
||||||
|
(addsubmul:V2DI
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..16db7bf72
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavgr.b" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavgr.bu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavgr.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavgr.h" } } */
|
||||||
|
+
|
||||||
|
+#define N 1024
|
||||||
|
+
|
||||||
|
+#define TEST(TYPE, NAME) \
|
||||||
|
+ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
|
||||||
|
+ void f_##NAME (void) \
|
||||||
|
+ { \
|
||||||
|
+ int i; \
|
||||||
|
+ for (i = 0; i < N; i++) \
|
||||||
|
+ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+TEST(char, 1);
|
||||||
|
+TEST(short, 2);
|
||||||
|
+TEST(unsigned char, 3);
|
||||||
|
+TEST(unsigned short, 4);
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..94119c23b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlsx" } */
|
||||||
|
+/* { dg-final { scan-assembler "vavgr.b" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vavgr.bu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vavgr.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vavgr.h" } } */
|
||||||
|
+
|
||||||
|
+#define N 1024
|
||||||
|
+
|
||||||
|
+#define TEST(TYPE, NAME) \
|
||||||
|
+ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
|
||||||
|
+ void f_##NAME (void) \
|
||||||
|
+ { \
|
||||||
|
+ int i; \
|
||||||
|
+ for (i = 0; i < N; i++) \
|
||||||
|
+ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+TEST(char, 1);
|
||||||
|
+TEST(short, 2);
|
||||||
|
+TEST(unsigned char, 3);
|
||||||
|
+TEST(unsigned short, 4);
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..da6896531
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavg.b" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavg.bu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavg.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvavg.h" } } */
|
||||||
|
+
|
||||||
|
+#define N 1024
|
||||||
|
+
|
||||||
|
+#define TEST(TYPE, NAME) \
|
||||||
|
+ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
|
||||||
|
+ void f_##NAME (void) \
|
||||||
|
+ { \
|
||||||
|
+ int i; \
|
||||||
|
+ for (i = 0; i < N; i++) \
|
||||||
|
+ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+TEST(char, 1);
|
||||||
|
+TEST(short, 2);
|
||||||
|
+TEST(unsigned char, 3);
|
||||||
|
+TEST(unsigned short, 4);
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..bbb9db527
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
|
||||||
|
@@ -0,0 +1,22 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlsx" } */
|
||||||
|
+/* { dg-final { scan-assembler "vavg.b" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vavg.bu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vavg.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vavg.h" } } */
|
||||||
|
+
|
||||||
|
+#define N 1024
|
||||||
|
+
|
||||||
|
+#define TEST(TYPE, NAME) \
|
||||||
|
+ TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N]; \
|
||||||
|
+ void f_##NAME (void) \
|
||||||
|
+ { \
|
||||||
|
+ int i; \
|
||||||
|
+ for (i = 0; i < N; i++) \
|
||||||
|
+ a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+TEST(char, 1);
|
||||||
|
+TEST(short, 2);
|
||||||
|
+TEST(unsigned char, 3);
|
||||||
|
+TEST(unsigned short, 4);
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lasx.c b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..6c0cdfd97
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c
|
||||||
|
@@ -0,0 +1,20 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+
|
||||||
|
+#define N 1024
|
||||||
|
+
|
||||||
|
+#define TEST(SIGN) \
|
||||||
|
+ SIGN char a_##SIGN[N], b_##SIGN[N]; \
|
||||||
|
+ int f_##SIGN (void) \
|
||||||
|
+ { \
|
||||||
|
+ int i, sum = 0; \
|
||||||
|
+ for (i = 0; i < N; i++) \
|
||||||
|
+ sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);; \
|
||||||
|
+ return sum; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+TEST(signed);
|
||||||
|
+TEST(unsigned);
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler {\txvabsd.bu\t} } } */
|
||||||
|
+/* { dg-final { scan-assembler {\txvabsd.b\t} } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lsx.c b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..b92110a8b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c
|
||||||
|
@@ -0,0 +1,20 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlsx" } */
|
||||||
|
+
|
||||||
|
+#define N 1024
|
||||||
|
+
|
||||||
|
+#define TEST(SIGN) \
|
||||||
|
+ SIGN char a_##SIGN[N], b_##SIGN[N]; \
|
||||||
|
+ int f_##SIGN (void) \
|
||||||
|
+ { \
|
||||||
|
+ int i, sum = 0; \
|
||||||
|
+ for (i = 0; i < N; i++) \
|
||||||
|
+ sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);; \
|
||||||
|
+ return sum; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+TEST(signed);
|
||||||
|
+TEST(unsigned);
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler {\tvabsd.bu\t} } } */
|
||||||
|
+/* { dg-final { scan-assembler {\tvabsd.b\t} } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
403
0012-LoongArch-Implement-vec_widen-standard-names.patch
Normal file
403
0012-LoongArch-Implement-vec_widen-standard-names.patch
Normal file
@ -0,0 +1,403 @@
|
|||||||
|
From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 18 Oct 2023 17:39:40 +0800
|
||||||
|
Subject: [PATCH 012/188] LoongArch: Implement vec_widen standard names.
|
||||||
|
|
||||||
|
Add support for vec_widen lo/hi patterns. These do not directly
|
||||||
|
map onto LoongArch LASX instructions but can be emulated with
|
||||||
|
even/odd + vector merge.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md
|
||||||
|
(vec_widen_<su>mult_even_v8si): New patterns.
|
||||||
|
(vec_widen_<su>add_hi_<mode>): Ditto.
|
||||||
|
(vec_widen_<su>add_lo_<mode>): Ditto.
|
||||||
|
(vec_widen_<su>sub_hi_<mode>): Ditto.
|
||||||
|
(vec_widen_<su>sub_lo_<mode>): Ditto.
|
||||||
|
(vec_widen_<su>mult_hi_<mode>): Ditto.
|
||||||
|
(vec_widen_<su>mult_lo_<mode>): Ditto.
|
||||||
|
* config/loongarch/loongarch.md (u_bool): New iterator.
|
||||||
|
* config/loongarch/loongarch-protos.h
|
||||||
|
(loongarch_expand_vec_widen_hilo): New prototype.
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_expand_vec_interleave): New function.
|
||||||
|
(loongarch_expand_vec_widen_hilo): New function.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-widen-add.c: New test.
|
||||||
|
* gcc.target/loongarch/vect-widen-mul.c: New test.
|
||||||
|
* gcc.target/loongarch/vect-widen-sub.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 82 ++++++++---
|
||||||
|
gcc/config/loongarch/loongarch-protos.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.cc | 137 ++++++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch.md | 2 +
|
||||||
|
.../gcc.target/loongarch/vect-widen-add.c | 24 +++
|
||||||
|
.../gcc.target/loongarch/vect-widen-mul.c | 24 +++
|
||||||
|
.../gcc.target/loongarch/vect-widen-sub.c | 24 +++
|
||||||
|
7 files changed, 277 insertions(+), 17 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index c7496d68a..442fda246 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -5048,23 +5048,71 @@
|
||||||
|
[(set_attr "type" "simd_store")
|
||||||
|
(set_attr "mode" "DI")])
|
||||||
|
|
||||||
|
-(define_insn "vec_widen_<su>mult_even_v8si"
|
||||||
|
- [(set (match_operand:V4DI 0 "register_operand" "=f")
|
||||||
|
- (mult:V4DI
|
||||||
|
- (any_extend:V4DI
|
||||||
|
- (vec_select:V4SI
|
||||||
|
- (match_operand:V8SI 1 "register_operand" "%f")
|
||||||
|
- (parallel [(const_int 0) (const_int 2)
|
||||||
|
- (const_int 4) (const_int 6)])))
|
||||||
|
- (any_extend:V4DI
|
||||||
|
- (vec_select:V4SI
|
||||||
|
- (match_operand:V8SI 2 "register_operand" "f")
|
||||||
|
- (parallel [(const_int 0) (const_int 2)
|
||||||
|
- (const_int 4) (const_int 6)])))))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvmulwev.d.w<u>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "V4DI")])
|
||||||
|
+(define_expand "vec_widen_<su>add_hi_<mode>"
|
||||||
|
+ [(match_operand:<VDMODE256> 0 "register_operand")
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
|
||||||
|
+ <u_bool>, true, "add");
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "vec_widen_<su>add_lo_<mode>"
|
||||||
|
+ [(match_operand:<VDMODE256> 0 "register_operand")
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
|
||||||
|
+ <u_bool>, false, "add");
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "vec_widen_<su>sub_hi_<mode>"
|
||||||
|
+ [(match_operand:<VDMODE256> 0 "register_operand")
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
|
||||||
|
+ <u_bool>, true, "sub");
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "vec_widen_<su>sub_lo_<mode>"
|
||||||
|
+ [(match_operand:<VDMODE256> 0 "register_operand")
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
|
||||||
|
+ <u_bool>, false, "sub");
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "vec_widen_<su>mult_hi_<mode>"
|
||||||
|
+ [(match_operand:<VDMODE256> 0 "register_operand")
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
|
||||||
|
+ <u_bool>, true, "mult");
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_expand "vec_widen_<su>mult_lo_<mode>"
|
||||||
|
+ [(match_operand:<VDMODE256> 0 "register_operand")
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
|
||||||
|
+ (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
|
||||||
|
+ <u_bool>, false, "mult");
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
|
||||||
|
;; Vector reduction operation
|
||||||
|
(define_expand "reduc_plus_scal_v4di"
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
index ea61cf567..163162598 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
@@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void);
|
||||||
|
extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
|
||||||
|
extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode,
|
||||||
|
rtx *);
|
||||||
|
+extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *);
|
||||||
|
|
||||||
|
/* Routines implemented in loongarch-c.c. */
|
||||||
|
void loongarch_cpu_cpp_builtins (cpp_reader *);
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 9a629a999..c0f58f9a9 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
|
||||||
|
return loongarch_expand_vec_perm_even_odd_1 (d, odd);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static void
|
||||||
|
+loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
|
||||||
|
+{
|
||||||
|
+ struct expand_vec_perm_d d;
|
||||||
|
+ unsigned i, nelt, base;
|
||||||
|
+ bool ok;
|
||||||
|
+
|
||||||
|
+ d.target = target;
|
||||||
|
+ d.op0 = op0;
|
||||||
|
+ d.op1 = op1;
|
||||||
|
+ d.vmode = GET_MODE (target);
|
||||||
|
+ d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
|
||||||
|
+ d.one_vector_p = false;
|
||||||
|
+ d.testing_p = false;
|
||||||
|
+
|
||||||
|
+ base = high_p ? nelt / 2 : 0;
|
||||||
|
+ for (i = 0; i < nelt / 2; ++i)
|
||||||
|
+ {
|
||||||
|
+ d.perm[i * 2] = i + base;
|
||||||
|
+ d.perm[i * 2 + 1] = i + base + nelt;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ ok = loongarch_expand_vec_perm_interleave (&d);
|
||||||
|
+ gcc_assert (ok);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd
|
||||||
|
+ parts of the double sized result elements in the corresponding elements of
|
||||||
|
+ the target register. That's NOT what the vec_widen_umult_lo/hi patterns are
|
||||||
|
+ expected to do. We emulate the widening lo/hi multiplies with the even/odd
|
||||||
|
+ versions followed by a vector merge. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
|
||||||
|
+ bool uns_p, bool high_p, const char *optab)
|
||||||
|
+{
|
||||||
|
+ machine_mode wmode = GET_MODE (dest);
|
||||||
|
+ machine_mode mode = GET_MODE (op1);
|
||||||
|
+ rtx t1, t2, t3;
|
||||||
|
+
|
||||||
|
+ t1 = gen_reg_rtx (wmode);
|
||||||
|
+ t2 = gen_reg_rtx (wmode);
|
||||||
|
+ t3 = gen_reg_rtx (wmode);
|
||||||
|
+ switch (mode)
|
||||||
|
+ {
|
||||||
|
+ case V16HImode:
|
||||||
|
+ if (!strcmp (optab, "add"))
|
||||||
|
+ {
|
||||||
|
+ if (!uns_p)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else if (!strcmp (optab, "mult"))
|
||||||
|
+ {
|
||||||
|
+ if (!uns_p)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else if (!strcmp (optab, "sub"))
|
||||||
|
+ {
|
||||||
|
+ if (!uns_p)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ case V32QImode:
|
||||||
|
+ if (!strcmp (optab, "add"))
|
||||||
|
+ {
|
||||||
|
+ if (!uns_p)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else if (!strcmp (optab, "mult"))
|
||||||
|
+ {
|
||||||
|
+ if (!uns_p)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ else if (!strcmp (optab, "sub"))
|
||||||
|
+ {
|
||||||
|
+ if (!uns_p)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2));
|
||||||
|
+ emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2));
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_expand_vec_interleave (t3, t1, t2, high_p);
|
||||||
|
+ emit_move_insn (dest, gen_lowpart (wmode, t3));
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Expand a variable vector permutation for LASX. */
|
||||||
|
|
||||||
|
void
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 5f9e63d66..29ac950bf 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -509,6 +509,8 @@
|
||||||
|
;; <su> is like <u>, but the signed form expands to "s" rather than "".
|
||||||
|
(define_code_attr su [(sign_extend "s") (zero_extend "u")])
|
||||||
|
|
||||||
|
+(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")])
|
||||||
|
+
|
||||||
|
;; <optab> expands to the name of the optab for a particular code.
|
||||||
|
(define_code_attr optab [(ashift "ashl")
|
||||||
|
(ashiftrt "ashr")
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..0bf832d0e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
|
||||||
|
@@ -0,0 +1,24 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvaddwev.w.h" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvaddwod.w.h" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvaddwev.w.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvaddwod.w.hu" } } */
|
||||||
|
+
|
||||||
|
+#include <stdint.h>
|
||||||
|
+
|
||||||
|
+#define SIZE 1024
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+wide_uadd (uint32_t *foo, uint16_t *a, uint16_t *b)
|
||||||
|
+{
|
||||||
|
+ for ( int i = 0; i < SIZE; i++)
|
||||||
|
+ foo[i] = a[i] + b[i];
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+wide_sadd (int32_t *foo, int16_t *a, int16_t *b)
|
||||||
|
+{
|
||||||
|
+ for ( int i = 0; i < SIZE; i++)
|
||||||
|
+ foo[i] = a[i] + b[i];
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..84b020eea
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
|
||||||
|
@@ -0,0 +1,24 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvmulwev.w.h" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvmulwod.w.h" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvmulwev.w.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvmulwod.w.hu" } } */
|
||||||
|
+
|
||||||
|
+#include <stdint.h>
|
||||||
|
+
|
||||||
|
+#define SIZE 1024
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+wide_umul (uint32_t *foo, uint16_t *a, uint16_t *b)
|
||||||
|
+{
|
||||||
|
+ for ( int i = 0; i < SIZE; i++)
|
||||||
|
+ foo[i] = a[i] * b[i];
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+wide_smul (int32_t *foo, int16_t *a, int16_t *b)
|
||||||
|
+{
|
||||||
|
+ for ( int i = 0; i < SIZE; i++)
|
||||||
|
+ foo[i] = a[i] * b[i];
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..69fc3a517
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
|
||||||
|
@@ -0,0 +1,24 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvsubwev.w.h" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvsubwod.w.h" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvsubwev.w.hu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvsubwod.w.hu" } } */
|
||||||
|
+
|
||||||
|
+#include <stdint.h>
|
||||||
|
+
|
||||||
|
+#define SIZE 1024
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+wide_usub (uint32_t *foo, uint16_t *a, uint16_t *b)
|
||||||
|
+{
|
||||||
|
+ for ( int i = 0; i < SIZE; i++)
|
||||||
|
+ foo[i] = a[i] - b[i];
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+wide_ssub (int32_t *foo, int16_t *a, int16_t *b)
|
||||||
|
+{
|
||||||
|
+ for ( int i = 0; i < SIZE; i++)
|
||||||
|
+ foo[i] = a[i] - b[i];
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
354
0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
Normal file
354
0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
Normal file
@ -0,0 +1,354 @@
|
|||||||
|
From 472890b43d2848a46fa13945279308f0a21c55d9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 18 Oct 2023 17:43:39 +0800
|
||||||
|
Subject: [PATCH 013/188] LoongArch: Implement the new vector cost model
|
||||||
|
framework.
|
||||||
|
|
||||||
|
This patch makes LoongArch use the new vector hooks and implements the costing
|
||||||
|
function determine_suggested_unroll_factor, so that it can suggest the
|
||||||
|
unroll factor for a given loop being vectorized, based on vec_ops analysis during
|
||||||
|
vector costing and the available issue information. It follows the aarch64 and
|
||||||
|
rs6000 ports.
|
||||||
|
|
||||||
|
The patch also reduces the cost of unaligned stores, making it equal to the
|
||||||
|
cost of aligned ones in order to avoid odd alignment peeling.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from
|
||||||
|
vector_costs. Add a constructor.
|
||||||
|
(loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to
|
||||||
|
adjust the cost for inner loops.
|
||||||
|
(loongarch_vector_costs::count_operations): New function.
|
||||||
|
(loongarch_vector_costs::determine_suggested_unroll_factor): Ditto.
|
||||||
|
(loongarch_vector_costs::finish_cost): Ditto.
|
||||||
|
(loongarch_builtin_vectorization_cost): Adjust.
|
||||||
|
* config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter.
|
||||||
|
(loongarch-vect-issue-info): Ditto.
|
||||||
|
(mmemvec-cost): Delete.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in
|
||||||
|
(loongarch-vect-unroll-limit): Ditto.
|
||||||
|
(loongarch-vect-issue-info): Ditto.
|
||||||
|
(mmemvec-cost): Delete.
|
||||||
|
* doc/invoke.texi (loongarch-vect-unroll-limit): Document new option.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 15 +-
|
||||||
|
gcc/config/loongarch/loongarch.cc | 173 ++++++++++++++++--
|
||||||
|
gcc/config/loongarch/loongarch.opt | 15 +-
|
||||||
|
gcc/doc/invoke.texi | 7 +
|
||||||
|
4 files changed, 188 insertions(+), 22 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index f18733c24..74cf4a7f7 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -152,10 +152,6 @@ mbranch-cost=
|
||||||
|
Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
|
||||||
|
-mbranch-cost=COST Set the cost of branches to roughly COST instructions.
|
||||||
|
|
||||||
|
-mmemvec-cost=
|
||||||
|
-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
|
||||||
|
-mmemvec-cost=COST Set the cost of vector memory access instructions.
|
||||||
|
-
|
||||||
|
mcheck-zero-division
|
||||||
|
Target Mask(CHECK_ZERO_DIV)
|
||||||
|
Trap on integer divide by zero.
|
||||||
|
@@ -219,3 +215,14 @@ mrelax
|
||||||
|
Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
Take advantage of linker relaxations to reduce the number of instructions
|
||||||
|
required to materialize symbol addresses.
|
||||||
|
+
|
||||||
|
+-param=loongarch-vect-unroll-limit=
|
||||||
|
+Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
|
||||||
|
+Used to limit unroll factor which indicates how much the autovectorizer may
|
||||||
|
+unroll a loop. The default value is 6.
|
||||||
|
+
|
||||||
|
+-param=loongarch-vect-issue-info=
|
||||||
|
+Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
|
||||||
|
+Indicate how many non memory access vector instructions can be issued per
|
||||||
|
+cycle, it's used in unroll factor determination for autovectorizer. The
|
||||||
|
+default value is 4.
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index c0f58f9a9..e22a64600 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "rtl-iter.h"
|
||||||
|
#include "opts.h"
|
||||||
|
#include "function-abi.h"
|
||||||
|
+#include "cfgloop.h"
|
||||||
|
+#include "tree-vectorizer.h"
|
||||||
|
|
||||||
|
/* This file should be included last. */
|
||||||
|
#include "target-def.h"
|
||||||
|
@@ -3841,8 +3843,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Vectorizer cost model implementation. */
|
||||||
|
-
|
||||||
|
/* Implement targetm.vectorize.builtin_vectorization_cost. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
@@ -3861,36 +3861,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
|
case vector_load:
|
||||||
|
case vec_to_scalar:
|
||||||
|
case scalar_to_vec:
|
||||||
|
- case cond_branch_not_taken:
|
||||||
|
- case vec_promote_demote:
|
||||||
|
case scalar_store:
|
||||||
|
case vector_store:
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
+ case vec_promote_demote:
|
||||||
|
case vec_perm:
|
||||||
|
return LASX_SUPPORTED_MODE_P (mode)
|
||||||
|
&& !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
|
||||||
|
|
||||||
|
case unaligned_load:
|
||||||
|
- case vector_gather_load:
|
||||||
|
- return 2;
|
||||||
|
-
|
||||||
|
case unaligned_store:
|
||||||
|
- case vector_scatter_store:
|
||||||
|
- return 10;
|
||||||
|
+ return 2;
|
||||||
|
|
||||||
|
case cond_branch_taken:
|
||||||
|
- return 3;
|
||||||
|
+ return 4;
|
||||||
|
+
|
||||||
|
+ case cond_branch_not_taken:
|
||||||
|
+ return 2;
|
||||||
|
|
||||||
|
case vec_construct:
|
||||||
|
elements = TYPE_VECTOR_SUBPARTS (vectype);
|
||||||
|
- return elements / 2 + 1;
|
||||||
|
+ if (ISA_HAS_LASX)
|
||||||
|
+ return elements + 1;
|
||||||
|
+ else
|
||||||
|
+ return elements;
|
||||||
|
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+class loongarch_vector_costs : public vector_costs
|
||||||
|
+{
|
||||||
|
+public:
|
||||||
|
+ using vector_costs::vector_costs;
|
||||||
|
+
|
||||||
|
+ unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
|
||||||
|
+ stmt_vec_info stmt_info, slp_tree, tree vectype,
|
||||||
|
+ int misalign,
|
||||||
|
+ vect_cost_model_location where) override;
|
||||||
|
+ void finish_cost (const vector_costs *) override;
|
||||||
|
+
|
||||||
|
+protected:
|
||||||
|
+ void count_operations (vect_cost_for_stmt, stmt_vec_info,
|
||||||
|
+ vect_cost_model_location, unsigned int);
|
||||||
|
+ unsigned int determine_suggested_unroll_factor (loop_vec_info);
|
||||||
|
+ /* The number of vectorized stmts in loop. */
|
||||||
|
+ unsigned m_stmts = 0;
|
||||||
|
+ /* The number of load and store operations in loop. */
|
||||||
|
+ unsigned m_loads = 0;
|
||||||
|
+ unsigned m_stores = 0;
|
||||||
|
+ /* Reduction factor for suggesting unroll factor. */
|
||||||
|
+ unsigned m_reduc_factor = 0;
|
||||||
|
+ /* True if the loop contains an average operation. */
|
||||||
|
+ bool m_has_avg =false;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* Implement TARGET_VECTORIZE_CREATE_COSTS. */
|
||||||
|
+static vector_costs *
|
||||||
|
+loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
|
||||||
|
+{
|
||||||
|
+ return new loongarch_vector_costs (vinfo, costing_for_scalar);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
|
||||||
|
+ stmt_vec_info stmt_info,
|
||||||
|
+ vect_cost_model_location where,
|
||||||
|
+ unsigned int count)
|
||||||
|
+{
|
||||||
|
+ if (!m_costing_for_scalar
|
||||||
|
+ && is_a<loop_vec_info> (m_vinfo)
|
||||||
|
+ && where == vect_body)
|
||||||
|
+ {
|
||||||
|
+ m_stmts += count;
|
||||||
|
+
|
||||||
|
+ if (kind == scalar_load
|
||||||
|
+ || kind == vector_load
|
||||||
|
+ || kind == unaligned_load)
|
||||||
|
+ m_loads += count;
|
||||||
|
+ else if (kind == scalar_store
|
||||||
|
+ || kind == vector_store
|
||||||
|
+ || kind == unaligned_store)
|
||||||
|
+ m_stores += count;
|
||||||
|
+ else if ((kind == scalar_stmt
|
||||||
|
+ || kind == vector_stmt
|
||||||
|
+ || kind == vec_to_scalar)
|
||||||
|
+ && stmt_info && vect_is_reduction (stmt_info))
|
||||||
|
+ {
|
||||||
|
+ tree lhs = gimple_get_lhs (stmt_info->stmt);
|
||||||
|
+ unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
|
||||||
|
+ m_reduc_factor = MAX (base * count, m_reduc_factor);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
|
||||||
|
+{
|
||||||
|
+ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||||
|
+
|
||||||
|
+ if (m_has_avg)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ /* Don't unroll if it's specified explicitly not to be unrolled. */
|
||||||
|
+ if (loop->unroll == 1
|
||||||
|
+ || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
|
||||||
|
+ || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores;
|
||||||
|
+ /* Don't unroll if no vector instructions excepting for memory access. */
|
||||||
|
+ if (nstmts_nonldst == 0)
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ /* Use this simple hardware resource model that how many non vld/vst
|
||||||
|
+ vector instructions can be issued per cycle. */
|
||||||
|
+ unsigned int issue_info = loongarch_vect_issue_info;
|
||||||
|
+ unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
|
||||||
|
+ unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst);
|
||||||
|
+ uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf);
|
||||||
|
+
|
||||||
|
+ return 1 << ceil_log2 (uf);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
|
||||||
|
+ stmt_vec_info stmt_info, slp_tree,
|
||||||
|
+ tree vectype, int misalign,
|
||||||
|
+ vect_cost_model_location where)
|
||||||
|
+{
|
||||||
|
+ unsigned retval = 0;
|
||||||
|
+
|
||||||
|
+ if (flag_vect_cost_model)
|
||||||
|
+ {
|
||||||
|
+ int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
|
||||||
|
+ misalign);
|
||||||
|
+ retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
|
||||||
|
+ m_costs[where] += retval;
|
||||||
|
+
|
||||||
|
+ count_operations (kind, stmt_info, where, count);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (stmt_info)
|
||||||
|
+ {
|
||||||
|
+ /* Detect the use of an averaging operation. */
|
||||||
|
+ gimple *stmt = stmt_info->stmt;
|
||||||
|
+ if (is_gimple_call (stmt)
|
||||||
|
+ && gimple_call_internal_p (stmt))
|
||||||
|
+ {
|
||||||
|
+ switch (gimple_call_internal_fn (stmt))
|
||||||
|
+ {
|
||||||
|
+ case IFN_AVG_FLOOR:
|
||||||
|
+ case IFN_AVG_CEIL:
|
||||||
|
+ m_has_avg = true;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return retval;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
|
||||||
|
+{
|
||||||
|
+ loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
|
||||||
|
+ if (loop_vinfo)
|
||||||
|
+ {
|
||||||
|
+ m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ vector_costs::finish_cost (scalar_costs);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Implement TARGET_ADDRESS_COST. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
@@ -7261,9 +7407,6 @@ loongarch_option_override_internal (struct gcc_options *opts,
|
||||||
|
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
|
||||||
|
error ("%qs cannot be used for compiling a shared library",
|
||||||
|
"-mdirect-extern-access");
|
||||||
|
- if (loongarch_vector_access_cost == 0)
|
||||||
|
- loongarch_vector_access_cost = 5;
|
||||||
|
-
|
||||||
|
|
||||||
|
switch (la_target.cmodel)
|
||||||
|
{
|
||||||
|
@@ -11275,6 +11418,8 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
|
||||||
|
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
|
||||||
|
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
|
||||||
|
loongarch_builtin_vectorization_cost
|
||||||
|
+#undef TARGET_VECTORIZE_CREATE_COSTS
|
||||||
|
+#define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs
|
||||||
|
|
||||||
|
|
||||||
|
#undef TARGET_IN_SMALL_DATA_P
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index 78f2baf3a..34bd832bd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -159,10 +159,6 @@ mbranch-cost=
|
||||||
|
Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
|
||||||
|
-mbranch-cost=COST Set the cost of branches to roughly COST instructions.
|
||||||
|
|
||||||
|
-mmemvec-cost=
|
||||||
|
-Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
|
||||||
|
-mmemvec-cost=COST Set the cost of vector memory access instructions.
|
||||||
|
-
|
||||||
|
mcheck-zero-division
|
||||||
|
Target Mask(CHECK_ZERO_DIV)
|
||||||
|
Trap on integer divide by zero.
|
||||||
|
@@ -226,3 +222,14 @@ mrelax
|
||||||
|
Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
Take advantage of linker relaxations to reduce the number of instructions
|
||||||
|
required to materialize symbol addresses.
|
||||||
|
+
|
||||||
|
+-param=loongarch-vect-unroll-limit=
|
||||||
|
+Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
|
||||||
|
+Used to limit unroll factor which indicates how much the autovectorizer may
|
||||||
|
+unroll a loop. The default value is 6.
|
||||||
|
+
|
||||||
|
+-param=loongarch-vect-issue-info=
|
||||||
|
+Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
|
||||||
|
+Indicate how many non memory access vector instructions can be issued per
|
||||||
|
+cycle, it's used in unroll factor determination for autovectorizer. The
|
||||||
|
+default value is 4.
|
||||||
|
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||||
|
index 7eed77836..168f3d0db 100644
|
||||||
|
--- a/gcc/doc/invoke.texi
|
||||||
|
+++ b/gcc/doc/invoke.texi
|
||||||
|
@@ -24632,6 +24632,13 @@ environments where no dynamic link is performed, like firmwares, OS
|
||||||
|
kernels, executables linked with @option{-static} or @option{-static-pie}.
|
||||||
|
@option{-mdirect-extern-access} is not compatible with @option{-fPIC} or
|
||||||
|
@option{-fpic}.
|
||||||
|
+
|
||||||
|
+@item loongarch-vect-unroll-limit
|
||||||
|
+The vectorizer will use available tuning information to determine whether it
|
||||||
|
+would be beneficial to unroll the main vectorized loop and by how much. This
|
||||||
|
+parameter sets the upper bound of how much the vectorizer will unroll the main
|
||||||
|
+loop. The default value is six.
|
||||||
|
+
|
||||||
|
@end table
|
||||||
|
|
||||||
|
@node M32C Options
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
34
0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
Normal file
34
0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
From 7f4a912da99f5787c88b275b83ee547c9e1aa3d7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Mon, 23 Oct 2023 09:07:32 +0800
|
||||||
|
Subject: [PATCH 014/188] LoongArch: Define macro CLEAR_INSN_CACHE.
|
||||||
|
|
||||||
|
LoongArch's microarchitecture ensures cache consistency by hardware.
|
||||||
|
Due to out-of-order execution, "ibar" is required to ensure the visibility of the
|
||||||
|
store (invalidated icache) executed by this CPU before "ibar" (to the instance).
|
||||||
|
"ibar" will not invalidate the icache, so the start and end parameters do not affect
|
||||||
|
"ibar" performance.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.h (CLEAR_INSN_CACHE): New definition.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.h | 5 +++++
|
||||||
|
1 file changed, 5 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index 572b538be..f7ddfc452 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -1231,3 +1231,8 @@ struct GTY (()) machine_function
|
||||||
|
(TARGET_HARD_FLOAT_ABI ? (TARGET_DOUBLE_FLOAT_ABI ? 8 : 4) : 0)
|
||||||
|
|
||||||
|
#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)
|
||||||
|
+
|
||||||
|
+/* LoongArch maintains ICache/DCache coherency by hardware,
|
||||||
|
+ we just need "ibar" to avoid instruction hazard here. */
|
||||||
|
+#undef CLEAR_INSN_CACHE
|
||||||
|
+#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
233
0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
Normal file
233
0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
From 56403837a7859f0a7ccbc56c055261c9adf22fb8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 23 Oct 2023 15:23:11 +0800
|
||||||
|
Subject: [PATCH 015/188] LoongArch: Add enum-style -mexplicit-relocs= option
|
||||||
|
|
||||||
|
To strike a better balance between scheduling and relaxation when -flto is
|
||||||
|
enabled, add three-way -mexplicit-relocs={auto,none,always} options.
|
||||||
|
The old -mexplicit-relocs and -mno-explicit-relocs options are still
|
||||||
|
supported, they are mapped to -mexplicit-relocs=always and
|
||||||
|
-mexplicit-relocs=none.
|
||||||
|
|
||||||
|
The default choice is determined by probing assembler capabilities at
|
||||||
|
build time. If the assembler does not support explicit relocs at all,
|
||||||
|
the default will be none; if it supports explicit relocs but not
|
||||||
|
relaxation, the default will be always; if both explicit relocs and
|
||||||
|
relaxation are supported, the default will be auto.
|
||||||
|
|
||||||
|
Currently auto is the same as none. We will make auto more clever in
|
||||||
|
following changes.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/genopts/loongarch-strings: Add strings for
|
||||||
|
-mexplicit-relocs={auto,none,always}.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in: Add options for
|
||||||
|
-mexplicit-relocs={auto,none,always}.
|
||||||
|
* config/loongarch/loongarch-str.h: Regenerate.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* config/loongarch/loongarch-def.h
|
||||||
|
(EXPLICIT_RELOCS_AUTO): Define.
|
||||||
|
(EXPLICIT_RELOCS_NONE): Define.
|
||||||
|
(EXPLICIT_RELOCS_ALWAYS): Define.
|
||||||
|
(N_EXPLICIT_RELOCS_TYPES): Define.
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_option_override_internal): Error out if the old-style
|
||||||
|
-m[no-]explicit-relocs option is used with
|
||||||
|
-mexplicit-relocs={auto,none,always} together. Map
|
||||||
|
-mno-explicit-relocs to -mexplicit-relocs=none and
|
||||||
|
-mexplicit-relocs to -mexplicit-relocs=always for backward
|
||||||
|
compatibility. Set a proper default for -mexplicit-relocs=
|
||||||
|
based on configure-time probed assembler capability. Update a
|
||||||
|
diagnostic message to mention -mexplicit-relocs=always instead
|
||||||
|
of the old-style -mexplicit-relocs.
|
||||||
|
(loongarch_handle_model_attribute): Update a diagnostic message
|
||||||
|
to mention -mexplicit-relocs=always instead of the old-style
|
||||||
|
-mexplicit-relocs.
|
||||||
|
* config/loongarch/loongarch.h (TARGET_EXPLICIT_RELOCS): Define.
|
||||||
|
---
|
||||||
|
.../loongarch/genopts/loongarch-strings | 6 +++++
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 21 ++++++++++++++--
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 6 +++++
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 5 ++++
|
||||||
|
gcc/config/loongarch/loongarch.cc | 24 +++++++++++++++++--
|
||||||
|
gcc/config/loongarch/loongarch.h | 3 +++
|
||||||
|
gcc/config/loongarch/loongarch.opt | 21 ++++++++++++++--
|
||||||
|
7 files changed, 80 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
index eb5086fe3..6c8a42af2 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
@@ -65,3 +65,9 @@ STR_CMODEL_TS tiny-static
|
||||||
|
STR_CMODEL_MEDIUM medium
|
||||||
|
STR_CMODEL_LARGE large
|
||||||
|
STR_CMODEL_EXTREME extreme
|
||||||
|
+
|
||||||
|
+# -mexplicit-relocs
|
||||||
|
+OPTSTR_EXPLICIT_RELOCS explicit-relocs
|
||||||
|
+STR_EXPLICIT_RELOCS_AUTO auto
|
||||||
|
+STR_EXPLICIT_RELOCS_NONE none
|
||||||
|
+STR_EXPLICIT_RELOCS_ALWAYS always
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index 74cf4a7f7..e7df1964a 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -176,10 +176,27 @@ mmax-inline-memcpy-size=
|
||||||
|
Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024)
|
||||||
|
-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024.
|
||||||
|
|
||||||
|
-mexplicit-relocs
|
||||||
|
-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Enum
|
||||||
|
+Name(explicit_relocs) Type(int)
|
||||||
|
+The code model option names for -mexplicit-relocs:
|
||||||
|
+
|
||||||
|
+EnumValue
|
||||||
|
+Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO)
|
||||||
|
+
|
||||||
|
+EnumValue
|
||||||
|
+Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE)
|
||||||
|
+
|
||||||
|
+EnumValue
|
||||||
|
+Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
+
|
||||||
|
+mexplicit-relocs=
|
||||||
|
+Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET)
|
||||||
|
Use %reloc() assembly operators.
|
||||||
|
|
||||||
|
+mexplicit-relocs
|
||||||
|
+Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET)
|
||||||
|
+Use %reloc() assembly operators (for backward compatibility).
|
||||||
|
+
|
||||||
|
; The code model option names for -mcmodel.
|
||||||
|
Enum
|
||||||
|
Name(cmodel) Type(int)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index eb8e53b20..4757de14b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -100,6 +100,12 @@ extern const char* loongarch_cmodel_strings[];
|
||||||
|
#define CMODEL_EXTREME 5
|
||||||
|
#define N_CMODEL_TYPES 6
|
||||||
|
|
||||||
|
+/* enum explicit_relocs */
|
||||||
|
+#define EXPLICIT_RELOCS_AUTO 0
|
||||||
|
+#define EXPLICIT_RELOCS_NONE 1
|
||||||
|
+#define EXPLICIT_RELOCS_ALWAYS 2
|
||||||
|
+#define N_EXPLICIT_RELOCS_TYPES 3
|
||||||
|
+
|
||||||
|
/* The common default value for variables whose assignments
|
||||||
|
are triggered by command-line options. */
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index ecfebf9db..037e9e583 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -64,4 +64,9 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_CMODEL_LARGE "large"
|
||||||
|
#define STR_CMODEL_EXTREME "extreme"
|
||||||
|
|
||||||
|
+#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs"
|
||||||
|
+#define STR_EXPLICIT_RELOCS_AUTO "auto"
|
||||||
|
+#define STR_EXPLICIT_RELOCS_NONE "none"
|
||||||
|
+#define STR_EXPLICIT_RELOCS_ALWAYS "always"
|
||||||
|
+
|
||||||
|
#endif /* LOONGARCH_STR_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index e22a64600..3258c8655 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -7383,6 +7383,25 @@ loongarch_option_override_internal (struct gcc_options *opts,
|
||||||
|
loongarch_update_gcc_opt_status (&la_target, opts, opts_set);
|
||||||
|
loongarch_cpu_option_override (&la_target, opts, opts_set);
|
||||||
|
|
||||||
|
+ if (la_opt_explicit_relocs != M_OPT_UNSET
|
||||||
|
+ && la_opt_explicit_relocs_backward != M_OPT_UNSET)
|
||||||
|
+ error ("do not use %qs (with %qs) and %qs (without %qs) together",
|
||||||
|
+ "-mexplicit-relocs=", "=",
|
||||||
|
+ la_opt_explicit_relocs_backward ? "-mexplicit-relocs"
|
||||||
|
+ : "-mno-explicit-relocs", "=");
|
||||||
|
+
|
||||||
|
+ if (la_opt_explicit_relocs_backward != M_OPT_UNSET)
|
||||||
|
+ la_opt_explicit_relocs = (la_opt_explicit_relocs_backward
|
||||||
|
+ ? EXPLICIT_RELOCS_ALWAYS
|
||||||
|
+ : EXPLICIT_RELOCS_NONE);
|
||||||
|
+
|
||||||
|
+ if (la_opt_explicit_relocs == M_OPT_UNSET)
|
||||||
|
+ la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
|
||||||
|
+ ? (HAVE_AS_MRELAX_OPTION
|
||||||
|
+ ? EXPLICIT_RELOCS_AUTO
|
||||||
|
+ : EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
+ : EXPLICIT_RELOCS_NONE);
|
||||||
|
+
|
||||||
|
if (TARGET_ABI_LP64)
|
||||||
|
flag_pcc_struct_return = 0;
|
||||||
|
|
||||||
|
@@ -7413,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts,
|
||||||
|
case CMODEL_EXTREME:
|
||||||
|
if (!TARGET_EXPLICIT_RELOCS)
|
||||||
|
error ("code model %qs needs %s",
|
||||||
|
- "extreme", "-mexplicit-relocs");
|
||||||
|
+ "extreme", "-mexplicit-relocs=always");
|
||||||
|
|
||||||
|
if (opts->x_flag_plt)
|
||||||
|
{
|
||||||
|
@@ -7717,7 +7736,8 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
|
||||||
|
if (!TARGET_EXPLICIT_RELOCS)
|
||||||
|
{
|
||||||
|
error_at (DECL_SOURCE_LOCATION (decl),
|
||||||
|
- "%qE attribute requires %s", name, "-mexplicit-relocs");
|
||||||
|
+ "%qE attribute requires %s", name,
|
||||||
|
+ "-mexplicit-relocs=always");
|
||||||
|
*no_add_attrs = true;
|
||||||
|
return NULL_TREE;
|
||||||
|
}
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index f7ddfc452..6e8ac293a 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -1236,3 +1236,6 @@ struct GTY (()) machine_function
|
||||||
|
we just need "ibar" to avoid instruction hazard here. */
|
||||||
|
#undef CLEAR_INSN_CACHE
|
||||||
|
#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
|
||||||
|
+
|
||||||
|
+#define TARGET_EXPLICIT_RELOCS \
|
||||||
|
+ (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index 34bd832bd..44376fd77 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -183,10 +183,27 @@ mmax-inline-memcpy-size=
|
||||||
|
Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024)
|
||||||
|
-mmax-inline-memcpy-size=SIZE Set the max size of memcpy to inline, default is 1024.
|
||||||
|
|
||||||
|
-mexplicit-relocs
|
||||||
|
-Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Enum
|
||||||
|
+Name(explicit_relocs) Type(int)
|
||||||
|
+The code model option names for -mexplicit-relocs:
|
||||||
|
+
|
||||||
|
+EnumValue
|
||||||
|
+Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO)
|
||||||
|
+
|
||||||
|
+EnumValue
|
||||||
|
+Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE)
|
||||||
|
+
|
||||||
|
+EnumValue
|
||||||
|
+Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
+
|
||||||
|
+mexplicit-relocs=
|
||||||
|
+Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET)
|
||||||
|
Use %reloc() assembly operators.
|
||||||
|
|
||||||
|
+mexplicit-relocs
|
||||||
|
+Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET)
|
||||||
|
+Use %reloc() assembly operators (for backward compatibility).
|
||||||
|
+
|
||||||
|
; The code model option names for -mcmodel.
|
||||||
|
Enum
|
||||||
|
Name(cmodel) Type(int)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
212
0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
Normal file
212
0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
From 8539e5560e7bf11473cc7c386043b7019264236a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 30 Sep 2023 18:46:28 +0800
|
||||||
|
Subject: [PATCH 016/188] LoongArch: Use explicit relocs for GOT access when
|
||||||
|
-mexplicit-relocs=auto and LTO during a final link with linker plugin
|
||||||
|
|
||||||
|
If we are performing LTO for a final link and linker plugin is enabled,
|
||||||
|
then we are sure any GOT access may resolve to a symbol out of the link
|
||||||
|
unit (otherwise the linker plugin will tell us the symbol should be
|
||||||
|
resolved locally and we'll use PC-relative access instead).
|
||||||
|
|
||||||
|
Produce machine instructions with explicit relocs instead of la.global
|
||||||
|
for better scheduling.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-protos.h
|
||||||
|
(loongarch_explicit_relocs_p): Declare new function.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
|
||||||
|
Implement.
|
||||||
|
(loongarch_symbol_insns): Call loongarch_explicit_relocs_p for
|
||||||
|
SYMBOL_GOT_DISP, instead of using TARGET_EXPLICIT_RELOCS.
|
||||||
|
(loongarch_split_symbol): Call loongarch_explicit_relocs_p for
|
||||||
|
deciding whether to return early, instead of using
|
||||||
|
TARGET_EXPLICIT_RELOCS.
|
||||||
|
(loongarch_output_move): Call loongarch_explicit_relocs_p
|
||||||
|
instead of using TARGET_EXPLICIT_RELOCS.
|
||||||
|
* config/loongarch/loongarch.md (*low<mode>): Remove
|
||||||
|
TARGET_EXPLICIT_RELOCS from insn condition.
|
||||||
|
(@ld_from_got<mode>): Likewise.
|
||||||
|
* config/loongarch/predicates.md (move_operand): Call
|
||||||
|
loongarch_explicit_relocs_p instead of using
|
||||||
|
TARGET_EXPLICIT_RELOCS.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-lto.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-protos.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.cc | 34 +++++++++++++++----
|
||||||
|
gcc/config/loongarch/loongarch.md | 4 +--
|
||||||
|
gcc/config/loongarch/predicates.md | 8 ++---
|
||||||
|
.../loongarch/explicit-relocs-auto-lto.c | 26 ++++++++++++++
|
||||||
|
5 files changed, 59 insertions(+), 14 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
index 163162598..51d38177b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
@@ -220,4 +220,5 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int);
|
||||||
|
extern tree loongarch_build_builtin_va_list (void);
|
||||||
|
|
||||||
|
extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool);
|
||||||
|
+extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
|
||||||
|
#endif /* ! GCC_LOONGARCH_PROTOS_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 3258c8655..1d20577e7 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -1922,6 +1922,29 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* If -mexplicit-relocs=auto, we use machine operations with reloc hints
|
||||||
|
+ for cases where the linker is unable to relax so we can schedule the
|
||||||
|
+ machine operations, otherwise use an assembler pseudo-op so the
|
||||||
|
+ assembler will generate R_LARCH_RELAX. */
|
||||||
|
+
|
||||||
|
+bool
|
||||||
|
+loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
|
||||||
|
+{
|
||||||
|
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
|
||||||
|
+ return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
|
||||||
|
+
|
||||||
|
+ /* If we are performing LTO for a final link, and we have the linker
|
||||||
|
+ plugin so we know the resolution of the symbols, then all GOT
|
||||||
|
+ references are binding to external symbols or preemptable symbols.
|
||||||
|
+ So the linker cannot relax them. */
|
||||||
|
+ return (in_lto_p
|
||||||
|
+ && !flag_incremental_link
|
||||||
|
+ && HAVE_LTO_PLUGIN == 2
|
||||||
|
+ && (!global_options_set.x_flag_use_linker_plugin
|
||||||
|
+ || global_options.x_flag_use_linker_plugin)
|
||||||
|
+ && type == SYMBOL_GOT_DISP);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Returns the number of instructions necessary to reference a symbol. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
@@ -1937,7 +1960,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
|
||||||
|
case SYMBOL_GOT_DISP:
|
||||||
|
/* The constant will have to be loaded from the GOT before it
|
||||||
|
is used in an address. */
|
||||||
|
- if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE)
|
||||||
|
+ if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 3;
|
||||||
|
@@ -3034,7 +3057,7 @@ loongarch_symbol_extreme_p (enum loongarch_symbol_type type)
|
||||||
|
If so, and if LOW_OUT is nonnull, emit the high part and store the
|
||||||
|
low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise.
|
||||||
|
|
||||||
|
- Return false if build with '-mno-explicit-relocs'.
|
||||||
|
+ Return false if built with '-mexplicit-relocs=none'.
|
||||||
|
|
||||||
|
TEMP is as for loongarch_force_temporary and is used to load the high
|
||||||
|
part into a register.
|
||||||
|
@@ -3048,12 +3071,9 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||||
|
{
|
||||||
|
enum loongarch_symbol_type symbol_type;
|
||||||
|
|
||||||
|
- /* If build with '-mno-explicit-relocs', don't split symbol. */
|
||||||
|
- if (!TARGET_EXPLICIT_RELOCS)
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
|
||||||
|
|| !loongarch_symbolic_constant_p (addr, &symbol_type)
|
||||||
|
+ || !loongarch_explicit_relocs_p (symbol_type)
|
||||||
|
|| loongarch_symbol_insns (symbol_type, mode) == 0
|
||||||
|
|| !loongarch_split_symbol_type (symbol_type))
|
||||||
|
return false;
|
||||||
|
@@ -4793,7 +4813,7 @@ loongarch_output_move (rtx dest, rtx src)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (!TARGET_EXPLICIT_RELOCS
|
||||||
|
+ if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src))
|
||||||
|
&& dest_code == REG && symbolic_operand (src, VOIDmode))
|
||||||
|
{
|
||||||
|
if (loongarch_classify_symbol (src) == SYMBOL_PCREL)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 29ac950bf..81c97393b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2247,7 +2247,7 @@
|
||||||
|
[(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
(lo_sum:P (match_operand:P 1 "register_operand" " r")
|
||||||
|
(match_operand:P 2 "symbolic_operand" "")))]
|
||||||
|
- "TARGET_EXPLICIT_RELOCS"
|
||||||
|
+ ""
|
||||||
|
"addi.<d>\t%0,%1,%L2"
|
||||||
|
[(set_attr "type" "arith")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
@@ -2275,7 +2275,7 @@
|
||||||
|
(match_operand:P 1 "register_operand" "r")
|
||||||
|
(match_operand:P 2 "symbolic_operand")))]
|
||||||
|
UNSPEC_LOAD_FROM_GOT))]
|
||||||
|
- "TARGET_EXPLICIT_RELOCS"
|
||||||
|
+ ""
|
||||||
|
"ld.<d>\t%0,%1,%L2"
|
||||||
|
[(set_attr "type" "move")]
|
||||||
|
)
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index ad6cee5c4..6b50b3a4d 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -541,16 +541,14 @@
|
||||||
|
case SYMBOL_REF:
|
||||||
|
case LABEL_REF:
|
||||||
|
return (loongarch_symbolic_constant_p (op, &symbol_type)
|
||||||
|
- && (!TARGET_EXPLICIT_RELOCS
|
||||||
|
+ && (!loongarch_explicit_relocs_p (symbol_type)
|
||||||
|
|| !loongarch_split_symbol_type (symbol_type)));
|
||||||
|
|
||||||
|
case HIGH:
|
||||||
|
- /* '-mno-explicit-relocs' don't generate high/low pairs. */
|
||||||
|
- if (!TARGET_EXPLICIT_RELOCS)
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
op = XEXP (op, 0);
|
||||||
|
+
|
||||||
|
return (loongarch_symbolic_constant_p (op, &symbol_type)
|
||||||
|
+ && loongarch_explicit_relocs_p (symbol_type)
|
||||||
|
&& loongarch_split_symbol_type (symbol_type));
|
||||||
|
|
||||||
|
default:
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..f53b54689
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+/* { dg-do link } */
|
||||||
|
+/* { dg-require-effective-target lto } */
|
||||||
|
+/* { dg-require-linker-plugin "" } */
|
||||||
|
+/* { dg-options "-fpic -shared -O2 --save-temps -mexplicit-relocs=auto -flto -fuse-linker-plugin -flto-partition=one" } */
|
||||||
|
+
|
||||||
|
+int pcrel __attribute__ ((visibility ("hidden")));
|
||||||
|
+int got __attribute__ ((visibility ("default")));
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+*addr_pcrel (void)
|
||||||
|
+{
|
||||||
|
+ return &pcrel;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+*addr_got (void)
|
||||||
|
+{
|
||||||
|
+ return &got;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* With linker plugin we should use la.local (it can be relaxed to pcaddi),
|
||||||
|
+ but not la.global (we are pretty sure the linker cannot relax la.global
|
||||||
|
+ got). */
|
||||||
|
+/* { dg-final { scan-lto-assembler "la.local.*pcrel" } } */
|
||||||
|
+/* { dg-final { scan-lto-assembler "pcalau12i.*%got_pc_hi20\\\(got\\\)" } } */
|
||||||
|
+/* { dg-final { scan-lto-assembler "ld.*%got_pc_lo12\\\(got\\\)" } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
146
0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
Normal file
146
0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
Normal file
@ -0,0 +1,146 @@
|
|||||||
|
From 23b4166c6699a1a3063b11fa45497c1a1524bd48 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 2 Oct 2023 13:00:18 +0800
|
||||||
|
Subject: [PATCH 017/188] LoongArch: Use explicit relocs for TLS access with
|
||||||
|
-mexplicit-relocs=auto
|
||||||
|
|
||||||
|
The linker does not know how to relax TLS access for LoongArch, so let's
|
||||||
|
emit machine instructions with explicit relocs for TLS.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
|
||||||
|
Return true for TLS symbol types if -mexplicit-relocs=auto.
|
||||||
|
(loongarch_call_tls_get_addr): Replace TARGET_EXPLICIT_RELOCS
|
||||||
|
with la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE.
|
||||||
|
(loongarch_legitimize_tls_address): Likewise.
|
||||||
|
* config/loongarch/loongarch.md (@tls_low<mode>): Remove
|
||||||
|
TARGET_EXPLICIT_RELOCS from insn condition.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: New
|
||||||
|
test.
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c: New
|
||||||
|
test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 37 ++++++++++++-------
|
||||||
|
gcc/config/loongarch/loongarch.md | 2 +-
|
||||||
|
.../explicit-relocs-auto-tls-ld-gd.c | 9 +++++
|
||||||
|
.../explicit-relocs-auto-tls-le-ie.c | 6 +++
|
||||||
|
4 files changed, 40 insertions(+), 14 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 1d20577e7..fa5c14be6 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -1933,16 +1933,27 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
|
||||||
|
if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
|
||||||
|
return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
|
||||||
|
|
||||||
|
- /* If we are performing LTO for a final link, and we have the linker
|
||||||
|
- plugin so we know the resolution of the symbols, then all GOT
|
||||||
|
- references are binding to external symbols or preemptable symbols.
|
||||||
|
- So the linker cannot relax them. */
|
||||||
|
- return (in_lto_p
|
||||||
|
- && !flag_incremental_link
|
||||||
|
- && HAVE_LTO_PLUGIN == 2
|
||||||
|
- && (!global_options_set.x_flag_use_linker_plugin
|
||||||
|
- || global_options.x_flag_use_linker_plugin)
|
||||||
|
- && type == SYMBOL_GOT_DISP);
|
||||||
|
+ switch (type)
|
||||||
|
+ {
|
||||||
|
+ case SYMBOL_TLS_IE:
|
||||||
|
+ case SYMBOL_TLS_LE:
|
||||||
|
+ case SYMBOL_TLSGD:
|
||||||
|
+ case SYMBOL_TLSLDM:
|
||||||
|
+ /* The linker doesn't know how to relax TLS accesses. */
|
||||||
|
+ return true;
|
||||||
|
+ case SYMBOL_GOT_DISP:
|
||||||
|
+ /* If we are performing LTO for a final link, and we have the
|
||||||
|
+ linker plugin so we know the resolution of the symbols, then
|
||||||
|
+ all GOT references are binding to external symbols or
|
||||||
|
+ preemptable symbols. So the linker cannot relax them. */
|
||||||
|
+ return (in_lto_p
|
||||||
|
+ && !flag_incremental_link
|
||||||
|
+ && HAVE_LTO_PLUGIN == 2
|
||||||
|
+ && (!global_options_set.x_flag_use_linker_plugin
|
||||||
|
+ || global_options.x_flag_use_linker_plugin));
|
||||||
|
+ default:
|
||||||
|
+ return false;
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Returns the number of instructions necessary to reference a symbol. */
|
||||||
|
@@ -2749,7 +2760,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
|
||||||
|
|
||||||
|
start_sequence ();
|
||||||
|
|
||||||
|
- if (TARGET_EXPLICIT_RELOCS)
|
||||||
|
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
|
||||||
|
{
|
||||||
|
/* Split tls symbol to high and low. */
|
||||||
|
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
|
||||||
|
@@ -2914,7 +2925,7 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||||
|
tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
|
||||||
|
tmp1 = gen_reg_rtx (Pmode);
|
||||||
|
dest = gen_reg_rtx (Pmode);
|
||||||
|
- if (TARGET_EXPLICIT_RELOCS)
|
||||||
|
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
|
||||||
|
{
|
||||||
|
tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
|
||||||
|
tmp3 = gen_reg_rtx (Pmode);
|
||||||
|
@@ -2951,7 +2962,7 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||||
|
tmp1 = gen_reg_rtx (Pmode);
|
||||||
|
dest = gen_reg_rtx (Pmode);
|
||||||
|
|
||||||
|
- if (TARGET_EXPLICIT_RELOCS)
|
||||||
|
+ if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
|
||||||
|
{
|
||||||
|
tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
|
||||||
|
tmp3 = gen_reg_rtx (Pmode);
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 81c97393b..3b836d535 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2257,7 +2257,7 @@
|
||||||
|
(unspec:P [(mem:P (lo_sum:P (match_operand:P 1 "register_operand" "r")
|
||||||
|
(match_operand:P 2 "symbolic_operand" "")))]
|
||||||
|
UNSPEC_TLS_LOW))]
|
||||||
|
- "TARGET_EXPLICIT_RELOCS"
|
||||||
|
+ ""
|
||||||
|
"addi.<d>\t%0,%1,%L2"
|
||||||
|
[(set_attr "type" "arith")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..957ff98df
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c
|
||||||
|
@@ -0,0 +1,9 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */
|
||||||
|
+
|
||||||
|
+__thread int a __attribute__((visibility("hidden")));
|
||||||
|
+extern __thread int b __attribute__((visibility("default")));
|
||||||
|
+
|
||||||
|
+int test() { return a + b; }
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..78898cfc6
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c
|
||||||
|
@@ -0,0 +1,6 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mexplicit-relocs=auto" } */
|
||||||
|
+
|
||||||
|
+#include "explicit-relocs-auto-tls-ld-gd.c"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
245
0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
Normal file
245
0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
Normal file
@ -0,0 +1,245 @@
|
|||||||
|
From c29a4f4fb5ff24ef975ba27688a3da696aa7d006 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 1 Oct 2023 11:14:29 +0800
|
||||||
|
Subject: [PATCH 018/188] LoongArch: Use explicit relocs for addresses only
|
||||||
|
used for one load or store with -mexplicit-relocs=auto and
|
||||||
|
-mcmodel={normal,medium}
|
||||||
|
|
||||||
|
In these cases, if we use explicit relocs, we end up with 2
|
||||||
|
instructions:
|
||||||
|
|
||||||
|
pcalau12i t0, %pc_hi20(x)
|
||||||
|
ld.d t0, t0, %pc_lo12(x)
|
||||||
|
|
||||||
|
If we use la.local pseudo-op, in the best scenario (x is in +/- 2MiB
|
||||||
|
range) we still have 2 instructions:
|
||||||
|
|
||||||
|
pcaddi t0, %pcrel_20(x)
|
||||||
|
ld.d t0, t0, 0
|
||||||
|
|
||||||
|
If x is out of the range we'll have 3 instructions. So for these cases
|
||||||
|
just emit machine instructions with explicit relocs.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/predicates.md (symbolic_pcrel_operand): New
|
||||||
|
predicate.
|
||||||
|
* config/loongarch/loongarch.md (define_peephole2): Optimize
|
||||||
|
la.local + ld/st to pcalau12i + ld/st if the address is only used
|
||||||
|
once if -mexplicit-relocs=auto and -mcmodel=normal or medium.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-single-load-store.c:
|
||||||
|
New test.
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c:
|
||||||
|
New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 122 ++++++++++++++++++
|
||||||
|
gcc/config/loongarch/predicates.md | 7 +
|
||||||
|
...-relocs-auto-single-load-store-no-anchor.c | 6 +
|
||||||
|
.../explicit-relocs-auto-single-load-store.c | 14 ++
|
||||||
|
4 files changed, 149 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 3b836d535..c4c6baa60 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -65,6 +65,7 @@
|
||||||
|
|
||||||
|
UNSPEC_LOAD_FROM_GOT
|
||||||
|
UNSPEC_PCALAU12I
|
||||||
|
+ UNSPEC_PCALAU12I_GR
|
||||||
|
UNSPEC_ORI_L_LO12
|
||||||
|
UNSPEC_LUI_L_HI20
|
||||||
|
UNSPEC_LUI_H_LO20
|
||||||
|
@@ -2297,6 +2298,16 @@
|
||||||
|
"pcalau12i\t%0,%%pc_hi20(%1)"
|
||||||
|
[(set_attr "type" "move")])
|
||||||
|
|
||||||
|
+;; @pcalau12i may be used for sibcall so it has a strict constraint. This
|
||||||
|
+;; allows any general register as the operand.
|
||||||
|
+(define_insn "@pcalau12i_gr<mode>"
|
||||||
|
+ [(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
+ (unspec:P [(match_operand:P 1 "symbolic_operand" "")]
|
||||||
|
+ UNSPEC_PCALAU12I_GR))]
|
||||||
|
+ ""
|
||||||
|
+ "pcalau12i\t%0,%%pc_hi20(%1)"
|
||||||
|
+ [(set_attr "type" "move")])
|
||||||
|
+
|
||||||
|
(define_insn "@ori_l_lo12<mode>"
|
||||||
|
[(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
(unspec:P [(match_operand:P 1 "register_operand" "r")
|
||||||
|
@@ -3748,6 +3759,117 @@
|
||||||
|
[(set_attr "type" "unknown")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; With normal or medium code models, if the only use of a pc-relative
|
||||||
|
+;; address is for loading or storing a value, then relying on linker
|
||||||
|
+;; relaxation is not better than emitting the machine instruction directly.
|
||||||
|
+;; Even if the la.local pseudo op can be relaxed, we get:
|
||||||
|
+;;
|
||||||
|
+;; pcaddi $t0, %pcrel_20(x)
|
||||||
|
+;; ld.d $t0, $t0, 0
|
||||||
|
+;;
|
||||||
|
+;; There are still two instructions, same as using the machine instructions
|
||||||
|
+;; and explicit relocs:
|
||||||
|
+;;
|
||||||
|
+;; pcalau12i $t0, %pc_hi20(x)
|
||||||
|
+;; ld.d $t0, $t0, %pc_lo12(x)
|
||||||
|
+;;
|
||||||
|
+;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
|
||||||
|
+;; 3 instructions).
|
||||||
|
+(define_peephole2
|
||||||
|
+ [(set (match_operand:P 0 "register_operand")
|
||||||
|
+ (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
+ (set (match_operand:GPR 2 "register_operand")
|
||||||
|
+ (mem:GPR (match_dup 0)))]
|
||||||
|
+ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
+ && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
+ || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
+ [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+(define_peephole2
|
||||||
|
+ [(set (match_operand:P 0 "register_operand")
|
||||||
|
+ (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
+ (set (match_operand:GPR 2 "register_operand")
|
||||||
|
+ (mem:GPR (plus (match_dup 0)
|
||||||
|
+ (match_operand 3 "const_int_operand"))))]
|
||||||
|
+ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
+ && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
+ || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
+ [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
+ {
|
||||||
|
+ operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
+ emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+(define_peephole2
|
||||||
|
+ [(set (match_operand:P 0 "register_operand")
|
||||||
|
+ (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
+ (set (match_operand:GPR 2 "register_operand")
|
||||||
|
+ (any_extend:GPR (mem:SUBDI (match_dup 0))))]
|
||||||
|
+ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
+ && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
+ || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
+ [(set (match_dup 2)
|
||||||
|
+ (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
|
||||||
|
+ (match_dup 1)))))]
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+(define_peephole2
|
||||||
|
+ [(set (match_operand:P 0 "register_operand")
|
||||||
|
+ (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
+ (set (match_operand:GPR 2 "register_operand")
|
||||||
|
+ (any_extend:GPR
|
||||||
|
+ (mem:SUBDI (plus (match_dup 0)
|
||||||
|
+ (match_operand 3 "const_int_operand")))))]
|
||||||
|
+ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
+ && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
+ || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
+ [(set (match_dup 2)
|
||||||
|
+ (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
|
||||||
|
+ (match_dup 1)))))]
|
||||||
|
+ {
|
||||||
|
+ operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
+ emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+(define_peephole2
|
||||||
|
+ [(set (match_operand:P 0 "register_operand")
|
||||||
|
+ (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
+ (set (mem:QHWD (match_dup 0))
|
||||||
|
+ (match_operand:QHWD 2 "register_operand"))]
|
||||||
|
+ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
+ && (peep2_reg_dead_p (2, operands[0])) \
|
||||||
|
+ && REGNO (operands[0]) != REGNO (operands[2])"
|
||||||
|
+ [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
+(define_peephole2
|
||||||
|
+ [(set (match_operand:P 0 "register_operand")
|
||||||
|
+ (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
+ (set (mem:QHWD (plus (match_dup 0)
|
||||||
|
+ (match_operand 3 "const_int_operand")))
|
||||||
|
+ (match_operand:QHWD 2 "register_operand"))]
|
||||||
|
+ "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
+ && (peep2_reg_dead_p (2, operands[0])) \
|
||||||
|
+ && REGNO (operands[0]) != REGNO (operands[2])"
|
||||||
|
+ [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
+ {
|
||||||
|
+ operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
+ emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ })
|
||||||
|
+
|
||||||
|
;; Synchronization instructions.
|
||||||
|
|
||||||
|
(include "sync.md")
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 6b50b3a4d..1d669f560 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -563,6 +563,13 @@
|
||||||
|
return loongarch_symbolic_constant_p (op, &type);
|
||||||
|
})
|
||||||
|
|
||||||
|
+(define_predicate "symbolic_pcrel_operand"
|
||||||
|
+ (match_code "const,symbol_ref,label_ref")
|
||||||
|
+{
|
||||||
|
+ enum loongarch_symbol_type type;
|
||||||
|
+ return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
(define_predicate "equality_operator"
|
||||||
|
(match_code "eq,ne"))
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..fb03403d7
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
|
||||||
|
@@ -0,0 +1,6 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto -fno-section-anchors" } */
|
||||||
|
+
|
||||||
|
+#include "explicit-relocs-auto-single-load-store.c"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "la.local" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..0d53644cd
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */
|
||||||
|
+
|
||||||
|
+long a;
|
||||||
|
+int b;
|
||||||
|
+unsigned int c;
|
||||||
|
+
|
||||||
|
+long load_a() { return a; }
|
||||||
|
+long load_b() { return b; }
|
||||||
|
+long load_c() { return c; }
|
||||||
|
+void store_a(long x) { a = x; }
|
||||||
|
+void store_b(int x) { b = x; }
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "la.local" } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,84 @@
|
|||||||
|
From 619b6081064bf85a19f4659e278a361875e4f9fb Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Tue, 24 Oct 2023 14:40:14 +0800
|
||||||
|
Subject: [PATCH 019/188] LoongArch: Implement __builtin_thread_pointer for
|
||||||
|
TLS.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (get_thread_pointer<mode>): Add the
|
||||||
|
instruction template corresponding to the __builtin_thread_pointer
|
||||||
|
function.
|
||||||
|
* doc/extend.texi: Add the __builtin_thread_pointer function support
|
||||||
|
description to the documentation.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/builtin_thread_pointer.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 7 +++++++
|
||||||
|
gcc/doc/extend.texi | 5 +++++
|
||||||
|
.../gcc.target/loongarch/builtin_thread_pointer.c | 10 ++++++++++
|
||||||
|
3 files changed, 22 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index c4c6baa60..80487488d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -113,6 +113,7 @@
|
||||||
|
|
||||||
|
(define_constants
|
||||||
|
[(RETURN_ADDR_REGNUM 1)
|
||||||
|
+ (TP_REGNUM 2)
|
||||||
|
(T0_REGNUM 12)
|
||||||
|
(T1_REGNUM 13)
|
||||||
|
(S0_REGNUM 23)
|
||||||
|
@@ -3647,6 +3648,12 @@
|
||||||
|
[(set_attr "length" "0")
|
||||||
|
(set_attr "type" "ghost")])
|
||||||
|
|
||||||
|
+;; Named pattern for expanding thread pointer reference.
|
||||||
|
+(define_expand "get_thread_pointer<mode>"
|
||||||
|
+ [(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
+ (reg:P TP_REGNUM))]
|
||||||
|
+ "HAVE_AS_TLS"
|
||||||
|
+ {})
|
||||||
|
|
||||||
|
(define_split
|
||||||
|
[(match_operand 0 "small_data_pattern")]
|
||||||
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||||
|
index 1d1bac255..497c6de5f 100644
|
||||||
|
--- a/gcc/doc/extend.texi
|
||||||
|
+++ b/gcc/doc/extend.texi
|
||||||
|
@@ -16257,6 +16257,11 @@ function you need to include @code{larchintrin.h}.
|
||||||
|
void __break (imm0_32767)
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
|
+Returns the value that is currently set in the @samp{tp} register.
|
||||||
|
+@smallexample
|
||||||
|
+ void * __builtin_thread_pointer (void)
|
||||||
|
+@end smallexample
|
||||||
|
+
|
||||||
|
@node MIPS DSP Built-in Functions
|
||||||
|
@subsection MIPS DSP Built-in Functions
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..541e3b143
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
|
||||||
|
@@ -0,0 +1,10 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-require-effective-target tls_native } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+/* { dg-final { scan-assembler "or\t\\\$r4,\\\$r2,\\\$r0" } } */
|
||||||
|
+
|
||||||
|
+void *
|
||||||
|
+get_tp ()
|
||||||
|
+{
|
||||||
|
+ return __builtin_thread_pointer ();
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
189
0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
Normal file
189
0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
Normal file
@ -0,0 +1,189 @@
|
|||||||
|
From 9b29e6ba10716656ba9b32c33f021e920bb05f3d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chenghui Pan <panchenghui@loongson.cn>
|
||||||
|
Date: Mon, 23 Oct 2023 10:13:24 +0800
|
||||||
|
Subject: [PATCH 020/188] LoongArch: Fix vfrint-related comments in
|
||||||
|
lsxintrin.h and lasxintrin.h
|
||||||
|
|
||||||
|
The comment of vfrint-related intrinsic functions does not match the return
|
||||||
|
value type in definition. This patch fixes these comments.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasxintrin.h (__lasx_xvftintrnel_l_s): Fix comments.
|
||||||
|
(__lasx_xvfrintrne_s): Ditto.
|
||||||
|
(__lasx_xvfrintrne_d): Ditto.
|
||||||
|
(__lasx_xvfrintrz_s): Ditto.
|
||||||
|
(__lasx_xvfrintrz_d): Ditto.
|
||||||
|
(__lasx_xvfrintrp_s): Ditto.
|
||||||
|
(__lasx_xvfrintrp_d): Ditto.
|
||||||
|
(__lasx_xvfrintrm_s): Ditto.
|
||||||
|
(__lasx_xvfrintrm_d): Ditto.
|
||||||
|
* config/loongarch/lsxintrin.h (__lsx_vftintrneh_l_s): Ditto.
|
||||||
|
(__lsx_vfrintrne_s): Ditto.
|
||||||
|
(__lsx_vfrintrne_d): Ditto.
|
||||||
|
(__lsx_vfrintrz_s): Ditto.
|
||||||
|
(__lsx_vfrintrz_d): Ditto.
|
||||||
|
(__lsx_vfrintrp_s): Ditto.
|
||||||
|
(__lsx_vfrintrp_d): Ditto.
|
||||||
|
(__lsx_vfrintrm_s): Ditto.
|
||||||
|
(__lsx_vfrintrm_d): Ditto.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasxintrin.h | 16 ++++++++--------
|
||||||
|
gcc/config/loongarch/lsxintrin.h | 16 ++++++++--------
|
||||||
|
2 files changed, 16 insertions(+), 16 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h
|
||||||
|
index d39379927..7bce2c757 100644
|
||||||
|
--- a/gcc/config/loongarch/lasxintrin.h
|
||||||
|
+++ b/gcc/config/loongarch/lasxintrin.h
|
||||||
|
@@ -3368,7 +3368,7 @@ __m256i __lasx_xvftintrnel_l_s (__m256 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V8SI, V8SF. */
|
||||||
|
+/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256 __lasx_xvfrintrne_s (__m256 _1)
|
||||||
|
{
|
||||||
|
@@ -3376,7 +3376,7 @@ __m256 __lasx_xvfrintrne_s (__m256 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V4DI, V4DF. */
|
||||||
|
+/* Data types in instruction templates: V4DF, V4DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256d __lasx_xvfrintrne_d (__m256d _1)
|
||||||
|
{
|
||||||
|
@@ -3384,7 +3384,7 @@ __m256d __lasx_xvfrintrne_d (__m256d _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V8SI, V8SF. */
|
||||||
|
+/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256 __lasx_xvfrintrz_s (__m256 _1)
|
||||||
|
{
|
||||||
|
@@ -3392,7 +3392,7 @@ __m256 __lasx_xvfrintrz_s (__m256 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V4DI, V4DF. */
|
||||||
|
+/* Data types in instruction templates: V4DF, V4DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256d __lasx_xvfrintrz_d (__m256d _1)
|
||||||
|
{
|
||||||
|
@@ -3400,7 +3400,7 @@ __m256d __lasx_xvfrintrz_d (__m256d _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V8SI, V8SF. */
|
||||||
|
+/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256 __lasx_xvfrintrp_s (__m256 _1)
|
||||||
|
{
|
||||||
|
@@ -3408,7 +3408,7 @@ __m256 __lasx_xvfrintrp_s (__m256 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V4DI, V4DF. */
|
||||||
|
+/* Data types in instruction templates: V4DF, V4DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256d __lasx_xvfrintrp_d (__m256d _1)
|
||||||
|
{
|
||||||
|
@@ -3416,7 +3416,7 @@ __m256d __lasx_xvfrintrp_d (__m256d _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V8SI, V8SF. */
|
||||||
|
+/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256 __lasx_xvfrintrm_s (__m256 _1)
|
||||||
|
{
|
||||||
|
@@ -3424,7 +3424,7 @@ __m256 __lasx_xvfrintrm_s (__m256 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
-/* Data types in instruction templates: V4DI, V4DF. */
|
||||||
|
+/* Data types in instruction templates: V4DF, V4DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m256d __lasx_xvfrintrm_d (__m256d _1)
|
||||||
|
{
|
||||||
|
diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h
|
||||||
|
index ec4206990..29553c093 100644
|
||||||
|
--- a/gcc/config/loongarch/lsxintrin.h
|
||||||
|
+++ b/gcc/config/loongarch/lsxintrin.h
|
||||||
|
@@ -3412,7 +3412,7 @@ __m128i __lsx_vftintrneh_l_s (__m128 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V4SI, V4SF. */
|
||||||
|
+/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128 __lsx_vfrintrne_s (__m128 _1)
|
||||||
|
{
|
||||||
|
@@ -3420,7 +3420,7 @@ __m128 __lsx_vfrintrne_s (__m128 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V2DI, V2DF. */
|
||||||
|
+/* Data types in instruction templates: V2DF, V2DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128d __lsx_vfrintrne_d (__m128d _1)
|
||||||
|
{
|
||||||
|
@@ -3428,7 +3428,7 @@ __m128d __lsx_vfrintrne_d (__m128d _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V4SI, V4SF. */
|
||||||
|
+/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128 __lsx_vfrintrz_s (__m128 _1)
|
||||||
|
{
|
||||||
|
@@ -3436,7 +3436,7 @@ __m128 __lsx_vfrintrz_s (__m128 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V2DI, V2DF. */
|
||||||
|
+/* Data types in instruction templates: V2DF, V2DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128d __lsx_vfrintrz_d (__m128d _1)
|
||||||
|
{
|
||||||
|
@@ -3444,7 +3444,7 @@ __m128d __lsx_vfrintrz_d (__m128d _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V4SI, V4SF. */
|
||||||
|
+/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128 __lsx_vfrintrp_s (__m128 _1)
|
||||||
|
{
|
||||||
|
@@ -3452,7 +3452,7 @@ __m128 __lsx_vfrintrp_s (__m128 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V2DI, V2DF. */
|
||||||
|
+/* Data types in instruction templates: V2DF, V2DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128d __lsx_vfrintrp_d (__m128d _1)
|
||||||
|
{
|
||||||
|
@@ -3460,7 +3460,7 @@ __m128d __lsx_vfrintrp_d (__m128d _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V4SI, V4SF. */
|
||||||
|
+/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128 __lsx_vfrintrm_s (__m128 _1)
|
||||||
|
{
|
||||||
|
@@ -3468,7 +3468,7 @@ __m128 __lsx_vfrintrm_s (__m128 _1)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
-/* Data types in instruction templates: V2DI, V2DF. */
|
||||||
|
+/* Data types in instruction templates: V2DF, V2DF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
__m128d __lsx_vfrintrm_d (__m128d _1)
|
||||||
|
{
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
418
0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
Normal file
418
0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
Normal file
@ -0,0 +1,418 @@
|
|||||||
|
From 156d9451a5b20ac336370f1610a949db1bef7a26 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Thu, 26 Oct 2023 09:34:32 +0800
|
||||||
|
Subject: [PATCH 021/188] LoongArch: Enable vcond_mask_mn expanders for SF/DF
|
||||||
|
modes.
|
||||||
|
|
||||||
|
If the vcond_mask patterns don't support fp modes, the vector
|
||||||
|
FP comparison instructions will not be generated.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md (vcond_mask_<ILASX:mode><ILASX:mode>): Change to
|
||||||
|
(vcond_mask_<mode><mode256_i>): this.
|
||||||
|
* config/loongarch/lsx.md (vcond_mask_<ILSX:mode><ILSX:mode>): Change to
|
||||||
|
(vcond_mask_<mode><mode_i>): this.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 14 +--
|
||||||
|
gcc/config/loongarch/lsx.md | 14 +--
|
||||||
|
.../loongarch/vector/lasx/lasx-vcond-1.c | 64 ++++++++++++++
|
||||||
|
.../loongarch/vector/lasx/lasx-vcond-2.c | 87 +++++++++++++++++++
|
||||||
|
.../loongarch/vector/lsx/lsx-vcond-1.c | 64 ++++++++++++++
|
||||||
|
.../loongarch/vector/lsx/lsx-vcond-2.c | 87 +++++++++++++++++++
|
||||||
|
6 files changed, 316 insertions(+), 14 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 442fda246..f0f2dd08d 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -906,15 +906,15 @@
|
||||||
|
})
|
||||||
|
|
||||||
|
;; Same as vcond_
|
||||||
|
-(define_expand "vcond_mask_<ILASX:mode><ILASX:mode>"
|
||||||
|
- [(match_operand:ILASX 0 "register_operand")
|
||||||
|
- (match_operand:ILASX 1 "reg_or_m1_operand")
|
||||||
|
- (match_operand:ILASX 2 "reg_or_0_operand")
|
||||||
|
- (match_operand:ILASX 3 "register_operand")]
|
||||||
|
+(define_expand "vcond_mask_<mode><mode256_i>"
|
||||||
|
+ [(match_operand:LASX 0 "register_operand")
|
||||||
|
+ (match_operand:LASX 1 "reg_or_m1_operand")
|
||||||
|
+ (match_operand:LASX 2 "reg_or_0_operand")
|
||||||
|
+ (match_operand:<VIMODE256> 3 "register_operand")]
|
||||||
|
"ISA_HAS_LASX"
|
||||||
|
{
|
||||||
|
- loongarch_expand_vec_cond_mask_expr (<ILASX:MODE>mode,
|
||||||
|
- <ILASX:VIMODE256>mode, operands);
|
||||||
|
+ loongarch_expand_vec_cond_mask_expr (<MODE>mode,
|
||||||
|
+ <VIMODE256>mode, operands);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index b4e92ae9c..4af32c8df 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -644,15 +644,15 @@
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_expand "vcond_mask_<ILSX:mode><ILSX:mode>"
|
||||||
|
- [(match_operand:ILSX 0 "register_operand")
|
||||||
|
- (match_operand:ILSX 1 "reg_or_m1_operand")
|
||||||
|
- (match_operand:ILSX 2 "reg_or_0_operand")
|
||||||
|
- (match_operand:ILSX 3 "register_operand")]
|
||||||
|
+(define_expand "vcond_mask_<mode><mode_i>"
|
||||||
|
+ [(match_operand:LSX 0 "register_operand")
|
||||||
|
+ (match_operand:LSX 1 "reg_or_m1_operand")
|
||||||
|
+ (match_operand:LSX 2 "reg_or_0_operand")
|
||||||
|
+ (match_operand:<VIMODE> 3 "register_operand")]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
{
|
||||||
|
- loongarch_expand_vec_cond_mask_expr (<ILSX:MODE>mode,
|
||||||
|
- <ILSX:VIMODE>mode, operands);
|
||||||
|
+ loongarch_expand_vec_cond_mask_expr (<MODE>mode,
|
||||||
|
+ <VIMODE>mode, operands);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..ee9cb1a1f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
|
||||||
|
@@ -0,0 +1,64 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */
|
||||||
|
+
|
||||||
|
+#include <stdint-gcc.h>
|
||||||
|
+
|
||||||
|
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \
|
||||||
|
+ void __attribute__ ((noinline, noclone)) \
|
||||||
|
+ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \
|
||||||
|
+ DATA_TYPE *__restrict__ x, \
|
||||||
|
+ DATA_TYPE *__restrict__ y, \
|
||||||
|
+ CMP_TYPE *__restrict__ a, \
|
||||||
|
+ CMP_TYPE *__restrict__ b, \
|
||||||
|
+ int n) \
|
||||||
|
+ { \
|
||||||
|
+ for (int i = 0; i < n; i++) \
|
||||||
|
+ { \
|
||||||
|
+ DATA_TYPE xval = x[i], yval = y[i]; \
|
||||||
|
+ CMP_TYPE aval = a[i], bval = b[i]; \
|
||||||
|
+ r[i] = aval COND bval ? xval : yval; \
|
||||||
|
+ } \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
|
||||||
|
+ T (int8_t, int8_t, COND, SUFFIX) \
|
||||||
|
+ T (int16_t, int16_t, COND, SUFFIX) \
|
||||||
|
+ T (int32_t, int32_t, COND, SUFFIX) \
|
||||||
|
+ T (int64_t, int64_t, COND, SUFFIX) \
|
||||||
|
+ T (float, int32_t, COND, SUFFIX##_float) \
|
||||||
|
+ T (double, int64_t, COND, SUFFIX##_double)
|
||||||
|
+
|
||||||
|
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
|
||||||
|
+ T (uint8_t, uint8_t, COND, SUFFIX) \
|
||||||
|
+ T (uint16_t, uint16_t, COND, SUFFIX) \
|
||||||
|
+ T (uint32_t, uint32_t, COND, SUFFIX) \
|
||||||
|
+ T (uint64_t, uint64_t, COND, SUFFIX) \
|
||||||
|
+ T (float, uint32_t, COND, SUFFIX##_float) \
|
||||||
|
+ T (double, uint64_t, COND, SUFFIX##_double)
|
||||||
|
+
|
||||||
|
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \
|
||||||
|
+ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \
|
||||||
|
+ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
|
||||||
|
+
|
||||||
|
+#define TEST_VAR_ALL(T) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, >, _gt) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, <, _lt) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, >=, _ge) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, <=, _le) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, ==, _eq) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, !=, _ne)
|
||||||
|
+
|
||||||
|
+TEST_VAR_ALL (DEF_VCOND_VAR)
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..5f40ed44c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
@@ -0,0 +1,87 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlasx" } */
|
||||||
|
+
|
||||||
|
+#include <stdint-gcc.h>
|
||||||
|
+
|
||||||
|
+#define eq(A, B) ((A) == (B))
|
||||||
|
+#define ne(A, B) ((A) != (B))
|
||||||
|
+#define olt(A, B) ((A) < (B))
|
||||||
|
+#define ole(A, B) ((A) <= (B))
|
||||||
|
+#define oge(A, B) ((A) >= (B))
|
||||||
|
+#define ogt(A, B) ((A) > (B))
|
||||||
|
+#define ordered(A, B) (!__builtin_isunordered (A, B))
|
||||||
|
+#define unordered(A, B) (__builtin_isunordered (A, B))
|
||||||
|
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
|
||||||
|
+#define ult(A, B) (__builtin_isless (A, B))
|
||||||
|
+#define ule(A, B) (__builtin_islessequal (A, B))
|
||||||
|
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
|
||||||
|
+#define ugt(A, B) (__builtin_isgreater (A, B))
|
||||||
|
+#define nueq(A, B) (__builtin_islessgreater (A, B))
|
||||||
|
+#define nult(A, B) (!__builtin_isless (A, B))
|
||||||
|
+#define nule(A, B) (!__builtin_islessequal (A, B))
|
||||||
|
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
|
||||||
|
+#define nugt(A, B) (!__builtin_isgreater (A, B))
|
||||||
|
+
|
||||||
|
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
|
||||||
|
+ void __attribute__ ((noinline, noclone)) \
|
||||||
|
+ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
|
||||||
|
+ TYPE1 *restrict src, \
|
||||||
|
+ TYPE1 fallback, \
|
||||||
|
+ TYPE2 *restrict a, \
|
||||||
|
+ TYPE2 *restrict b, \
|
||||||
|
+ int count) \
|
||||||
|
+ { \
|
||||||
|
+ for (int i = 0; i < count; ++i) \
|
||||||
|
+ {\
|
||||||
|
+ TYPE2 aval = a[i]; \
|
||||||
|
+ TYPE2 bval = b[i]; \
|
||||||
|
+ TYPE1 srcval = src[i]; \
|
||||||
|
+ dest[i] = CMP (aval, bval) ? srcval : fallback; \
|
||||||
|
+ }\
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+#define TEST_CMP(CMP) \
|
||||||
|
+ TEST_LOOP (int32_t, float, CMP) \
|
||||||
|
+ TEST_LOOP (uint32_t, float, CMP) \
|
||||||
|
+ TEST_LOOP (float, float, CMP) \
|
||||||
|
+ TEST_LOOP (int64_t, double, CMP) \
|
||||||
|
+ TEST_LOOP (uint64_t, double, CMP) \
|
||||||
|
+ TEST_LOOP (double, double, CMP)
|
||||||
|
+
|
||||||
|
+TEST_CMP (eq)
|
||||||
|
+TEST_CMP (ne)
|
||||||
|
+TEST_CMP (olt)
|
||||||
|
+TEST_CMP (ole)
|
||||||
|
+TEST_CMP (oge)
|
||||||
|
+TEST_CMP (ogt)
|
||||||
|
+TEST_CMP (ordered)
|
||||||
|
+TEST_CMP (unordered)
|
||||||
|
+TEST_CMP (ueq)
|
||||||
|
+TEST_CMP (ult)
|
||||||
|
+TEST_CMP (ule)
|
||||||
|
+TEST_CMP (uge)
|
||||||
|
+TEST_CMP (ugt)
|
||||||
|
+TEST_CMP (nueq)
|
||||||
|
+TEST_CMP (nult)
|
||||||
|
+TEST_CMP (nule)
|
||||||
|
+TEST_CMP (nuge)
|
||||||
|
+TEST_CMP (nugt)
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..138adccfa
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
|
||||||
|
@@ -0,0 +1,64 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlsx" } */
|
||||||
|
+
|
||||||
|
+#include <stdint-gcc.h>
|
||||||
|
+
|
||||||
|
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX) \
|
||||||
|
+ void __attribute__ ((noinline, noclone)) \
|
||||||
|
+ vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r, \
|
||||||
|
+ DATA_TYPE *__restrict__ x, \
|
||||||
|
+ DATA_TYPE *__restrict__ y, \
|
||||||
|
+ CMP_TYPE *__restrict__ a, \
|
||||||
|
+ CMP_TYPE *__restrict__ b, \
|
||||||
|
+ int n) \
|
||||||
|
+ { \
|
||||||
|
+ for (int i = 0; i < n; i++) \
|
||||||
|
+ { \
|
||||||
|
+ DATA_TYPE xval = x[i], yval = y[i]; \
|
||||||
|
+ CMP_TYPE aval = a[i], bval = b[i]; \
|
||||||
|
+ r[i] = aval COND bval ? xval : yval; \
|
||||||
|
+ } \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX) \
|
||||||
|
+ T (int8_t, int8_t, COND, SUFFIX) \
|
||||||
|
+ T (int16_t, int16_t, COND, SUFFIX) \
|
||||||
|
+ T (int32_t, int32_t, COND, SUFFIX) \
|
||||||
|
+ T (int64_t, int64_t, COND, SUFFIX) \
|
||||||
|
+ T (float, int32_t, COND, SUFFIX##_float) \
|
||||||
|
+ T (double, int64_t, COND, SUFFIX##_double)
|
||||||
|
+
|
||||||
|
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX) \
|
||||||
|
+ T (uint8_t, uint8_t, COND, SUFFIX) \
|
||||||
|
+ T (uint16_t, uint16_t, COND, SUFFIX) \
|
||||||
|
+ T (uint32_t, uint32_t, COND, SUFFIX) \
|
||||||
|
+ T (uint64_t, uint64_t, COND, SUFFIX) \
|
||||||
|
+ T (float, uint32_t, COND, SUFFIX##_float) \
|
||||||
|
+ T (double, uint64_t, COND, SUFFIX##_double)
|
||||||
|
+
|
||||||
|
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX) \
|
||||||
|
+ TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX) \
|
||||||
|
+ TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
|
||||||
|
+
|
||||||
|
+#define TEST_VAR_ALL(T) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, >, _gt) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, <, _lt) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, >=, _ge) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, <=, _le) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, ==, _eq) \
|
||||||
|
+ TEST_COND_VAR_ALL (T, !=, _ne)
|
||||||
|
+
|
||||||
|
+TEST_VAR_ALL (DEF_VCOND_VAR)
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..e8fe31f8f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
@@ -0,0 +1,87 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -mlsx" } */
|
||||||
|
+
|
||||||
|
+#include <stdint-gcc.h>
|
||||||
|
+
|
||||||
|
+#define eq(A, B) ((A) == (B))
|
||||||
|
+#define ne(A, B) ((A) != (B))
|
||||||
|
+#define olt(A, B) ((A) < (B))
|
||||||
|
+#define ole(A, B) ((A) <= (B))
|
||||||
|
+#define oge(A, B) ((A) >= (B))
|
||||||
|
+#define ogt(A, B) ((A) > (B))
|
||||||
|
+#define ordered(A, B) (!__builtin_isunordered (A, B))
|
||||||
|
+#define unordered(A, B) (__builtin_isunordered (A, B))
|
||||||
|
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
|
||||||
|
+#define ult(A, B) (__builtin_isless (A, B))
|
||||||
|
+#define ule(A, B) (__builtin_islessequal (A, B))
|
||||||
|
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
|
||||||
|
+#define ugt(A, B) (__builtin_isgreater (A, B))
|
||||||
|
+#define nueq(A, B) (__builtin_islessgreater (A, B))
|
||||||
|
+#define nult(A, B) (!__builtin_isless (A, B))
|
||||||
|
+#define nule(A, B) (!__builtin_islessequal (A, B))
|
||||||
|
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
|
||||||
|
+#define nugt(A, B) (!__builtin_isgreater (A, B))
|
||||||
|
+
|
||||||
|
+#define TEST_LOOP(TYPE1, TYPE2, CMP) \
|
||||||
|
+ void __attribute__ ((noinline, noclone)) \
|
||||||
|
+ test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest, \
|
||||||
|
+ TYPE1 *restrict src, \
|
||||||
|
+ TYPE1 fallback, \
|
||||||
|
+ TYPE2 *restrict a, \
|
||||||
|
+ TYPE2 *restrict b, \
|
||||||
|
+ int count) \
|
||||||
|
+ { \
|
||||||
|
+ for (int i = 0; i < count; ++i) \
|
||||||
|
+ {\
|
||||||
|
+ TYPE2 aval = a[i]; \
|
||||||
|
+ TYPE2 bval = b[i]; \
|
||||||
|
+ TYPE1 srcval = src[i]; \
|
||||||
|
+ dest[i] = CMP (aval, bval) ? srcval : fallback; \
|
||||||
|
+ }\
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+#define TEST_CMP(CMP) \
|
||||||
|
+ TEST_LOOP (int32_t, float, CMP) \
|
||||||
|
+ TEST_LOOP (uint32_t, float, CMP) \
|
||||||
|
+ TEST_LOOP (float, float, CMP) \
|
||||||
|
+ TEST_LOOP (int64_t, double, CMP) \
|
||||||
|
+ TEST_LOOP (uint64_t, double, CMP) \
|
||||||
|
+ TEST_LOOP (double, double, CMP)
|
||||||
|
+
|
||||||
|
+TEST_CMP (eq)
|
||||||
|
+TEST_CMP (ne)
|
||||||
|
+TEST_CMP (olt)
|
||||||
|
+TEST_CMP (ole)
|
||||||
|
+TEST_CMP (oge)
|
||||||
|
+TEST_CMP (ogt)
|
||||||
|
+TEST_CMP (ordered)
|
||||||
|
+TEST_CMP (unordered)
|
||||||
|
+TEST_CMP (ueq)
|
||||||
|
+TEST_CMP (ult)
|
||||||
|
+TEST_CMP (ule)
|
||||||
|
+TEST_CMP (uge)
|
||||||
|
+TEST_CMP (ugt)
|
||||||
|
+TEST_CMP (nueq)
|
||||||
|
+TEST_CMP (nult)
|
||||||
|
+TEST_CMP (nule)
|
||||||
|
+TEST_CMP (nuge)
|
||||||
|
+TEST_CMP (nugt)
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,34 @@
|
|||||||
|
From 0527589fb1b7b97cff2c441c1219fb9c8a44dd23 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 30 Oct 2023 19:39:27 +0800
|
||||||
|
Subject: [PATCH 022/188] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined
|
||||||
|
[PR112299]
|
||||||
|
|
||||||
|
Now that loongarch.md uses HAVE_AS_TLS, we need this to fix the failure
|
||||||
|
building a cross compiler if the cross assembler is not installed yet.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112299
|
||||||
|
* config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
|
||||||
|
if not defined yet.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 ++++
|
||||||
|
1 file changed, 4 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index f2b59abe6..c4975af00 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -103,4 +103,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
#define HAVE_AS_MRELAX_OPTION 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef HAVE_AS_TLS
|
||||||
|
+#define HAVE_AS_TLS 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#endif /* LOONGARCH_OPTS_H */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,30 @@
|
|||||||
|
From bc3ae60454a51b80538b6deba21975d43de23b6a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chenghui Pan <panchenghui@loongson.cn>
|
||||||
|
Date: Fri, 3 Nov 2023 17:01:36 +0800
|
||||||
|
Subject: [PATCH 023/188] LoongArch: Fix instruction name typo in
|
||||||
|
lsx_vreplgr2vr_<lsxfmt_f> template
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lsx.md: Fix instruction name typo in
|
||||||
|
lsx_vreplgr2vr_<lsxfmt_f> template.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lsx.md | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 4af32c8df..55c7d79a0 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -1523,7 +1523,7 @@
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
{
|
||||||
|
if (which_alternative == 1)
|
||||||
|
- return "ldi.<lsxfmt>\t%w0,0";
|
||||||
|
+ return "vldi.<lsxfmt>\t%w0,0";
|
||||||
|
|
||||||
|
if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
|
||||||
|
return "#";
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
116
0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
Normal file
116
0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
From b8f47a362000bb51dec88e0a73f885c57a46f568 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 12 Nov 2023 00:55:13 +0800
|
||||||
|
Subject: [PATCH 024/188] LoongArch: Use simplify_gen_subreg instead of
|
||||||
|
gen_rtx_SUBREG in loongarch_expand_vec_cond_mask_expr [PR112476]
|
||||||
|
|
||||||
|
The GCC internals manual says:
|
||||||
|
|
||||||
|
'subreg's of 'subreg's are not supported. Using
|
||||||
|
'simplify_gen_subreg' is the recommended way to avoid this problem.
|
||||||
|
|
||||||
|
Unfortunately loongarch_expand_vec_cond_mask_expr might create nested
|
||||||
|
subregs under certain circumstances, causing an ICE.
|
||||||
|
|
||||||
|
Use simplify_gen_subreg as the internal document suggests.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112476
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg
|
||||||
|
instead of gen_rtx_SUBREG.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112476
|
||||||
|
* gcc.target/loongarch/pr112476-1.c: New test.
|
||||||
|
* gcc.target/loongarch/pr112476-2.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 11 ++++++---
|
||||||
|
.../gcc.target/loongarch/pr112476-1.c | 24 +++++++++++++++++++
|
||||||
|
.../gcc.target/loongarch/pr112476-2.c | 5 ++++
|
||||||
|
3 files changed, 37 insertions(+), 3 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-1.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-2.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index fa5c14be6..65ca1489f 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -11190,7 +11190,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
|
||||||
|
if (mode != vimode)
|
||||||
|
{
|
||||||
|
xop1 = gen_reg_rtx (vimode);
|
||||||
|
- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
|
||||||
|
+ emit_move_insn (xop1,
|
||||||
|
+ simplify_gen_subreg (vimode, operands[1],
|
||||||
|
+ mode, 0));
|
||||||
|
}
|
||||||
|
emit_move_insn (src1, xop1);
|
||||||
|
}
|
||||||
|
@@ -11207,7 +11209,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
|
||||||
|
if (mode != vimode)
|
||||||
|
{
|
||||||
|
xop2 = gen_reg_rtx (vimode);
|
||||||
|
- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
|
||||||
|
+ emit_move_insn (xop2,
|
||||||
|
+ simplify_gen_subreg (vimode, operands[2],
|
||||||
|
+ mode, 0));
|
||||||
|
}
|
||||||
|
emit_move_insn (src2, xop2);
|
||||||
|
}
|
||||||
|
@@ -11226,7 +11230,8 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
|
||||||
|
gen_rtx_AND (vimode, mask, src1));
|
||||||
|
/* The result is placed back to a register with the mask. */
|
||||||
|
emit_insn (gen_rtx_SET (mask, bsel));
|
||||||
|
- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
|
||||||
|
+ emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
|
||||||
|
+ vimode, 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-1.c b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..4cf133e7a
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
|
||||||
|
@@ -0,0 +1,24 @@
|
||||||
|
+/* PR target/112476: ICE with -mlsx */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlsx" } */
|
||||||
|
+
|
||||||
|
+int foo, bar;
|
||||||
|
+float baz, res, a;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+apply_adjacent_ternary (float *dst, float *src0)
|
||||||
|
+{
|
||||||
|
+ do
|
||||||
|
+ {
|
||||||
|
+ __builtin_memcpy (&res, &src0, sizeof (res));
|
||||||
|
+ *dst = foo ? baz : res;
|
||||||
|
+ dst++;
|
||||||
|
+ }
|
||||||
|
+ while (dst != src0);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+xx (void)
|
||||||
|
+{
|
||||||
|
+ apply_adjacent_ternary (&a, &a);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-2.c b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..cc0dfbfc9
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
|
||||||
|
@@ -0,0 +1,5 @@
|
||||||
|
+/* PR target/112476: ICE with -mlasx */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlasx" } */
|
||||||
|
+
|
||||||
|
+#include "pr112476-1.c"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
116
0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
Normal file
116
0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
Normal file
@ -0,0 +1,116 @@
|
|||||||
|
From b23a89e835962ae7d89e5c6f87a69c021097d715 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 30 Oct 2023 20:24:58 +0800
|
||||||
|
Subject: [PATCH 025/188] LoongArch: Optimize single-used address with
|
||||||
|
-mexplicit-relocs=auto for fld/fst
|
||||||
|
|
||||||
|
fld and fst have the same addressing mode as ld.w and st.w, so the same
|
||||||
|
optimization as r14-4851 should be applied for them too.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode
|
||||||
|
iterator.
|
||||||
|
(ST_ANY): New mode iterator.
|
||||||
|
(define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and
|
||||||
|
ST_ANY instead of QHWD for applicable patterns.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 38 +++++++++++++++++++------------
|
||||||
|
1 file changed, 24 insertions(+), 14 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 80487488d..ed86c95bd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -400,6 +400,14 @@
|
||||||
|
(DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
|
||||||
|
(TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
|
||||||
|
|
||||||
|
+;; A mode for anything with 32 bits or more, and able to be loaded with
|
||||||
|
+;; the same addressing mode as ld.w.
|
||||||
|
+(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
|
||||||
|
+
|
||||||
|
+;; A mode for anything able to be stored with the same addressing mode as
|
||||||
|
+;; st.w.
|
||||||
|
+(define_mode_iterator ST_ANY [QHWD ANYF])
|
||||||
|
+
|
||||||
|
;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
|
||||||
|
;; 32-bit version and "mul.d" in the 64-bit version.
|
||||||
|
(define_mode_attr d [(SI "w") (DI "d")])
|
||||||
|
@@ -3785,13 +3793,14 @@
|
||||||
|
(define_peephole2
|
||||||
|
[(set (match_operand:P 0 "register_operand")
|
||||||
|
(match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (match_operand:GPR 2 "register_operand")
|
||||||
|
- (mem:GPR (match_dup 0)))]
|
||||||
|
+ (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
|
||||||
|
+ (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
|
||||||
|
"la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
&& (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
&& (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
|| REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
- [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
+ [(set (match_dup 2)
|
||||||
|
+ (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
{
|
||||||
|
emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
})
|
||||||
|
@@ -3799,14 +3808,15 @@
|
||||||
|
(define_peephole2
|
||||||
|
[(set (match_operand:P 0 "register_operand")
|
||||||
|
(match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (match_operand:GPR 2 "register_operand")
|
||||||
|
- (mem:GPR (plus (match_dup 0)
|
||||||
|
- (match_operand 3 "const_int_operand"))))]
|
||||||
|
+ (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
|
||||||
|
+ (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
|
||||||
|
+ (match_operand 3 "const_int_operand"))))]
|
||||||
|
"la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
&& (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
&& (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
|| REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
- [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
+ [(set (match_dup 2)
|
||||||
|
+ (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
{
|
||||||
|
operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
@@ -3850,13 +3860,13 @@
|
||||||
|
(define_peephole2
|
||||||
|
[(set (match_operand:P 0 "register_operand")
|
||||||
|
(match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (mem:QHWD (match_dup 0))
|
||||||
|
- (match_operand:QHWD 2 "register_operand"))]
|
||||||
|
+ (set (mem:ST_ANY (match_dup 0))
|
||||||
|
+ (match_operand:ST_ANY 2 "register_operand"))]
|
||||||
|
"la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
&& (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
&& (peep2_reg_dead_p (2, operands[0])) \
|
||||||
|
&& REGNO (operands[0]) != REGNO (operands[2])"
|
||||||
|
- [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
+ [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
{
|
||||||
|
emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
})
|
||||||
|
@@ -3864,14 +3874,14 @@
|
||||||
|
(define_peephole2
|
||||||
|
[(set (match_operand:P 0 "register_operand")
|
||||||
|
(match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (mem:QHWD (plus (match_dup 0)
|
||||||
|
- (match_operand 3 "const_int_operand")))
|
||||||
|
- (match_operand:QHWD 2 "register_operand"))]
|
||||||
|
+ (set (mem:ST_ANY (plus (match_dup 0)
|
||||||
|
+ (match_operand 3 "const_int_operand")))
|
||||||
|
+ (match_operand:ST_ANY 2 "register_operand"))]
|
||||||
|
"la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
&& (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
&& (peep2_reg_dead_p (2, operands[0])) \
|
||||||
|
&& REGNO (operands[0]) != REGNO (operands[2])"
|
||||||
|
- [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
+ [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
{
|
||||||
|
operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
305
0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
Normal file
305
0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
From f1cfdec1602a5a316a9b9022a95143a7385489c2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 3 Nov 2023 21:19:59 +0800
|
||||||
|
Subject: [PATCH 026/188] LoongArch: Disable relaxation if the assembler doesn't
|
||||||
|
support conditional branch relaxation [PR112330]
|
||||||
|
|
||||||
|
As the commit message of r14-4674 has indicated, if the assembler does
|
||||||
|
not support conditional branch relaxation, a relocation overflow may
|
||||||
|
happen on conditional branches when relaxation is enabled because the
|
||||||
|
number of NOP instructions inserted by the assembler will be more than
|
||||||
|
the number estimated by GCC.
|
||||||
|
|
||||||
|
To work around this issue, disable relaxation by default if the
|
||||||
|
assembler is found incapable of conditional branch relaxation
|
||||||
|
at GCC build time. We also need to pass -mno-relax to the assembler to
|
||||||
|
really disable relaxation. But, if the assembler does not support
|
||||||
|
-mrelax option at all, we should not pass -mno-relax to the assembler or
|
||||||
|
it will immediately error out. Also handle this with the build time
|
||||||
|
assembler capability probing, and add a pair of options
|
||||||
|
-m[no-]pass-mrelax-to-as to allow using a different assembler from the
|
||||||
|
build-time one.
|
||||||
|
|
||||||
|
With this change, if GCC is built with GAS 2.41, relaxation will be
|
||||||
|
disabled by default. So the default value of -mexplicit-relocs= is also
|
||||||
|
changed to 'always' if -mno-relax is specified or implied by the
|
||||||
|
build-time default, because using assembler macros for symbol addresses
|
||||||
|
produces no benefit when relaxation is disabled.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112330
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in: Add
|
||||||
|
-m[no-]pass-mrelax-to-as. Change the default of -m[no-]relax to
|
||||||
|
account for conditional branch relaxation support status.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
|
||||||
|
the assembler supports conditional branch relaxation.
|
||||||
|
* configure: Regenerate.
|
||||||
|
* config.in: Regenerate. Note that there are some unrelated
|
||||||
|
changes introduced by r14-5424 (which does not contain a
|
||||||
|
config.in regeneration).
|
||||||
|
* config/loongarch/loongarch-opts.h
|
||||||
|
(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
|
||||||
|
* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
|
||||||
|
Define.
|
||||||
|
(ASM_MRELAX_SPEC): Define.
|
||||||
|
(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
|
||||||
|
* config/loongarch/loongarch.cc: Take the setting of
|
||||||
|
-m[no-]relax into account when determining the default of
|
||||||
|
-mexplicit-relocs=.
|
||||||
|
* doc/invoke.texi: Document -m[no-]relax and
|
||||||
|
-m[no-]pass-mrelax-to-as for LoongArch. Update the default
|
||||||
|
value of -mexplicit-relocs=.
|
||||||
|
---
|
||||||
|
gcc/config.in | 35 ++++++++++++++++++-
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 6 +++-
|
||||||
|
gcc/config/loongarch/loongarch-driver.h | 16 ++++++++-
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 +++
|
||||||
|
gcc/config/loongarch/loongarch.cc | 2 +-
|
||||||
|
gcc/config/loongarch/loongarch.opt | 6 +++-
|
||||||
|
gcc/configure | 35 +++++++++++++++++++
|
||||||
|
gcc/configure.ac | 10 ++++++
|
||||||
|
8 files changed, 109 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config.in b/gcc/config.in
|
||||||
|
index 0c55e67e7..04968b53c 100644
|
||||||
|
--- a/gcc/config.in
|
||||||
|
+++ b/gcc/config.in
|
||||||
|
@@ -374,6 +374,12 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define if your assembler supports conditional branch relaxation. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_AS_COND_BRANCH_RELAXATION
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define if your assembler supports the --debug-prefix-map option. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef HAVE_AS_DEBUG_PREFIX_MAP
|
||||||
|
@@ -798,6 +804,20 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define to 1 if you have the Mac OS X function
|
||||||
|
+ CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
|
||||||
|
+ the CoreFoundation framework. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_CFPREFERENCESCOPYAPPVALUE
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define to 1 if you have the `clearerr_unlocked' function. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef HAVE_CLEARERR_UNLOCKED
|
||||||
|
@@ -822,6 +842,13 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define if the GNU dcgettext() function is already present or preinstalled.
|
||||||
|
+ */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_DCGETTEXT
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define to 1 if we found a declaration for 'abort', otherwise define to 0.
|
||||||
|
*/
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
@@ -1554,6 +1581,12 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define if the GNU gettext() function is already present or preinstalled. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_GETTEXT
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define to 1 if you have the `gettimeofday' function. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef HAVE_GETTIMEOFDAY
|
||||||
|
@@ -1585,7 +1618,7 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
-/* Define if you have the iconv() function. */
|
||||||
|
+/* Define if you have the iconv() function and it works. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef HAVE_ICONV
|
||||||
|
#endif
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index e7df1964a..bd3cfaf60 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -229,10 +229,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
|
||||||
|
Avoid using the GOT to access external symbols.
|
||||||
|
|
||||||
|
mrelax
|
||||||
|
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
|
||||||
|
Take advantage of linker relaxations to reduce the number of instructions
|
||||||
|
required to materialize symbol addresses.
|
||||||
|
|
||||||
|
+mpass-mrelax-to-as
|
||||||
|
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Pass -mrelax or -mno-relax option to the assembler.
|
||||||
|
+
|
||||||
|
-param=loongarch-vect-unroll-limit=
|
||||||
|
Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
|
||||||
|
Used to limit unroll factor which indicates how much the autovectorizer may
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
index 59fa3263d..c8dba2cc4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-driver.h
|
||||||
|
@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
"%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \
|
||||||
|
"%(subtarget_cc1_spec)"
|
||||||
|
|
||||||
|
+#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION
|
||||||
|
+#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}"
|
||||||
|
+#else
|
||||||
|
+#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}"
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#if HAVE_AS_MRELAX_OPTION
|
||||||
|
+#define ASM_MRELAX_SPEC \
|
||||||
|
+ "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}"
|
||||||
|
+#else
|
||||||
|
+#define ASM_MRELAX_SPEC \
|
||||||
|
+ "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}"
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#undef ASM_SPEC
|
||||||
|
#define ASM_SPEC \
|
||||||
|
- "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
|
||||||
|
+ "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)"
|
||||||
|
|
||||||
|
|
||||||
|
extern const char*
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index c4975af00..dfbe9dd5c 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -103,6 +103,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
#define HAVE_AS_MRELAX_OPTION 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef HAVE_AS_COND_BRANCH_RELAXATION
|
||||||
|
+#define HAVE_AS_COND_BRANCH_RELAXATION 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#ifndef HAVE_AS_TLS
|
||||||
|
#define HAVE_AS_TLS 0
|
||||||
|
#endif
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 65ca1489f..6d580ee75 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -7428,7 +7428,7 @@ loongarch_option_override_internal (struct gcc_options *opts,
|
||||||
|
|
||||||
|
if (la_opt_explicit_relocs == M_OPT_UNSET)
|
||||||
|
la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
|
||||||
|
- ? (HAVE_AS_MRELAX_OPTION
|
||||||
|
+ ? (loongarch_mrelax
|
||||||
|
? EXPLICIT_RELOCS_AUTO
|
||||||
|
: EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
: EXPLICIT_RELOCS_NONE);
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index 44376fd77..d936954b8 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -236,10 +236,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
|
||||||
|
Avoid using the GOT to access external symbols.
|
||||||
|
|
||||||
|
mrelax
|
||||||
|
-Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
|
||||||
|
Take advantage of linker relaxations to reduce the number of instructions
|
||||||
|
required to materialize symbol addresses.
|
||||||
|
|
||||||
|
+mpass-mrelax-to-as
|
||||||
|
+Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
|
||||||
|
+Pass -mrelax or -mno-relax option to the assembler.
|
||||||
|
+
|
||||||
|
-param=loongarch-vect-unroll-limit=
|
||||||
|
Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
|
||||||
|
Used to limit unroll factor which indicates how much the autovectorizer may
|
||||||
|
diff --git a/gcc/configure b/gcc/configure
|
||||||
|
index 430d44dc3..09bacfec3 100755
|
||||||
|
--- a/gcc/configure
|
||||||
|
+++ b/gcc/configure
|
||||||
|
@@ -28901,6 +28901,41 @@ if test $gcc_cv_as_loongarch_relax = yes; then
|
||||||
|
|
||||||
|
$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
|
||||||
|
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for conditional branch relaxation support" >&5
|
||||||
|
+$as_echo_n "checking assembler for conditional branch relaxation support... " >&6; }
|
||||||
|
+if ${gcc_cv_as_loongarch_cond_branch_relax+:} false; then :
|
||||||
|
+ $as_echo_n "(cached) " >&6
|
||||||
|
+else
|
||||||
|
+ gcc_cv_as_loongarch_cond_branch_relax=no
|
||||||
|
+ if test x$gcc_cv_as != x; then
|
||||||
|
+ $as_echo 'a:
|
||||||
|
+ .rept 32769
|
||||||
|
+ nop
|
||||||
|
+ .endr
|
||||||
|
+ beq $a0,$a1,a' > conftest.s
|
||||||
|
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
|
||||||
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
+ (eval $ac_try) 2>&5
|
||||||
|
+ ac_status=$?
|
||||||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
+ test $ac_status = 0; }; }
|
||||||
|
+ then
|
||||||
|
+ gcc_cv_as_loongarch_cond_branch_relax=yes
|
||||||
|
+ else
|
||||||
|
+ echo "configure: failed program was" >&5
|
||||||
|
+ cat conftest.s >&5
|
||||||
|
+ fi
|
||||||
|
+ rm -f conftest.o conftest.s
|
||||||
|
+ fi
|
||||||
|
+fi
|
||||||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_cond_branch_relax" >&5
|
||||||
|
+$as_echo "$gcc_cv_as_loongarch_cond_branch_relax" >&6; }
|
||||||
|
+if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then
|
||||||
|
+
|
||||||
|
+$as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h
|
||||||
|
+
|
||||||
|
fi
|
||||||
|
|
||||||
|
;;
|
||||||
|
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||||
|
index 4b24db190..a0999152e 100644
|
||||||
|
--- a/gcc/configure.ac
|
||||||
|
+++ b/gcc/configure.ac
|
||||||
|
@@ -5341,6 +5341,16 @@ x:
|
||||||
|
[-mrelax], [.text],,
|
||||||
|
[AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
|
||||||
|
[Define if your assembler supports -mrelax option.])])
|
||||||
|
+ gcc_GAS_CHECK_FEATURE([conditional branch relaxation support],
|
||||||
|
+ gcc_cv_as_loongarch_cond_branch_relax,
|
||||||
|
+ [],
|
||||||
|
+ [a:
|
||||||
|
+ .rept 32769
|
||||||
|
+ nop
|
||||||
|
+ .endr
|
||||||
|
+ beq $a0,$a1,a],,
|
||||||
|
+ [AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1,
|
||||||
|
+ [Define if your assembler supports conditional branch relaxation.])])
|
||||||
|
;;
|
||||||
|
s390*-*-*)
|
||||||
|
gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
391
0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
Normal file
391
0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
Normal file
@ -0,0 +1,391 @@
|
|||||||
|
From 4498010fba61c1446286c96cbda24d5ed53c53c7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 6 Nov 2023 16:06:08 +0800
|
||||||
|
Subject: [PATCH 027/188] LoongArch: Remove redundant barrier instructions
|
||||||
|
before LL-SC loops
|
||||||
|
|
||||||
|
This is isomorphic to the LLVM changes [1-2].
|
||||||
|
|
||||||
|
On LoongArch, the LL and SC instructions have memory barrier semantics:
|
||||||
|
|
||||||
|
- LL: <memory-barrier> + <load-exclusive>
|
||||||
|
- SC: <store-conditional> + <memory-barrier>
|
||||||
|
|
||||||
|
But the compare and swap operation is allowed to fail, and if it fails
|
||||||
|
the SC instruction is not executed, thus the guarantee of acquiring
|
||||||
|
semantics cannot be ensured. Therefore, an acquire barrier needs to be
|
||||||
|
generated when failure_memorder includes an acquire operation.
|
||||||
|
|
||||||
|
On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
|
||||||
|
acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
|
||||||
|
barrier. So it's always enough for acquire semantics. OTOH if an
|
||||||
|
acquire semantic is not needed, we still need the "dbar 0x700" as the
|
||||||
|
load-load barrier like all LL-SC loops.
|
||||||
|
|
||||||
|
[1]:https://github.com/llvm/llvm-project/pull/67391
|
||||||
|
[2]:https://github.com/llvm/llvm-project/pull/69339
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_memmodel_needs_release_fence): Remove.
|
||||||
|
(loongarch_cas_failure_memorder_needs_acquire): New static
|
||||||
|
function.
|
||||||
|
(loongarch_print_operand): Redefine 'G' for the barrier on CAS
|
||||||
|
failure.
|
||||||
|
* config/loongarch/sync.md (atomic_cas_value_strong<mode>):
|
||||||
|
Remove the redundant barrier before the LL instruction, and
|
||||||
|
emit an acquire barrier on failure if needed by
|
||||||
|
failure_memorder.
|
||||||
|
(atomic_cas_value_cmp_and_7_<mode>): Likewise.
|
||||||
|
(atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier
|
||||||
|
before the LL instruction.
|
||||||
|
(atomic_cas_value_sub_7_<mode>): Likewise.
|
||||||
|
(atomic_cas_value_and_7_<mode>): Likewise.
|
||||||
|
(atomic_cas_value_xor_7_<mode>): Likewise.
|
||||||
|
(atomic_cas_value_or_7_<mode>): Likewise.
|
||||||
|
(atomic_cas_value_nand_7_<mode>): Likewise.
|
||||||
|
(atomic_cas_value_exchange_7_<mode>): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/cas-acquire.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 30 ++++---
|
||||||
|
gcc/config/loongarch/sync.md | 49 +++++------
|
||||||
|
.../gcc.target/loongarch/cas-acquire.c | 82 +++++++++++++++++++
|
||||||
|
3 files changed, 119 insertions(+), 42 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 6d580ee75..8467f03cf 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -5829,27 +5829,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Return true if a FENCE should be emitted to before a memory access to
|
||||||
|
- implement the release portion of memory model MODEL. */
|
||||||
|
+/* Return true if a FENCE should be emitted after a failed CAS to
|
||||||
|
+ implement the acquire semantic of failure_memorder. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
-loongarch_memmodel_needs_release_fence (enum memmodel model)
|
||||||
|
+loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
|
||||||
|
{
|
||||||
|
- switch (model)
|
||||||
|
+ switch (memmodel_base (model))
|
||||||
|
{
|
||||||
|
+ case MEMMODEL_ACQUIRE:
|
||||||
|
case MEMMODEL_ACQ_REL:
|
||||||
|
case MEMMODEL_SEQ_CST:
|
||||||
|
- case MEMMODEL_SYNC_SEQ_CST:
|
||||||
|
- case MEMMODEL_RELEASE:
|
||||||
|
- case MEMMODEL_SYNC_RELEASE:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
- case MEMMODEL_ACQUIRE:
|
||||||
|
- case MEMMODEL_CONSUME:
|
||||||
|
- case MEMMODEL_SYNC_ACQUIRE:
|
||||||
|
case MEMMODEL_RELAXED:
|
||||||
|
+ case MEMMODEL_RELEASE:
|
||||||
|
return false;
|
||||||
|
|
||||||
|
+ /* MEMMODEL_CONSUME is deliberately not handled because it's always
|
||||||
|
+ replaced by MEMMODEL_ACQUIRE as at now. If you see an ICE caused by
|
||||||
|
+ MEMMODEL_CONSUME, read the change (re)introducing it carefully and
|
||||||
|
+ decide what to do. See PR 59448 and get_memmodel in builtins.cc. */
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
@@ -5962,7 +5962,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||||||
|
'd' Print CONST_INT OP in decimal.
|
||||||
|
'E' Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
|
||||||
|
'F' Print the FPU branch condition for comparison OP.
|
||||||
|
- 'G' Print a DBAR insn if the memory model requires a release.
|
||||||
|
+ 'G' Print a DBAR insn for CAS failure (with an acquire semantic if
|
||||||
|
+ needed, otherwise a simple load-load barrier).
|
||||||
|
'H' Print address 52-61bit relocation associated with OP.
|
||||||
|
'h' Print the high-part relocation associated with OP.
|
||||||
|
'i' Print i if the operand is not a register.
|
||||||
|
@@ -6053,8 +6054,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'G':
|
||||||
|
- if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
|
||||||
|
- fputs ("dbar\t0", file);
|
||||||
|
+ if (loongarch_cas_failure_memorder_needs_acquire (
|
||||||
|
+ memmodel_from_int (INTVAL (op))))
|
||||||
|
+ fputs ("dbar\t0b10100", file);
|
||||||
|
+ else
|
||||||
|
+ fputs ("dbar\t0x700", file);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'h':
|
||||||
|
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
|
||||||
|
index efa40f24c..dd1f98946 100644
|
||||||
|
--- a/gcc/config/loongarch/sync.md
|
||||||
|
+++ b/gcc/config/loongarch/sync.md
|
||||||
|
@@ -162,19 +162,18 @@
|
||||||
|
(clobber (match_scratch:GPR 6 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G5\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"bne\\t%0,%z2,2f\\n\\t"
|
||||||
|
"or%i3\\t%6,$zero,%3\\n\\t"
|
||||||
|
"sc.<amo>\\t%6,%1\\n\\t"
|
||||||
|
- "beq\\t$zero,%6,1b\\n\\t"
|
||||||
|
+ "beqz\\t%6,1b\\n\\t"
|
||||||
|
"b\\t3f\\n\\t"
|
||||||
|
"2:\\n\\t"
|
||||||
|
- "dbar\\t0x700\\n\\t"
|
||||||
|
+ "%G5\\n\\t"
|
||||||
|
"3:\\n\\t";
|
||||||
|
}
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_expand "atomic_compare_and_swap<mode>"
|
||||||
|
[(match_operand:SI 0 "register_operand" "") ;; bool output
|
||||||
|
@@ -267,8 +266,7 @@
|
||||||
|
(clobber (match_scratch:GPR 7 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%2\\n\\t"
|
||||||
|
"bne\\t%7,%z4,2f\\n\\t"
|
||||||
|
@@ -278,10 +276,10 @@
|
||||||
|
"beq\\t$zero,%7,1b\\n\\t"
|
||||||
|
"b\\t3f\\n\\t"
|
||||||
|
"2:\\n\\t"
|
||||||
|
- "dbar\\t0x700\\n\\t"
|
||||||
|
+ "%G6\\n\\t"
|
||||||
|
"3:\\n\\t";
|
||||||
|
}
|
||||||
|
- [(set (attr "length") (const_int 40))])
|
||||||
|
+ [(set (attr "length") (const_int 36))])
|
||||||
|
|
||||||
|
(define_expand "atomic_compare_and_swap<mode>"
|
||||||
|
[(match_operand:SI 0 "register_operand" "") ;; bool output
|
||||||
|
@@ -336,8 +334,7 @@
|
||||||
|
(clobber (match_scratch:GPR 8 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%3\\n\\t"
|
||||||
|
"add.w\\t%8,%0,%z5\\n\\t"
|
||||||
|
@@ -347,7 +344,7 @@
|
||||||
|
"beq\\t$zero,%7,1b";
|
||||||
|
}
|
||||||
|
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_sub_7_<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
|
||||||
|
@@ -363,8 +360,7 @@
|
||||||
|
(clobber (match_scratch:GPR 8 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%3\\n\\t"
|
||||||
|
"sub.w\\t%8,%0,%z5\\n\\t"
|
||||||
|
@@ -373,7 +369,7 @@
|
||||||
|
"sc.<amo>\\t%7,%1\\n\\t"
|
||||||
|
"beq\\t$zero,%7,1b";
|
||||||
|
}
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_and_7_<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
|
||||||
|
@@ -389,8 +385,7 @@
|
||||||
|
(clobber (match_scratch:GPR 8 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%3\\n\\t"
|
||||||
|
"and\\t%8,%0,%z5\\n\\t"
|
||||||
|
@@ -399,7 +394,7 @@
|
||||||
|
"sc.<amo>\\t%7,%1\\n\\t"
|
||||||
|
"beq\\t$zero,%7,1b";
|
||||||
|
}
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_xor_7_<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
|
||||||
|
@@ -415,8 +410,7 @@
|
||||||
|
(clobber (match_scratch:GPR 8 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%3\\n\\t"
|
||||||
|
"xor\\t%8,%0,%z5\\n\\t"
|
||||||
|
@@ -426,7 +420,7 @@
|
||||||
|
"beq\\t$zero,%7,1b";
|
||||||
|
}
|
||||||
|
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_or_7_<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
|
||||||
|
@@ -442,8 +436,7 @@
|
||||||
|
(clobber (match_scratch:GPR 8 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%3\\n\\t"
|
||||||
|
"or\\t%8,%0,%z5\\n\\t"
|
||||||
|
@@ -453,7 +446,7 @@
|
||||||
|
"beq\\t$zero,%7,1b";
|
||||||
|
}
|
||||||
|
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_nand_7_<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r") ;; res
|
||||||
|
@@ -469,8 +462,7 @@
|
||||||
|
(clobber (match_scratch:GPR 8 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%3\\n\\t"
|
||||||
|
"and\\t%8,%0,%z5\\n\\t"
|
||||||
|
@@ -479,7 +471,7 @@
|
||||||
|
"sc.<amo>\\t%7,%1\\n\\t"
|
||||||
|
"beq\\t$zero,%7,1b";
|
||||||
|
}
|
||||||
|
- [(set (attr "length") (const_int 32))])
|
||||||
|
+ [(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_exchange_7_<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r")
|
||||||
|
@@ -494,8 +486,7 @@
|
||||||
|
(clobber (match_scratch:GPR 7 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- return "%G6\\n\\t"
|
||||||
|
- "1:\\n\\t"
|
||||||
|
+ return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"and\\t%7,%0,%z3\\n\\t"
|
||||||
|
"or%i5\\t%7,%7,%5\\n\\t"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/cas-acquire.c b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..ff7ba866f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c
|
||||||
|
@@ -0,0 +1,82 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-require-effective-target c99_runtime } */
|
||||||
|
+/* { dg-require-effective-target pthread } */
|
||||||
|
+/* { dg-options "-std=c99 -pthread" } */
|
||||||
|
+
|
||||||
|
+/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934
|
||||||
|
+ reported that this had failed with GCC and 3A6000. */
|
||||||
|
+
|
||||||
|
+#include <pthread.h>
|
||||||
|
+#include <stdatomic.h>
|
||||||
|
+#include <stdbool.h>
|
||||||
|
+#include <stdio.h>
|
||||||
|
+
|
||||||
|
+static unsigned int tags[32];
|
||||||
|
+static unsigned int vals[32];
|
||||||
|
+
|
||||||
|
+static void *
|
||||||
|
+writer_entry (void *data)
|
||||||
|
+{
|
||||||
|
+ atomic_uint *pt = (atomic_uint *)tags;
|
||||||
|
+ atomic_uint *pv = (atomic_uint *)vals;
|
||||||
|
+
|
||||||
|
+ for (unsigned int n = 1; n < 10000; n++)
|
||||||
|
+ {
|
||||||
|
+ atomic_store_explicit (&pv[n & 31], n, memory_order_release);
|
||||||
|
+ atomic_store_explicit (&pt[n & 31], n, memory_order_release);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void *
|
||||||
|
+reader_entry (void *data)
|
||||||
|
+{
|
||||||
|
+ atomic_uint *pt = (atomic_uint *)tags;
|
||||||
|
+ atomic_uint *pv = (atomic_uint *)vals;
|
||||||
|
+ int i;
|
||||||
|
+
|
||||||
|
+ for (;;)
|
||||||
|
+ {
|
||||||
|
+ for (i = 0; i < 32; i++)
|
||||||
|
+ {
|
||||||
|
+ unsigned int tag = 0;
|
||||||
|
+ bool res;
|
||||||
|
+
|
||||||
|
+ res = atomic_compare_exchange_weak_explicit (
|
||||||
|
+ &pt[i], &tag, 0, memory_order_acquire, memory_order_acquire);
|
||||||
|
+ if (!res)
|
||||||
|
+ {
|
||||||
|
+ unsigned int val;
|
||||||
|
+
|
||||||
|
+ val = atomic_load_explicit (&pv[i], memory_order_relaxed);
|
||||||
|
+ if (val < tag)
|
||||||
|
+ __builtin_trap ();
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return NULL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main (int argc, char *argv[])
|
||||||
|
+{
|
||||||
|
+ pthread_t writer;
|
||||||
|
+ pthread_t reader;
|
||||||
|
+ int res;
|
||||||
|
+
|
||||||
|
+ res = pthread_create (&writer, NULL, writer_entry, NULL);
|
||||||
|
+ if (res < 0)
|
||||||
|
+ __builtin_trap ();
|
||||||
|
+
|
||||||
|
+ res = pthread_create (&reader, NULL, reader_entry, NULL);
|
||||||
|
+ if (res < 0)
|
||||||
|
+ __builtin_trap ();
|
||||||
|
+
|
||||||
|
+ res = pthread_join (writer, NULL);
|
||||||
|
+ if (res < 0)
|
||||||
|
+ __builtin_trap ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
161
0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
Normal file
161
0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
Normal file
@ -0,0 +1,161 @@
|
|||||||
|
From 9731abbe19b9fad184dfe728bd9b2cc02b40c543 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Thu, 16 Nov 2023 20:31:09 +0800
|
||||||
|
Subject: [PATCH 028/188] LoongArch: Fix scan-assembler-times of lasx/lsx test
|
||||||
|
case.
|
||||||
|
|
||||||
|
These tests have been failing since they were first added; this patch adjusts the scan-assembler-times
|
||||||
|
to fix them.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: Adjust assembler times.
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto.
|
||||||
|
---
|
||||||
|
.../loongarch/vector/lasx/lasx-vcond-1.c | 12 +++----
|
||||||
|
.../loongarch/vector/lasx/lasx-vcond-2.c | 36 +++++++++----------
|
||||||
|
.../loongarch/vector/lsx/lsx-vcond-1.c | 12 +++----
|
||||||
|
.../loongarch/vector/lsx/lsx-vcond-2.c | 36 +++++++++----------
|
||||||
|
4 files changed, 48 insertions(+), 48 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
|
||||||
|
index ee9cb1a1f..57064eac9 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
|
||||||
|
@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvslt\.w} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvslt\.d} 8 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvsle\.w} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvsle\.d} 8 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvseq\.w} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvseq\.d} 8 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
index 5f40ed44c..55d5a084c 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
@@ -67,21 +67,21 @@ TEST_CMP (nule)
|
||||||
|
TEST_CMP (nuge)
|
||||||
|
TEST_CMP (nugt)
|
||||||
|
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 12 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 12 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 12 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 12 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
|
||||||
|
index 138adccfa..8c69f0d9b 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
|
||||||
|
@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvslt\.w} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvslt\.d} 8 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvsle\.w} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvsle\.d} 8 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvseq\.w} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvseq\.d} 8 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
index e8fe31f8f..2214afd0a 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
@@ -67,21 +67,21 @@ TEST_CMP (nule)
|
||||||
|
TEST_CMP (nuge)
|
||||||
|
TEST_CMP (nugt)
|
||||||
|
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 6 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 12 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 12 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 12 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 12 } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,45 @@
|
|||||||
|
From 526e1effd86cfa0b1afae88890ce4f74f7150d88 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Thu, 16 Nov 2023 16:44:36 +0800
|
||||||
|
Subject: [PATCH 029/188] LoongArch: Increase cost of vector aligned
|
||||||
|
store/load.
|
||||||
|
|
||||||
|
Based on SPEC2017 performance evaluation results, it's better to make them equal
|
||||||
|
to the cost of unaligned store/load so as to avoid odd alignment peeling.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_builtin_vectorization_cost): Adjust.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 8467f03cf..b6f0d61ef 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -3889,11 +3889,9 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
|
case scalar_stmt:
|
||||||
|
case scalar_load:
|
||||||
|
case vector_stmt:
|
||||||
|
- case vector_load:
|
||||||
|
case vec_to_scalar:
|
||||||
|
case scalar_to_vec:
|
||||||
|
case scalar_store:
|
||||||
|
- case vector_store:
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
case vec_promote_demote:
|
||||||
|
@@ -3901,6 +3899,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||||
|
return LASX_SUPPORTED_MODE_P (mode)
|
||||||
|
&& !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
|
||||||
|
|
||||||
|
+ case vector_load:
|
||||||
|
+ case vector_store:
|
||||||
|
case unaligned_load:
|
||||||
|
case unaligned_store:
|
||||||
|
return 2;
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
58
0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
Normal file
58
0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
Normal file
@ -0,0 +1,58 @@
|
|||||||
|
From bd74cb3e1238e842d15bcd4044c9e2f246cc18bc Mon Sep 17 00:00:00 2001
|
||||||
|
From: Li Wei <liwei@loongson.cn>
|
||||||
|
Date: Fri, 17 Nov 2023 10:38:02 +0800
|
||||||
|
Subject: [PATCH 030/188] LoongArch: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO
|
||||||
|
|
||||||
|
LoongArch has defined ctz and clz in the backend, but if we want GCC
|
||||||
|
to do the CTZ transformation optimization in the forwprop2 pass, GCC needs to know
|
||||||
|
the value of c[lt]z at zero, which may be beneficial for some test cases
|
||||||
|
(like spec2017 deepsjeng_r).
|
||||||
|
|
||||||
|
After implementing the macro, we test dynamic instruction count on
|
||||||
|
deepsjeng_r:
|
||||||
|
- before 1688423249186
|
||||||
|
- after 1660311215745 (1.66% reduction)
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.h (CLZ_DEFINED_VALUE_AT_ZERO):
|
||||||
|
Implement.
|
||||||
|
(CTZ_DEFINED_VALUE_AT_ZERO): Same.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.dg/pr90838.c: Add clz/ctz test support on LoongArch.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.h | 5 +++++
|
||||||
|
gcc/testsuite/gcc.dg/pr90838.c | 5 +++++
|
||||||
|
2 files changed, 10 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index 6e8ac293a..19cf6fd33 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -1239,3 +1239,8 @@ struct GTY (()) machine_function
|
||||||
|
|
||||||
|
#define TARGET_EXPLICIT_RELOCS \
|
||||||
|
(la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
+
|
||||||
|
+#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||||
|
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||||
|
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||||
|
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
|
||||||
|
index 7502b8463..7aa912525 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/pr90838.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/pr90838.c
|
||||||
|
@@ -82,3 +82,8 @@ int ctz4 (unsigned long x)
|
||||||
|
/* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
|
||||||
|
/* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */
|
||||||
|
/* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { loongarch64*-*-* } } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "ctz.d\t" 1 { target { loongarch64*-*-* } } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "ctz.w\t" 3 { target { loongarch64*-*-* } } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "andi\t" 4 { target { loongarch64*-*-* } } } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
197
0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
Normal file
197
0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
Normal file
@ -0,0 +1,197 @@
|
|||||||
|
From 61daf071708947ef8431ac36bc6c6b47339fdd2a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 14 Nov 2023 00:17:19 +0800
|
||||||
|
Subject: [PATCH 031/188] LoongArch: Handle vectorized copysign (x, -1)
|
||||||
|
expansion efficiently
|
||||||
|
|
||||||
|
With LSX or LASX, copysign (x[i], -1) (or any negative constant) can be
|
||||||
|
vectorized using [x]vbitseti.{w/d} instructions to directly set the
|
||||||
|
signbits.
|
||||||
|
|
||||||
|
Inspired by Tamar Christina's "AArch64: Handle copysign (x, -1) expansion
|
||||||
|
efficiently" (r14-5289).
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lsx.md (copysign<mode>3): Allow operand[2] to
|
||||||
|
be a reg_or_vector_same_val_operand. If it's a const vector
|
||||||
|
with same negative elements, expand the copysign with a bitset
|
||||||
|
instruction. Otherwise, force it into a register.
|
||||||
|
* config/loongarch/lasx.md (copysign<mode>3): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* g++.target/loongarch/vect-copysign-negconst.C: New test.
|
||||||
|
* g++.target/loongarch/vect-copysign-negconst-run.C: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 22 ++++++++-
|
||||||
|
gcc/config/loongarch/lsx.md | 22 ++++++++-
|
||||||
|
.../loongarch/vect-copysign-negconst-run.C | 47 +++++++++++++++++++
|
||||||
|
.../loongarch/vect-copysign-negconst.C | 27 +++++++++++
|
||||||
|
4 files changed, 116 insertions(+), 2 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
|
||||||
|
create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index f0f2dd08d..2e11f0612 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -3136,11 +3136,31 @@
|
||||||
|
(match_operand:FLASX 1 "register_operand")))
|
||||||
|
(set (match_dup 5)
|
||||||
|
(and:FLASX (match_dup 3)
|
||||||
|
- (match_operand:FLASX 2 "register_operand")))
|
||||||
|
+ (match_operand:FLASX 2 "reg_or_vector_same_val_operand")))
|
||||||
|
(set (match_operand:FLASX 0 "register_operand")
|
||||||
|
(ior:FLASX (match_dup 4) (match_dup 5)))]
|
||||||
|
"ISA_HAS_LASX"
|
||||||
|
{
|
||||||
|
+ /* copysign (x, -1) should instead be expanded as setting the sign
|
||||||
|
+ bit. */
|
||||||
|
+ if (!REG_P (operands[2]))
|
||||||
|
+ {
|
||||||
|
+ rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
|
||||||
|
+ if (GET_CODE (op2_elt) == CONST_DOUBLE
|
||||||
|
+ && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
|
||||||
|
+ {
|
||||||
|
+ rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
|
||||||
|
+ operands[0] = lowpart_subreg (<VIMODE256>mode, operands[0],
|
||||||
|
+ <MODE>mode);
|
||||||
|
+ operands[1] = lowpart_subreg (<VIMODE256>mode, operands[1],
|
||||||
|
+ <MODE>mode);
|
||||||
|
+ emit_insn (gen_lasx_xvbitseti_<lasxfmt> (operands[0],
|
||||||
|
+ operands[1], n));
|
||||||
|
+ DONE;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ operands[2] = force_reg (<MODE>mode, operands[2]);
|
||||||
|
operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
|
||||||
|
|
||||||
|
operands[4] = gen_reg_rtx (<MODE>mode);
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 55c7d79a0..8ea41c85b 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -2873,11 +2873,31 @@
|
||||||
|
(match_operand:FLSX 1 "register_operand")))
|
||||||
|
(set (match_dup 5)
|
||||||
|
(and:FLSX (match_dup 3)
|
||||||
|
- (match_operand:FLSX 2 "register_operand")))
|
||||||
|
+ (match_operand:FLSX 2 "reg_or_vector_same_val_operand")))
|
||||||
|
(set (match_operand:FLSX 0 "register_operand")
|
||||||
|
(ior:FLSX (match_dup 4) (match_dup 5)))]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
{
|
||||||
|
+ /* copysign (x, -1) should instead be expanded as setting the sign
|
||||||
|
+ bit. */
|
||||||
|
+ if (!REG_P (operands[2]))
|
||||||
|
+ {
|
||||||
|
+ rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
|
||||||
|
+ if (GET_CODE (op2_elt) == CONST_DOUBLE
|
||||||
|
+ && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
|
||||||
|
+ {
|
||||||
|
+ rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
|
||||||
|
+ operands[0] = lowpart_subreg (<VIMODE>mode, operands[0],
|
||||||
|
+ <MODE>mode);
|
||||||
|
+ operands[1] = lowpart_subreg (<VIMODE>mode, operands[1],
|
||||||
|
+ <MODE>mode);
|
||||||
|
+ emit_insn (gen_lsx_vbitseti_<lsxfmt> (operands[0], operands[1],
|
||||||
|
+ n));
|
||||||
|
+ DONE;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ operands[2] = force_reg (<MODE>mode, operands[2]);
|
||||||
|
operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
|
||||||
|
|
||||||
|
operands[4] = gen_reg_rtx (<MODE>mode);
|
||||||
|
diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..d2d5d15c9
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
|
||||||
|
@@ -0,0 +1,47 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
|
||||||
|
+/* { dg-require-effective-target loongarch_asx_hw } */
|
||||||
|
+
|
||||||
|
+#include "vect-copysign-negconst.C"
|
||||||
|
+
|
||||||
|
+double d[] = {1.2, -3.4, -5.6, 7.8};
|
||||||
|
+float f[] = {1.2, -3.4, -5.6, 7.8, -9.0, -11.4, 51.4, 1919.810};
|
||||||
|
+
|
||||||
|
+double _abs(double x) { return __builtin_fabs (x); }
|
||||||
|
+float _abs(float x) { return __builtin_fabsf (x); }
|
||||||
|
+
|
||||||
|
+template <class T>
|
||||||
|
+void
|
||||||
|
+check (T *arr, T *orig, int len)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < len; i++)
|
||||||
|
+ {
|
||||||
|
+ if (arr[i] > 0)
|
||||||
|
+ __builtin_trap ();
|
||||||
|
+ if (_abs (arr[i]) != _abs (orig[i]))
|
||||||
|
+ __builtin_trap ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main()
|
||||||
|
+{
|
||||||
|
+ double test_d[4];
|
||||||
|
+ float test_f[8];
|
||||||
|
+
|
||||||
|
+ __builtin_memcpy (test_d, d, sizeof (test_d));
|
||||||
|
+ force_negative<2> (test_d);
|
||||||
|
+ check (test_d, d, 2);
|
||||||
|
+
|
||||||
|
+ __builtin_memcpy (test_d, d, sizeof (test_d));
|
||||||
|
+ force_negative<4> (test_d);
|
||||||
|
+ check (test_d, d, 4);
|
||||||
|
+
|
||||||
|
+ __builtin_memcpy (test_f, f, sizeof (test_f));
|
||||||
|
+ force_negative<4> (test_f);
|
||||||
|
+ check (test_f, f, 4);
|
||||||
|
+
|
||||||
|
+ __builtin_memcpy (test_f, f, sizeof (test_f));
|
||||||
|
+ force_negative<8> (test_f);
|
||||||
|
+ check (test_f, f, 8);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..5e8820d2b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
|
||||||
|
@@ -0,0 +1,27 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvbitseti.*63" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvbitseti.*31" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvbitseti.*63" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvbitseti.*31" } } */
|
||||||
|
+
|
||||||
|
+template <int N>
|
||||||
|
+__attribute__ ((noipa)) void
|
||||||
|
+force_negative (float *arr)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < N; i++)
|
||||||
|
+ arr[i] = __builtin_copysignf (arr[i], -2);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+template <int N>
|
||||||
|
+__attribute__ ((noipa)) void
|
||||||
|
+force_negative (double *arr)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < N; i++)
|
||||||
|
+ arr[i] = __builtin_copysign (arr[i], -3);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+template void force_negative<4>(float *);
|
||||||
|
+template void force_negative<8>(float *);
|
||||||
|
+template void force_negative<2>(double *);
|
||||||
|
+template void force_negative<4>(double *);
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
561
0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
Normal file
561
0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
Normal file
@ -0,0 +1,561 @@
|
|||||||
|
From 5ab014701ddd9968855026f0e2ae1af2b165bcd7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Thu, 16 Nov 2023 15:06:11 +0800
|
||||||
|
Subject: [PATCH 032/188] LoongArch: Add code generation support for call36
|
||||||
|
function calls.
|
||||||
|
|
||||||
|
When compiling with '-mcmodel=medium', the function call is made through
|
||||||
|
'pcaddu18i+jirl' if binutils supports call36, otherwise the
|
||||||
|
native implementation 'pcalau12i+jirl' is used.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.in: Regenerate.
|
||||||
|
* config/loongarch/loongarch-opts.h (HAVE_AS_SUPPORT_CALL36): Define macro.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_legitimize_call_address):
|
||||||
|
If binutils supports call36, the function call is not split during expand.
|
||||||
|
* config/loongarch/loongarch.md: Add call36 generation code.
|
||||||
|
* config/loongarch/predicates.md: Likewise.
|
||||||
|
* configure: Regenerate.
|
||||||
|
* configure.ac: Check whether binutils supports call36.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/func-call-medium-5.c: If the assembler supports call36,
|
||||||
|
the test is abandoned.
|
||||||
|
* gcc.target/loongarch/func-call-medium-6.c: Likewise.
|
||||||
|
* gcc.target/loongarch/func-call-medium-7.c: Likewise.
|
||||||
|
* gcc.target/loongarch/func-call-medium-8.c: Likewise.
|
||||||
|
* lib/target-supports.exp: Added a function to see if the assembler supports
|
||||||
|
the call36 relocation.
|
||||||
|
* gcc.target/loongarch/func-call-medium-call36-1.c: New test.
|
||||||
|
* gcc.target/loongarch/func-call-medium-call36.c: New test.
|
||||||
|
|
||||||
|
Co-authored-by: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
---
|
||||||
|
gcc/config.in | 6 +
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 +
|
||||||
|
gcc/config/loongarch/loongarch.cc | 12 +-
|
||||||
|
gcc/config/loongarch/loongarch.md | 171 +++++++++++++++---
|
||||||
|
gcc/config/loongarch/predicates.md | 7 +-
|
||||||
|
gcc/configure | 32 ++++
|
||||||
|
gcc/configure.ac | 6 +
|
||||||
|
.../gcc.target/loongarch/func-call-medium-5.c | 1 +
|
||||||
|
.../gcc.target/loongarch/func-call-medium-6.c | 1 +
|
||||||
|
.../gcc.target/loongarch/func-call-medium-7.c | 1 +
|
||||||
|
.../gcc.target/loongarch/func-call-medium-8.c | 1 +
|
||||||
|
.../loongarch/func-call-medium-call36-1.c | 21 +++
|
||||||
|
.../loongarch/func-call-medium-call36.c | 32 ++++
|
||||||
|
gcc/testsuite/lib/target-supports.exp | 9 +
|
||||||
|
14 files changed, 268 insertions(+), 36 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config.in b/gcc/config.in
|
||||||
|
index 04968b53c..033cfb98b 100644
|
||||||
|
--- a/gcc/config.in
|
||||||
|
+++ b/gcc/config.in
|
||||||
|
@@ -759,6 +759,12 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define if your assembler supports call36 relocation. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_AS_SUPPORT_CALL36
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define if your assembler and linker support thread-local storage. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
#undef HAVE_AS_TLS
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index dfbe9dd5c..22ce1a122 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -99,6 +99,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
#define HAVE_AS_EXPLICIT_RELOCS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef HAVE_AS_SUPPORT_CALL36
|
||||||
|
+#define HAVE_AS_SUPPORT_CALL36 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#ifndef HAVE_AS_MRELAX_OPTION
|
||||||
|
#define HAVE_AS_MRELAX_OPTION 0
|
||||||
|
#endif
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index b6f0d61ef..43f0e82ba 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -3002,12 +3002,16 @@ loongarch_legitimize_call_address (rtx addr)
|
||||||
|
|
||||||
|
enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr);
|
||||||
|
|
||||||
|
- /* Split function call insn 'bl sym' or 'bl %plt(sym)' to :
|
||||||
|
- pcalau12i $rd, %pc_hi20(sym)
|
||||||
|
- jr $rd, %pc_lo12(sym). */
|
||||||
|
+ /* If the compilation option '-mcmodel=medium' is given, and the assembler does
|
||||||
|
+ not support call36, the following sequence of instructions will be
|
||||||
|
+ used for the function call:
|
||||||
|
+ pcalau12i $rd, %pc_hi20(sym)
|
||||||
|
+ jr $rd, %pc_lo12(sym)
|
||||||
|
+ */
|
||||||
|
|
||||||
|
if (TARGET_CMODEL_MEDIUM
|
||||||
|
- && TARGET_EXPLICIT_RELOCS
|
||||||
|
+ && !HAVE_AS_SUPPORT_CALL36
|
||||||
|
+ && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
|
||||||
|
&& (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
|
||||||
|
&& (symbol_type == SYMBOL_PCREL
|
||||||
|
|| (symbol_type == SYMBOL_GOT_DISP && flag_plt)))
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index ed86c95bd..52e40a208 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -3274,7 +3274,13 @@
|
||||||
|
XEXP (target, 1),
|
||||||
|
operands[1]));
|
||||||
|
else
|
||||||
|
- emit_call_insn (gen_sibcall_internal (target, operands[1]));
|
||||||
|
+ {
|
||||||
|
+ rtx call = emit_call_insn (gen_sibcall_internal (target, operands[1]));
|
||||||
|
+
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM && !REG_P (target))
|
||||||
|
+ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call),
|
||||||
|
+ gen_rtx_REG (Pmode, T0_REGNUM));
|
||||||
|
+ }
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
@@ -3282,10 +3288,25 @@
|
||||||
|
[(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,b"))
|
||||||
|
(match_operand 1 "" ""))]
|
||||||
|
"SIBLING_CALL_P (insn)"
|
||||||
|
- "@
|
||||||
|
- jr\t%0
|
||||||
|
- b\t%0
|
||||||
|
- b\t%%plt(%0)"
|
||||||
|
+{
|
||||||
|
+ switch (which_alternative)
|
||||||
|
+ {
|
||||||
|
+ case 0:
|
||||||
|
+ return "jr\t%0";
|
||||||
|
+ case 1:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0";
|
||||||
|
+ else
|
||||||
|
+ return "b\t%0";
|
||||||
|
+ case 2:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0";
|
||||||
|
+ else
|
||||||
|
+ return "b\t%%plt(%0)";
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
[(set_attr "jirl" "indirect,direct,direct")])
|
||||||
|
|
||||||
|
(define_insn "@sibcall_internal_1<mode>"
|
||||||
|
@@ -3318,9 +3339,17 @@
|
||||||
|
operands[2],
|
||||||
|
arg2));
|
||||||
|
else
|
||||||
|
- emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target,
|
||||||
|
- operands[2],
|
||||||
|
- arg2));
|
||||||
|
+ {
|
||||||
|
+ rtx call
|
||||||
|
+ = emit_call_insn (gen_sibcall_value_multiple_internal (arg1,
|
||||||
|
+ target,
|
||||||
|
+ operands[2],
|
||||||
|
+ arg2));
|
||||||
|
+
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM && !REG_P (target))
|
||||||
|
+ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call),
|
||||||
|
+ gen_rtx_REG (Pmode, T0_REGNUM));
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
@@ -3334,8 +3363,15 @@
|
||||||
|
XEXP (target, 1),
|
||||||
|
operands[2]));
|
||||||
|
else
|
||||||
|
- emit_call_insn (gen_sibcall_value_internal (operands[0], target,
|
||||||
|
- operands[2]));
|
||||||
|
+ {
|
||||||
|
+ rtx call = emit_call_insn (gen_sibcall_value_internal (operands[0],
|
||||||
|
+ target,
|
||||||
|
+ operands[2]));
|
||||||
|
+
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM && !REG_P (target))
|
||||||
|
+ clobber_reg (&CALL_INSN_FUNCTION_USAGE (call),
|
||||||
|
+ gen_rtx_REG (Pmode, T0_REGNUM));
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3345,10 +3381,25 @@
|
||||||
|
(call (mem:SI (match_operand 1 "call_insn_operand" "j,c,b"))
|
||||||
|
(match_operand 2 "" "")))]
|
||||||
|
"SIBLING_CALL_P (insn)"
|
||||||
|
- "@
|
||||||
|
- jr\t%1
|
||||||
|
- b\t%1
|
||||||
|
- b\t%%plt(%1)"
|
||||||
|
+{
|
||||||
|
+ switch (which_alternative)
|
||||||
|
+ {
|
||||||
|
+ case 0:
|
||||||
|
+ return "jr\t%1";
|
||||||
|
+ case 1:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
|
||||||
|
+ else
|
||||||
|
+ return "b\t%1";
|
||||||
|
+ case 2:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
|
||||||
|
+ else
|
||||||
|
+ return "b\t%%plt(%1)";
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
[(set_attr "jirl" "indirect,direct,direct")])
|
||||||
|
|
||||||
|
(define_insn "@sibcall_value_internal_1<mode>"
|
||||||
|
@@ -3368,10 +3419,25 @@
|
||||||
|
(call (mem:SI (match_dup 1))
|
||||||
|
(match_dup 2)))]
|
||||||
|
"SIBLING_CALL_P (insn)"
|
||||||
|
- "@
|
||||||
|
- jr\t%1
|
||||||
|
- b\t%1
|
||||||
|
- b\t%%plt(%1)"
|
||||||
|
+{
|
||||||
|
+ switch (which_alternative)
|
||||||
|
+ {
|
||||||
|
+ case 0:
|
||||||
|
+ return "jr\t%1";
|
||||||
|
+ case 1:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
|
||||||
|
+ else
|
||||||
|
+ return "b\t%1";
|
||||||
|
+ case 2:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
|
||||||
|
+ else
|
||||||
|
+ return "b\t%%plt(%1)";
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
[(set_attr "jirl" "indirect,direct,direct")])
|
||||||
|
|
||||||
|
(define_insn "@sibcall_value_multiple_internal_1<mode>"
|
||||||
|
@@ -3411,10 +3477,25 @@
|
||||||
|
(match_operand 1 "" ""))
|
||||||
|
(clobber (reg:SI RETURN_ADDR_REGNUM))]
|
||||||
|
""
|
||||||
|
- "@
|
||||||
|
- jirl\t$r1,%0,0
|
||||||
|
- bl\t%0
|
||||||
|
- bl\t%%plt(%0)"
|
||||||
|
+{
|
||||||
|
+ switch (which_alternative)
|
||||||
|
+ {
|
||||||
|
+ case 0:
|
||||||
|
+ return "jirl\t$r1,%0,0";
|
||||||
|
+ case 1:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0";
|
||||||
|
+ else
|
||||||
|
+ return "bl\t%0";
|
||||||
|
+ case 2:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0";
|
||||||
|
+ else
|
||||||
|
+ return "bl\t%%plt(%0)";
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
[(set_attr "jirl" "indirect,direct,direct")])
|
||||||
|
|
||||||
|
(define_insn "@call_internal_1<mode>"
|
||||||
|
@@ -3473,10 +3554,25 @@
|
||||||
|
(match_operand 2 "" "")))
|
||||||
|
(clobber (reg:SI RETURN_ADDR_REGNUM))]
|
||||||
|
""
|
||||||
|
- "@
|
||||||
|
- jirl\t$r1,%1,0
|
||||||
|
- bl\t%1
|
||||||
|
- bl\t%%plt(%1)"
|
||||||
|
+{
|
||||||
|
+ switch (which_alternative)
|
||||||
|
+ {
|
||||||
|
+ case 0:
|
||||||
|
+ return "jirl\t$r1,%1,0";
|
||||||
|
+ case 1:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
|
||||||
|
+ else
|
||||||
|
+ return "bl\t%1";
|
||||||
|
+ case 2:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
|
||||||
|
+ else
|
||||||
|
+ return "bl\t%%plt(%1)";
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
[(set_attr "jirl" "indirect,direct,direct")])
|
||||||
|
|
||||||
|
(define_insn "@call_value_internal_1<mode>"
|
||||||
|
@@ -3498,10 +3594,25 @@
|
||||||
|
(match_dup 2)))
|
||||||
|
(clobber (reg:SI RETURN_ADDR_REGNUM))]
|
||||||
|
""
|
||||||
|
- "@
|
||||||
|
- jirl\t$r1,%1,0
|
||||||
|
- bl\t%1
|
||||||
|
- bl\t%%plt(%1)"
|
||||||
|
+{
|
||||||
|
+ switch (which_alternative)
|
||||||
|
+ {
|
||||||
|
+ case 0:
|
||||||
|
+ return "jirl\t$r1,%1,0";
|
||||||
|
+ case 1:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
|
||||||
|
+ else
|
||||||
|
+ return "bl\t%1";
|
||||||
|
+ case 2:
|
||||||
|
+ if (TARGET_CMODEL_MEDIUM)
|
||||||
|
+ return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
|
||||||
|
+ else
|
||||||
|
+ return "bl\t%%plt(%1)";
|
||||||
|
+ default:
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
[(set_attr "jirl" "indirect,direct,direct")])
|
||||||
|
|
||||||
|
(define_insn "@call_value_multiple_internal_1<mode>"
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 1d669f560..2aae87db4 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -443,7 +443,9 @@
|
||||||
|
{
|
||||||
|
case SYMBOL_PCREL:
|
||||||
|
if (TARGET_CMODEL_EXTREME
|
||||||
|
- || (TARGET_CMODEL_MEDIUM && !TARGET_EXPLICIT_RELOCS))
|
||||||
|
+ || (TARGET_CMODEL_MEDIUM
|
||||||
|
+ && HAVE_AS_SUPPORT_CALL36
|
||||||
|
+ && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
@@ -452,7 +454,8 @@
|
||||||
|
if (TARGET_CMODEL_EXTREME
|
||||||
|
|| !flag_plt
|
||||||
|
|| (flag_plt && TARGET_CMODEL_MEDIUM
|
||||||
|
- && !TARGET_EXPLICIT_RELOCS))
|
||||||
|
+ && HAVE_AS_SUPPORT_CALL36
|
||||||
|
+ && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
return 1;
|
||||||
|
diff --git a/gcc/configure b/gcc/configure
|
||||||
|
index 09bacfec3..5842e7a18 100755
|
||||||
|
--- a/gcc/configure
|
||||||
|
+++ b/gcc/configure
|
||||||
|
@@ -28836,6 +28836,38 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then
|
||||||
|
|
||||||
|
$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h
|
||||||
|
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for call36 relocation support" >&5
|
||||||
|
+$as_echo_n "checking assembler for call36 relocation support... " >&6; }
|
||||||
|
+if ${gcc_cv_as_loongarch_call36+:} false; then :
|
||||||
|
+ $as_echo_n "(cached) " >&6
|
||||||
|
+else
|
||||||
|
+ gcc_cv_as_loongarch_call36=no
|
||||||
|
+ if test x$gcc_cv_as != x; then
|
||||||
|
+ $as_echo 'pcaddu18i $r1, %call36(a)
|
||||||
|
+ jirl $r1, $r1, 0' > conftest.s
|
||||||
|
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
|
||||||
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
+ (eval $ac_try) 2>&5
|
||||||
|
+ ac_status=$?
|
||||||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
+ test $ac_status = 0; }; }
|
||||||
|
+ then
|
||||||
|
+ gcc_cv_as_loongarch_call36=yes
|
||||||
|
+ else
|
||||||
|
+ echo "configure: failed program was" >&5
|
||||||
|
+ cat conftest.s >&5
|
||||||
|
+ fi
|
||||||
|
+ rm -f conftest.o conftest.s
|
||||||
|
+ fi
|
||||||
|
+fi
|
||||||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_call36" >&5
|
||||||
|
+$as_echo "$gcc_cv_as_loongarch_call36" >&6; }
|
||||||
|
+if test $gcc_cv_as_loongarch_call36 = yes; then
|
||||||
|
+
|
||||||
|
+$as_echo "#define HAVE_AS_SUPPORT_CALL36 1" >>confdefs.h
|
||||||
|
+
|
||||||
|
fi
|
||||||
|
|
||||||
|
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5
|
||||||
|
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||||
|
index a0999152e..9c3fd3ad6 100644
|
||||||
|
--- a/gcc/configure.ac
|
||||||
|
+++ b/gcc/configure.ac
|
||||||
|
@@ -5329,6 +5329,12 @@ x:
|
||||||
|
[a:pcalau12i $t0,%pc_hi20(a)],,
|
||||||
|
[AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1,
|
||||||
|
[Define if your assembler supports explicit relocation.])])
|
||||||
|
+ gcc_GAS_CHECK_FEATURE([call36 relocation support],
|
||||||
|
+ gcc_cv_as_loongarch_call36,,
|
||||||
|
+ [pcaddu18i $r1, %call36(a)
|
||||||
|
+ jirl $r1, $r1, 0],,
|
||||||
|
+ [AC_DEFINE(HAVE_AS_SUPPORT_CALL36, 1,
|
||||||
|
+ [Define if your assembler supports call36 relocation.])])
|
||||||
|
gcc_GAS_CHECK_FEATURE([eh_frame pcrel encoding support],
|
||||||
|
gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,,
|
||||||
|
[.cfi_startproc
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
|
||||||
|
index 8a47b5afc..cae880bd8 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
+/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
|
||||||
|
/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=medium" } */
|
||||||
|
/* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */
|
||||||
|
/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
|
||||||
|
index 1e75e60e0..33819542d 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
+/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
|
||||||
|
/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=medium" } */
|
||||||
|
/* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */
|
||||||
|
/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
|
||||||
|
index 9e89085ca..969b59d04 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
+/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
|
||||||
|
/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=medium" } */
|
||||||
|
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
|
||||||
|
/* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c
|
||||||
|
index fde9c6e0e..786ff395f 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
+/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
|
||||||
|
/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=medium" } */
|
||||||
|
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
|
||||||
|
/* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..872ff32f8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c
|
||||||
|
@@ -0,0 +1,21 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-require-effective-target loongarch_call36_support } */
|
||||||
|
+/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */
|
||||||
|
+/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r1,%call36\\(func\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r1,%call36\\(func_value\\)" } } */
|
||||||
|
+
|
||||||
|
+extern void func (void);
|
||||||
|
+int
|
||||||
|
+test (void)
|
||||||
|
+{
|
||||||
|
+ func ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+extern int func_value (void);
|
||||||
|
+float
|
||||||
|
+test_value (void)
|
||||||
|
+{
|
||||||
|
+ func_value ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..98ccd260d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c
|
||||||
|
@@ -0,0 +1,32 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-require-effective-target loongarch_call36_support } */
|
||||||
|
+/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */
|
||||||
|
+/* { dg-final { scan-rtl-dump-times "\\(clobber \\(reg:DI 12 \\\$r12\\)\\)" 3 "final" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r12,%call36\\(func\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r12,%call36\\(func_value\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test_multi:.*pcaddu18i\t\\\$r12,%call36\\(func_multi\\)" } } */
|
||||||
|
+
|
||||||
|
+extern void func (void);
|
||||||
|
+void
|
||||||
|
+test (void)
|
||||||
|
+{
|
||||||
|
+ func();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+extern int func_value (void);
|
||||||
|
+int
|
||||||
|
+test_value (void)
|
||||||
|
+{
|
||||||
|
+ func_value ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+struct t {float a; float b;};
|
||||||
|
+
|
||||||
|
+extern struct t func_multi (void);
|
||||||
|
+struct t
|
||||||
|
+test_multi (void)
|
||||||
|
+{
|
||||||
|
+ func_multi ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||||
|
index bbe145c1c..b8bff1a31 100644
|
||||||
|
--- a/gcc/testsuite/lib/target-supports.exp
|
||||||
|
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||||
|
@@ -10573,6 +10573,15 @@ proc check_effective_target_loongarch_asx_hw { } {
|
||||||
|
} "-mlasx"]
|
||||||
|
}
|
||||||
|
|
||||||
|
+# Check whether LoongArch binutils supports call36 relocation.
|
||||||
|
+proc check_effective_target_loongarch_call36_support { } {
|
||||||
|
+ return [check_no_compiler_messages loongarch_call36_support object {
|
||||||
|
+/* Assembly code */
|
||||||
|
+ pcaddu18i $r1,%call36(a)
|
||||||
|
+ jirl $r1,$r1,0
|
||||||
|
+ } ""]
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
# Return 1 if the target does *not* require strict alignment.
|
||||||
|
|
||||||
|
proc check_effective_target_non_strict_align {} {
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
362
0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
Normal file
362
0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
Normal file
@ -0,0 +1,362 @@
|
|||||||
|
From 704e67084fcd7f3ea89321e17dfafa7e907c907c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Fri, 17 Nov 2023 15:42:53 +0800
|
||||||
|
Subject: [PATCH 033/188] LoongArch: Implement atomic operations using
|
||||||
|
LoongArch1.1 instructions.
|
||||||
|
|
||||||
|
1. short and char type calls for __atomic_add_fetch and __atomic_fetch_add are
|
||||||
|
implemented using amadd{_db}.{b/h}.
|
||||||
|
2. Use amcas{_db}.{b/h/w/d} to implement __atomic_compare_exchange_n and __atomic_compare_exchange.
|
||||||
|
3. The short and char types of the functions __atomic_exchange and __atomic_exchange_n are
|
||||||
|
implemented using amswap{_db}.{b/h}.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-def.h: Add comments.
|
||||||
|
* config/loongarch/loongarch-opts.h (ISA_BASE_IS_LA64V110): Define macro.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_memmodel_needs_rel_acq_fence):
|
||||||
|
Remove redundant code implementations.
|
||||||
|
* config/loongarch/sync.md (d): Added QI, HI support.
|
||||||
|
(atomic_add<mode>): New template.
|
||||||
|
(atomic_exchange<mode>_short): Likewise.
|
||||||
|
(atomic_cas_value_strong<mode>_amcas): Likewise.
|
||||||
|
(atomic_fetch_add<mode>_short): Likewise.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 2 +
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 2 +-
|
||||||
|
gcc/config/loongarch/loongarch.cc | 6 +-
|
||||||
|
gcc/config/loongarch/sync.md | 186 ++++++++++++++++++++------
|
||||||
|
4 files changed, 147 insertions(+), 49 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index 4757de14b..078d8607d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -54,7 +54,9 @@ extern "C" {
|
||||||
|
|
||||||
|
/* enum isa_base */
|
||||||
|
extern const char* loongarch_isa_base_strings[];
|
||||||
|
+/* LoongArch V1.00. */
|
||||||
|
#define ISA_BASE_LA64V100 0
|
||||||
|
+/* LoongArch V1.10. */
|
||||||
|
#define ISA_BASE_LA64V110 1
|
||||||
|
#define N_ISA_BASE_TYPES 2
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 22ce1a122..9b3d023ac 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -86,10 +86,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
|| la_target.isa.simd == ISA_EXT_SIMD_LASX)
|
||||||
|
#define ISA_HAS_LASX (la_target.isa.simd == ISA_EXT_SIMD_LASX)
|
||||||
|
|
||||||
|
-
|
||||||
|
/* TARGET_ macros for use in *.md template conditionals */
|
||||||
|
#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464)
|
||||||
|
#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664)
|
||||||
|
+#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110)
|
||||||
|
|
||||||
|
/* Note: optimize_size may vary across functions,
|
||||||
|
while -m[no]-memcpy imposes a global constraint. */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 43f0e82ba..7bb46a45d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -5813,16 +5813,12 @@ loongarch_print_operand_punct_valid_p (unsigned char code)
|
||||||
|
static bool
|
||||||
|
loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
|
||||||
|
{
|
||||||
|
- switch (model)
|
||||||
|
+ switch (memmodel_base (model))
|
||||||
|
{
|
||||||
|
case MEMMODEL_ACQ_REL:
|
||||||
|
case MEMMODEL_SEQ_CST:
|
||||||
|
- case MEMMODEL_SYNC_SEQ_CST:
|
||||||
|
case MEMMODEL_RELEASE:
|
||||||
|
- case MEMMODEL_SYNC_RELEASE:
|
||||||
|
case MEMMODEL_ACQUIRE:
|
||||||
|
- case MEMMODEL_CONSUME:
|
||||||
|
- case MEMMODEL_SYNC_ACQUIRE:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case MEMMODEL_RELAXED:
|
||||||
|
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
|
||||||
|
index dd1f98946..1eabaec04 100644
|
||||||
|
--- a/gcc/config/loongarch/sync.md
|
||||||
|
+++ b/gcc/config/loongarch/sync.md
|
||||||
|
@@ -38,7 +38,7 @@
|
||||||
|
[(plus "add") (ior "or") (xor "xor") (and "and")])
|
||||||
|
|
||||||
|
;; This attribute gives the format suffix for atomic memory operations.
|
||||||
|
-(define_mode_attr amo [(SI "w") (DI "d")])
|
||||||
|
+(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
|
||||||
|
|
||||||
|
;; <amop> expands to the name of the atomic operand that implements a
|
||||||
|
;; particular code.
|
||||||
|
@@ -123,7 +123,18 @@
|
||||||
|
UNSPEC_SYNC_OLD_OP))]
|
||||||
|
""
|
||||||
|
"am<amop>%A2.<amo>\t$zero,%z1,%0"
|
||||||
|
- [(set (attr "length") (const_int 8))])
|
||||||
|
+ [(set (attr "length") (const_int 4))])
|
||||||
|
+
|
||||||
|
+(define_insn "atomic_add<mode>"
|
||||||
|
+ [(set (match_operand:SHORT 0 "memory_operand" "+ZB")
|
||||||
|
+ (unspec_volatile:SHORT
|
||||||
|
+ [(plus:SHORT (match_dup 0)
|
||||||
|
+ (match_operand:SHORT 1 "reg_or_0_operand" "rJ"))
|
||||||
|
+ (match_operand:SI 2 "const_int_operand")] ;; model
|
||||||
|
+ UNSPEC_SYNC_OLD_OP))]
|
||||||
|
+ "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "amadd%A2.<amo>\t$zero,%z1,%0"
|
||||||
|
+ [(set (attr "length") (const_int 4))])
|
||||||
|
|
||||||
|
(define_insn "atomic_fetch_<atomic_optab><mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r")
|
||||||
|
@@ -131,12 +142,12 @@
|
||||||
|
(set (match_dup 1)
|
||||||
|
(unspec_volatile:GPR
|
||||||
|
[(any_atomic:GPR (match_dup 1)
|
||||||
|
- (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
|
||||||
|
+ (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
|
||||||
|
(match_operand:SI 3 "const_int_operand")] ;; model
|
||||||
|
UNSPEC_SYNC_OLD_OP))]
|
||||||
|
""
|
||||||
|
"am<amop>%A3.<amo>\t%0,%z2,%1"
|
||||||
|
- [(set (attr "length") (const_int 8))])
|
||||||
|
+ [(set (attr "length") (const_int 4))])
|
||||||
|
|
||||||
|
(define_insn "atomic_exchange<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r")
|
||||||
|
@@ -148,7 +159,19 @@
|
||||||
|
(match_operand:GPR 2 "register_operand" "r"))]
|
||||||
|
""
|
||||||
|
"amswap%A3.<amo>\t%0,%z2,%1"
|
||||||
|
- [(set (attr "length") (const_int 8))])
|
||||||
|
+ [(set (attr "length") (const_int 4))])
|
||||||
|
+
|
||||||
|
+(define_insn "atomic_exchange<mode>_short"
|
||||||
|
+ [(set (match_operand:SHORT 0 "register_operand" "=&r")
|
||||||
|
+ (unspec_volatile:SHORT
|
||||||
|
+ [(match_operand:SHORT 1 "memory_operand" "+ZB")
|
||||||
|
+ (match_operand:SI 3 "const_int_operand")] ;; model
|
||||||
|
+ UNSPEC_SYNC_EXCHANGE))
|
||||||
|
+ (set (match_dup 1)
|
||||||
|
+ (match_operand:SHORT 2 "register_operand" "r"))]
|
||||||
|
+ "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "amswap%A3.<amo>\t%0,%z2,%1"
|
||||||
|
+ [(set (attr "length") (const_int 4))])
|
||||||
|
|
||||||
|
(define_insn "atomic_cas_value_strong<mode>"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand" "=&r")
|
||||||
|
@@ -156,25 +179,36 @@
|
||||||
|
(set (match_dup 1)
|
||||||
|
(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
|
||||||
|
(match_operand:GPR 3 "reg_or_0_operand" "rJ")
|
||||||
|
- (match_operand:SI 4 "const_int_operand") ;; mod_s
|
||||||
|
- (match_operand:SI 5 "const_int_operand")] ;; mod_f
|
||||||
|
+ (match_operand:SI 4 "const_int_operand")] ;; mod_s
|
||||||
|
UNSPEC_COMPARE_AND_SWAP))
|
||||||
|
- (clobber (match_scratch:GPR 6 "=&r"))]
|
||||||
|
+ (clobber (match_scratch:GPR 5 "=&r"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
return "1:\\n\\t"
|
||||||
|
"ll.<amo>\\t%0,%1\\n\\t"
|
||||||
|
"bne\\t%0,%z2,2f\\n\\t"
|
||||||
|
- "or%i3\\t%6,$zero,%3\\n\\t"
|
||||||
|
- "sc.<amo>\\t%6,%1\\n\\t"
|
||||||
|
- "beqz\\t%6,1b\\n\\t"
|
||||||
|
+ "or%i3\\t%5,$zero,%3\\n\\t"
|
||||||
|
+ "sc.<amo>\\t%5,%1\\n\\t"
|
||||||
|
+ "beqz\\t%5,1b\\n\\t"
|
||||||
|
"b\\t3f\\n\\t"
|
||||||
|
"2:\\n\\t"
|
||||||
|
- "%G5\\n\\t"
|
||||||
|
+ "%G4\\n\\t"
|
||||||
|
"3:\\n\\t";
|
||||||
|
}
|
||||||
|
[(set (attr "length") (const_int 28))])
|
||||||
|
|
||||||
|
+(define_insn "atomic_cas_value_strong<mode>_amcas"
|
||||||
|
+ [(set (match_operand:QHWD 0 "register_operand" "=&r")
|
||||||
|
+ (match_operand:QHWD 1 "memory_operand" "+ZB"))
|
||||||
|
+ (set (match_dup 1)
|
||||||
|
+ (unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ")
|
||||||
|
+ (match_operand:QHWD 3 "reg_or_0_operand" "rJ")
|
||||||
|
+ (match_operand:SI 4 "const_int_operand")] ;; mod_s
|
||||||
|
+ UNSPEC_COMPARE_AND_SWAP))]
|
||||||
|
+ "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
|
||||||
|
+ [(set (attr "length") (const_int 8))])
|
||||||
|
+
|
||||||
|
(define_expand "atomic_compare_and_swap<mode>"
|
||||||
|
[(match_operand:SI 0 "register_operand" "") ;; bool output
|
||||||
|
(match_operand:GPR 1 "register_operand" "") ;; val output
|
||||||
|
@@ -186,9 +220,29 @@
|
||||||
|
(match_operand:SI 7 "const_int_operand" "")] ;; mod_f
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
|
||||||
|
- operands[3], operands[4],
|
||||||
|
- operands[6], operands[7]));
|
||||||
|
+ rtx mod_s, mod_f;
|
||||||
|
+
|
||||||
|
+ mod_s = operands[6];
|
||||||
|
+ mod_f = operands[7];
|
||||||
|
+
|
||||||
|
+ /* Normally the succ memory model must be stronger than fail, but in the
|
||||||
|
+ unlikely event of fail being ACQUIRE and succ being RELEASE we need to
|
||||||
|
+ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
|
||||||
|
+
|
||||||
|
+ if (is_mm_acquire (memmodel_base (INTVAL (mod_f)))
|
||||||
|
+ && is_mm_release (memmodel_base (INTVAL (mod_s))))
|
||||||
|
+ mod_s = GEN_INT (MEMMODEL_ACQ_REL);
|
||||||
|
+
|
||||||
|
+ operands[6] = mod_s;
|
||||||
|
+
|
||||||
|
+ if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
|
||||||
|
+ operands[3], operands[4],
|
||||||
|
+ operands[6]));
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
|
||||||
|
+ operands[3], operands[4],
|
||||||
|
+ operands[6]));
|
||||||
|
|
||||||
|
rtx compare = operands[1];
|
||||||
|
if (operands[3] != const0_rtx)
|
||||||
|
@@ -292,31 +346,53 @@
|
||||||
|
(match_operand:SI 7 "const_int_operand" "")] ;; mod_f
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- union loongarch_gen_fn_ptrs generator;
|
||||||
|
- generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
|
||||||
|
- loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
|
||||||
|
- operands[3], operands[4], operands[7]);
|
||||||
|
+ rtx mod_s, mod_f;
|
||||||
|
|
||||||
|
- rtx compare = operands[1];
|
||||||
|
- if (operands[3] != const0_rtx)
|
||||||
|
- {
|
||||||
|
- machine_mode mode = GET_MODE (operands[3]);
|
||||||
|
- rtx op1 = convert_modes (SImode, mode, operands[1], true);
|
||||||
|
- rtx op3 = convert_modes (SImode, mode, operands[3], true);
|
||||||
|
- rtx difference = gen_rtx_MINUS (SImode, op1, op3);
|
||||||
|
- compare = gen_reg_rtx (SImode);
|
||||||
|
- emit_insn (gen_rtx_SET (compare, difference));
|
||||||
|
- }
|
||||||
|
+ mod_s = operands[6];
|
||||||
|
+ mod_f = operands[7];
|
||||||
|
|
||||||
|
- if (word_mode != <MODE>mode)
|
||||||
|
+ /* Normally the succ memory model must be stronger than fail, but in the
|
||||||
|
+ unlikely event of fail being ACQUIRE and succ being RELEASE we need to
|
||||||
|
+ promote succ to ACQ_REL so that we don't lose the acquire semantics. */
|
||||||
|
+
|
||||||
|
+ if (is_mm_acquire (memmodel_base (INTVAL (mod_f)))
|
||||||
|
+ && is_mm_release (memmodel_base (INTVAL (mod_s))))
|
||||||
|
+ mod_s = GEN_INT (MEMMODEL_ACQ_REL);
|
||||||
|
+
|
||||||
|
+ operands[6] = mod_s;
|
||||||
|
+
|
||||||
|
+ if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
|
||||||
|
+ operands[3], operands[4],
|
||||||
|
+ operands[6]));
|
||||||
|
+ else
|
||||||
|
{
|
||||||
|
- rtx reg = gen_reg_rtx (word_mode);
|
||||||
|
- emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)));
|
||||||
|
- compare = reg;
|
||||||
|
+ union loongarch_gen_fn_ptrs generator;
|
||||||
|
+ generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
|
||||||
|
+ loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
|
||||||
|
+ operands[3], operands[4], operands[6]);
|
||||||
|
}
|
||||||
|
|
||||||
|
- emit_insn (gen_rtx_SET (operands[0],
|
||||||
|
- gen_rtx_EQ (SImode, compare, const0_rtx)));
|
||||||
|
+ rtx compare = operands[1];
|
||||||
|
+ if (operands[3] != const0_rtx)
|
||||||
|
+ {
|
||||||
|
+ machine_mode mode = GET_MODE (operands[3]);
|
||||||
|
+ rtx op1 = convert_modes (SImode, mode, operands[1], true);
|
||||||
|
+ rtx op3 = convert_modes (SImode, mode, operands[3], true);
|
||||||
|
+ rtx difference = gen_rtx_MINUS (SImode, op1, op3);
|
||||||
|
+ compare = gen_reg_rtx (SImode);
|
||||||
|
+ emit_insn (gen_rtx_SET (compare, difference));
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (word_mode != <MODE>mode)
|
||||||
|
+ {
|
||||||
|
+ rtx reg = gen_reg_rtx (word_mode);
|
||||||
|
+ emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)));
|
||||||
|
+ compare = reg;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ emit_insn (gen_rtx_SET (operands[0],
|
||||||
|
+ gen_rtx_EQ (SImode, compare, const0_rtx)));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
@@ -505,13 +581,31 @@
|
||||||
|
(match_operand:SHORT 2 "register_operand"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- union loongarch_gen_fn_ptrs generator;
|
||||||
|
- generator.fn_7 = gen_atomic_cas_value_exchange_7_si;
|
||||||
|
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
|
||||||
|
- const0_rtx, operands[2], operands[3]);
|
||||||
|
+ if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ emit_insn (gen_atomic_exchange<mode>_short (operands[0], operands[1], operands[2], operands[3]));
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ union loongarch_gen_fn_ptrs generator;
|
||||||
|
+ generator.fn_7 = gen_atomic_cas_value_exchange_7_si;
|
||||||
|
+ loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
|
||||||
|
+ const0_rtx, operands[2], operands[3]);
|
||||||
|
+ }
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
+(define_insn "atomic_fetch_add<mode>_short"
|
||||||
|
+ [(set (match_operand:SHORT 0 "register_operand" "=&r")
|
||||||
|
+ (match_operand:SHORT 1 "memory_operand" "+ZB"))
|
||||||
|
+ (set (match_dup 1)
|
||||||
|
+ (unspec_volatile:SHORT
|
||||||
|
+ [(plus:SHORT (match_dup 1)
|
||||||
|
+ (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
|
||||||
|
+ (match_operand:SI 3 "const_int_operand")] ;; model
|
||||||
|
+ UNSPEC_SYNC_OLD_OP))]
|
||||||
|
+ "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "amadd%A3.<amo>\t%0,%z2,%1"
|
||||||
|
+ [(set (attr "length") (const_int 4))])
|
||||||
|
+
|
||||||
|
(define_expand "atomic_fetch_add<mode>"
|
||||||
|
[(set (match_operand:SHORT 0 "register_operand" "=&r")
|
||||||
|
(match_operand:SHORT 1 "memory_operand" "+ZB"))
|
||||||
|
@@ -523,10 +617,16 @@
|
||||||
|
UNSPEC_SYNC_OLD_OP))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- union loongarch_gen_fn_ptrs generator;
|
||||||
|
- generator.fn_7 = gen_atomic_cas_value_add_7_si;
|
||||||
|
- loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
|
||||||
|
- operands[1], operands[2], operands[3]);
|
||||||
|
+ if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ emit_insn (gen_atomic_fetch_add<mode>_short (operands[0], operands[1],
|
||||||
|
+ operands[2], operands[3]));
|
||||||
|
+ else
|
||||||
|
+ {
|
||||||
|
+ union loongarch_gen_fn_ptrs generator;
|
||||||
|
+ generator.fn_7 = gen_atomic_cas_value_add_7_si;
|
||||||
|
+ loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
|
||||||
|
+ operands[1], operands[2], operands[3]);
|
||||||
|
+ }
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
140
0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
Normal file
140
0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
From 61a70e6b6b44bf420eae559d998e109b70e5a9b6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Fri, 17 Nov 2023 16:04:45 +0800
|
||||||
|
Subject: [PATCH 034/188] LoongArch: atomic_load and atomic_store are
|
||||||
|
implemented using dbar grading.
|
||||||
|
|
||||||
|
The la464 memory model design allows loads from the same address to be executed
|
||||||
|
out of order, so in the following test example the load on line 23 may be executed
|
||||||
|
before the load on line 21, resulting in an error.
|
||||||
|
So when memmodel is MEMMODEL_RELAXED, the load instruction is followed by
|
||||||
|
"dbar 0x700" when implementing __atomic_load.
|
||||||
|
|
||||||
|
1 void *
|
||||||
|
2 gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
|
||||||
|
3 {
|
||||||
|
4 int *intptr;
|
||||||
|
5 uintptr_t oldval = 1;
|
||||||
|
6
|
||||||
|
7 __atomic_compare_exchange_n (ptrlock, &oldval, 2, false,
|
||||||
|
8 MEMMODEL_RELAXED, MEMMODEL_RELAXED);
|
||||||
|
9
|
||||||
|
10 /* futex works on ints, not pointers.
|
||||||
|
11 But a valid work share pointer will be at least
|
||||||
|
12 8 byte aligned, so it is safe to assume the low
|
||||||
|
13 32-bits of the pointer won't contain values 1 or 2. */
|
||||||
|
14 __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
|
||||||
|
15 #if __BYTE_ORDER == __BIG_ENDIAN
|
||||||
|
16 if (sizeof (*ptrlock) > sizeof (int))
|
||||||
|
17 intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
|
||||||
|
18 #endif
|
||||||
|
19 do
|
||||||
|
20 do_wait (intptr, 2);
|
||||||
|
21 while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2);
|
||||||
|
22 __asm volatile ("" : : : "memory");
|
||||||
|
23 return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
|
||||||
|
24 }
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/sync.md (atomic_load<mode>): New template.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/sync.md | 70 +++++++++++++++++++++++++++++++++---
|
||||||
|
1 file changed, 65 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
|
||||||
|
index 1eabaec04..f4673c856 100644
|
||||||
|
--- a/gcc/config/loongarch/sync.md
|
||||||
|
+++ b/gcc/config/loongarch/sync.md
|
||||||
|
@@ -30,6 +30,7 @@
|
||||||
|
UNSPEC_SYNC_OLD_OP
|
||||||
|
UNSPEC_SYNC_EXCHANGE
|
||||||
|
UNSPEC_ATOMIC_STORE
|
||||||
|
+ UNSPEC_ATOMIC_LOAD
|
||||||
|
UNSPEC_MEMORY_BARRIER
|
||||||
|
])
|
||||||
|
|
||||||
|
@@ -103,16 +104,75 @@
|
||||||
|
|
||||||
|
;; Atomic memory operations.
|
||||||
|
|
||||||
|
+(define_insn "atomic_load<mode>"
|
||||||
|
+ [(set (match_operand:QHWD 0 "register_operand" "=r")
|
||||||
|
+ (unspec_volatile:QHWD
|
||||||
|
+ [(match_operand:QHWD 1 "memory_operand" "+m")
|
||||||
|
+ (match_operand:SI 2 "const_int_operand")] ;; model
|
||||||
|
+ UNSPEC_ATOMIC_LOAD))]
|
||||||
|
+ ""
|
||||||
|
+{
|
||||||
|
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
|
||||||
|
+
|
||||||
|
+ switch (model)
|
||||||
|
+ {
|
||||||
|
+ case MEMMODEL_SEQ_CST:
|
||||||
|
+ return "dbar\t0x11\\n\\t"
|
||||||
|
+ "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
+ "dbar\t0x14\\n\\t";
|
||||||
|
+ case MEMMODEL_ACQUIRE:
|
||||||
|
+ return "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
+ "dbar\t0x14\\n\\t";
|
||||||
|
+ case MEMMODEL_RELAXED:
|
||||||
|
+ return "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
+ "dbar\t0x700\\n\\t";
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
|
||||||
|
+ __ATOMIC_CONSUME and __ATOMIC_ACQUIRE.
|
||||||
|
+ The expand_builtin_atomic_store function converts all invalid memmodels
|
||||||
|
+ to MEMMODEL_SEQ_CST.
|
||||||
|
+
|
||||||
|
+ __atomic builtins doc: "Consume is implemented using the
|
||||||
|
+ stronger acquire memory order because of a deficiency in C++11's
|
||||||
|
+ semantics." See PR 59448 and get_memmodel in builtins.cc. */
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+ [(set (attr "length") (const_int 12))])
|
||||||
|
+
|
||||||
|
;; Implement atomic stores with amoswap. Fall back to fences for atomic loads.
|
||||||
|
(define_insn "atomic_store<mode>"
|
||||||
|
- [(set (match_operand:GPR 0 "memory_operand" "+ZB")
|
||||||
|
- (unspec_volatile:GPR
|
||||||
|
- [(match_operand:GPR 1 "reg_or_0_operand" "rJ")
|
||||||
|
+ [(set (match_operand:QHWD 0 "memory_operand" "+m")
|
||||||
|
+ (unspec_volatile:QHWD
|
||||||
|
+ [(match_operand:QHWD 1 "reg_or_0_operand" "rJ")
|
||||||
|
(match_operand:SI 2 "const_int_operand")] ;; model
|
||||||
|
UNSPEC_ATOMIC_STORE))]
|
||||||
|
""
|
||||||
|
- "amswap%A2.<amo>\t$zero,%z1,%0"
|
||||||
|
- [(set (attr "length") (const_int 8))])
|
||||||
|
+{
|
||||||
|
+ enum memmodel model = memmodel_base (INTVAL (operands[2]));
|
||||||
|
+
|
||||||
|
+ switch (model)
|
||||||
|
+ {
|
||||||
|
+ case MEMMODEL_SEQ_CST:
|
||||||
|
+ return "dbar\t0x12\\n\\t"
|
||||||
|
+ "st.<size>\t%z1,%0\\n\\t"
|
||||||
|
+ "dbar\t0x18\\n\\t";
|
||||||
|
+ case MEMMODEL_RELEASE:
|
||||||
|
+ return "dbar\t0x12\\n\\t"
|
||||||
|
+ "st.<size>\t%z1,%0\\n\\t";
|
||||||
|
+ case MEMMODEL_RELAXED:
|
||||||
|
+ return "st.<size>\t%z1,%0";
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
|
||||||
|
+ and __ATOMIC_RELEASE.
|
||||||
|
+ The expand_builtin_atomic_store function converts all invalid memmodels
|
||||||
|
+ to MEMMODEL_SEQ_CST. */
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+ [(set (attr "length") (const_int 12))])
|
||||||
|
|
||||||
|
(define_insn "atomic_<atomic_optab><mode>"
|
||||||
|
[(set (match_operand:GPR 0 "memory_operand" "+ZB")
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
615
0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
Normal file
615
0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
Normal file
@ -0,0 +1,615 @@
|
|||||||
|
From 535fb5a2d4347801439fbb51fa07cd0317183cee Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 25 Oct 2024 02:08:03 +0000
|
||||||
|
Subject: [PATCH 035/188] LoongArch: genopts: Add infrastructure to generate
|
||||||
|
code for new features in ISA evolution
|
||||||
|
|
||||||
|
LoongArch v1.10 introduced the concept of ISA evolution. During ISA
|
||||||
|
evolution, many independent features can be added and enumerated via
|
||||||
|
CPUCFG.
|
||||||
|
|
||||||
|
Add a data file into genopts storing the CPUCFG word, bit, the name
|
||||||
|
of the command line option controlling if this feature should be used
|
||||||
|
for compilation, and the text description. Make genstr.sh process these
|
||||||
|
info and add the command line options into loongarch.opt and
|
||||||
|
loongarch-str.h, and generate a new file loongarch-cpucfg-map.h for
|
||||||
|
mapping CPUCFG output to the corresponding option. When handling
|
||||||
|
-march=native, use the information in loongarch-cpucfg-map.h to generate
|
||||||
|
the corresponding option mask. Enable the features implied by -march
|
||||||
|
setting unless the user has explicitly disabled the feature.
|
||||||
|
|
||||||
|
The added options (-mdiv32 and -mld-seq-sa) are not really handled yet.
|
||||||
|
They'll be used in the following patches.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/genopts/isa-evolution.in: New data file.
|
||||||
|
* config/loongarch/genopts/genstr.sh: Translate info in
|
||||||
|
isa-evolution.in when generating loongarch-str.h, loongarch.opt,
|
||||||
|
and loongarch-cpucfg-map.h.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in (isa_evolution):
|
||||||
|
New variable.
|
||||||
|
* config/loongarch/t-loongarch: (loongarch-cpucfg-map.h): New
|
||||||
|
rule.
|
||||||
|
(loongarch-str.h): Depend on isa-evolution.in.
|
||||||
|
(loongarch.opt): Depend on isa-evolution.in.
|
||||||
|
(loongarch-cpu.o): Depend on loongarch-cpucfg-map.h.
|
||||||
|
* config/loongarch/loongarch-str.h: Regenerate.
|
||||||
|
* config/loongarch/loongarch-def.h (loongarch_isa): Add field
|
||||||
|
for evolution features. Add helper function to enable features
|
||||||
|
in this field.
|
||||||
|
Probe native CPU capability and save the corresponding options
|
||||||
|
into preset.
|
||||||
|
* config/loongarch/loongarch-cpu.cc (fill_native_cpu_config):
|
||||||
|
Probe native CPU capability and save the corresponding options
|
||||||
|
into preset.
|
||||||
|
(cache_cpucfg): Simplify with C++11-style for loop.
|
||||||
|
(cpucfg_useful_idx, N_CPUCFG_WORDS): Move to ...
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_option_override_internal): Enable the ISA evolution
|
||||||
|
feature options implied by -march and not explicitly disabled.
|
||||||
|
(loongarch_asm_code_end): New function, print ISA information as
|
||||||
|
comments in the assembly if -fverbose-asm. It makes it easier to
|
||||||
|
debug things like -march=native.
|
||||||
|
(TARGET_ASM_CODE_END): Define.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* config/loongarch/loongarch-cpucfg-map.h: Generate.
|
||||||
|
(cpucfg_useful_idx, N_CPUCFG_WORDS) ... here.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/genopts/genstr.sh | 92 ++++++++++++++++++-
|
||||||
|
gcc/config/loongarch/genopts/isa-evolution.in | 2 +
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 7 ++
|
||||||
|
gcc/config/loongarch/loongarch-cpu.cc | 46 +++++-----
|
||||||
|
gcc/config/loongarch/loongarch-cpucfg-map.h | 48 ++++++++++
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 7 ++
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 6 +-
|
||||||
|
gcc/config/loongarch/loongarch.cc | 31 +++++++
|
||||||
|
gcc/config/loongarch/loongarch.opt | 20 +++-
|
||||||
|
gcc/config/loongarch/t-loongarch | 21 ++++-
|
||||||
|
10 files changed, 244 insertions(+), 36 deletions(-)
|
||||||
|
create mode 100644 gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
create mode 100644 gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh
|
||||||
|
index 972ef125f..bcc616e98 100755
|
||||||
|
--- a/gcc/config/loongarch/genopts/genstr.sh
|
||||||
|
+++ b/gcc/config/loongarch/genopts/genstr.sh
|
||||||
|
@@ -25,8 +25,8 @@ cd "$(dirname "$0")"
|
||||||
|
# Generate a header containing definitions from the string table.
|
||||||
|
gen_defines() {
|
||||||
|
cat <<EOF
|
||||||
|
-/* Generated automatically by "genstr" from "loongarch-strings".
|
||||||
|
- Please do not edit this file directly.
|
||||||
|
+/* Generated automatically by "genstr" from "loongarch-strings" and
|
||||||
|
+ "isa-evolution.in". Please do not edit this file directly.
|
||||||
|
|
||||||
|
Copyright (C) 2021-2022 Free Software Foundation, Inc.
|
||||||
|
Contributed by Loongson Ltd.
|
||||||
|
@@ -56,6 +56,15 @@ EOF
|
||||||
|
loongarch-strings
|
||||||
|
|
||||||
|
echo
|
||||||
|
+
|
||||||
|
+ # Generate the strings from isa-evolution.in.
|
||||||
|
+ awk '{
|
||||||
|
+ a=$3
|
||||||
|
+ gsub(/-/, "_", a)
|
||||||
|
+ print("#define OPTSTR_"toupper(a)"\t\""$3"\"")
|
||||||
|
+ }' isa-evolution.in
|
||||||
|
+
|
||||||
|
+ echo
|
||||||
|
echo "#endif /* LOONGARCH_STR_H */"
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -77,11 +86,12 @@ gen_options() {
|
||||||
|
# print a header
|
||||||
|
cat << EOF
|
||||||
|
; Generated by "genstr" from the template "loongarch.opt.in"
|
||||||
|
-; and definitions from "loongarch-strings".
|
||||||
|
+; and definitions from "loongarch-strings" and "isa-evolution.in".
|
||||||
|
;
|
||||||
|
; Please do not edit this file directly.
|
||||||
|
; It will be automatically updated during a gcc build
|
||||||
|
-; if you change "loongarch.opt.in" or "loongarch-strings".
|
||||||
|
+; if you change "loongarch.opt.in", "loongarch-strings", or
|
||||||
|
+; "isa-evolution.in".
|
||||||
|
;
|
||||||
|
EOF
|
||||||
|
|
||||||
|
@@ -91,13 +101,85 @@ EOF
|
||||||
|
eval "echo \"$line\""
|
||||||
|
done
|
||||||
|
}
|
||||||
|
+
|
||||||
|
+ # Generate the option entries from isa-evolution.in.
|
||||||
|
+ awk '{
|
||||||
|
+ print("")
|
||||||
|
+ print("m"$3)
|
||||||
|
+ gsub(/-/, "_", $3)
|
||||||
|
+ print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)")
|
||||||
|
+ $1=""; $2=""; $3=""
|
||||||
|
+ sub(/^ */, "", $0)
|
||||||
|
+ print($0)
|
||||||
|
+ }' isa-evolution.in
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+gen_cpucfg_map() {
|
||||||
|
+ cat <<EOF
|
||||||
|
+/* Generated automatically by "genstr" from "isa-evolution.in".
|
||||||
|
+ Please do not edit this file directly.
|
||||||
|
+
|
||||||
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+This file is part of GCC.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify
|
||||||
|
+it under the terms of the GNU General Public License as published by
|
||||||
|
+the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+any later version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful,
|
||||||
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+GNU General Public License for more details.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License
|
||||||
|
+along with GCC; see the file COPYING3. If not see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#ifndef LOONGARCH_CPUCFG_MAP_H
|
||||||
|
+#define LOONGARCH_CPUCFG_MAP_H
|
||||||
|
+
|
||||||
|
+#include "options.h"
|
||||||
|
+
|
||||||
|
+static constexpr struct {
|
||||||
|
+ int cpucfg_word;
|
||||||
|
+ unsigned int cpucfg_bit;
|
||||||
|
+ HOST_WIDE_INT isa_evolution_bit;
|
||||||
|
+} cpucfg_map[] = {
|
||||||
|
+EOF
|
||||||
|
+
|
||||||
|
+ # Generate the cpucfg_map entries from isa-evolution.in.
|
||||||
|
+ awk '{
|
||||||
|
+ gsub(/-/, "_", $3)
|
||||||
|
+ print(" { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },")
|
||||||
|
+ }' isa-evolution.in
|
||||||
|
+
|
||||||
|
+ echo "};"
|
||||||
|
+ echo
|
||||||
|
+ echo "static constexpr int cpucfg_useful_idx[] = {"
|
||||||
|
+
|
||||||
|
+ awk 'BEGIN { print(" 0,\n 1,\n 2,\n 16,\n 17,\n 18,\n 19,") }
|
||||||
|
+ {if ($1+0 > max+0) max=$1; print(" "$1",")}' \
|
||||||
|
+ isa-evolution.in | sort -n | uniq
|
||||||
|
+
|
||||||
|
+ echo "};"
|
||||||
|
+ echo ""
|
||||||
|
+
|
||||||
|
+ awk 'BEGIN { max=19 }
|
||||||
|
+ { if ($1+0 > max+0) max=$1 }
|
||||||
|
+ END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \
|
||||||
|
+ isa-evolution.in
|
||||||
|
+
|
||||||
|
+ echo "#endif /* LOONGARCH_CPUCFG_MAP_H */"
|
||||||
|
}
|
||||||
|
|
||||||
|
main() {
|
||||||
|
case "$1" in
|
||||||
|
+ cpucfg-map) gen_cpucfg_map;;
|
||||||
|
header) gen_defines;;
|
||||||
|
opt) gen_options;;
|
||||||
|
- *) echo "Unknown Command: \"$1\". Available: header, opt"; exit 1;;
|
||||||
|
+ *) echo "Unknown Command: \"$1\". Available: cpucfg-map, header, opt"; exit 1;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..e58f0d6a1
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
@@ -0,0 +1,2 @@
|
||||||
|
+2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
|
||||||
|
+3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700).
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index bd3cfaf60..a49de07c9 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -247,3 +247,10 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ
|
||||||
|
Indicate how many non memory access vector instructions can be issued per
|
||||||
|
cycle, it's used in unroll factor determination for autovectorizer. The
|
||||||
|
default value is 4.
|
||||||
|
+
|
||||||
|
+; Features added during ISA evolution. This concept is different from ISA
|
||||||
|
+; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the
|
||||||
|
+; explanation. These features may be implemented and enumerated with
|
||||||
|
+; CPUCFG independently, so we use bit flags to specify them.
|
||||||
|
+Variable
|
||||||
|
+HOST_WIDE_INT isa_evolution = 0
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
index cbe52d7ed..e1cd85d02 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
@@ -29,12 +29,11 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "loongarch-def.h"
|
||||||
|
#include "loongarch-opts.h"
|
||||||
|
#include "loongarch-cpu.h"
|
||||||
|
+#include "loongarch-cpucfg-map.h"
|
||||||
|
#include "loongarch-str.h"
|
||||||
|
|
||||||
|
/* Native CPU detection with "cpucfg" */
|
||||||
|
-#define N_CPUCFG_WORDS 0x15
|
||||||
|
static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
|
||||||
|
-static const int cpucfg_useful_idx[] = {0, 1, 2, 16, 17, 18, 19};
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
read_cpucfg_word (int wordno)
|
||||||
|
@@ -56,11 +55,8 @@ read_cpucfg_word (int wordno)
|
||||||
|
void
|
||||||
|
cache_cpucfg (void)
|
||||||
|
{
|
||||||
|
- for (unsigned int i = 0; i < sizeof (cpucfg_useful_idx) / sizeof (int); i++)
|
||||||
|
- {
|
||||||
|
- cpucfg_cache[cpucfg_useful_idx[i]]
|
||||||
|
- = read_cpucfg_word (cpucfg_useful_idx[i]);
|
||||||
|
- }
|
||||||
|
+ for (int idx: cpucfg_useful_idx)
|
||||||
|
+ cpucfg_cache[idx] = read_cpucfg_word (idx);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t
|
||||||
|
@@ -125,11 +121,12 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
int tmp;
|
||||||
|
tgt->cpu_arch = native_cpu_type;
|
||||||
|
|
||||||
|
+ auto &preset = loongarch_cpu_default_isa[tgt->cpu_arch];
|
||||||
|
+
|
||||||
|
/* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].base
|
||||||
|
With: base architecture (ARCH)
|
||||||
|
At: cpucfg_words[1][1:0] */
|
||||||
|
|
||||||
|
- #define PRESET_ARCH (loongarch_cpu_default_isa[tgt->cpu_arch].base)
|
||||||
|
switch (cpucfg_cache[1] & 0x3)
|
||||||
|
{
|
||||||
|
case 0x02:
|
||||||
|
@@ -144,19 +141,18 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check consistency with PRID presets. */
|
||||||
|
- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_ARCH)
|
||||||
|
+ if (native_cpu_type != CPU_NATIVE && tmp != preset.base)
|
||||||
|
warning (0, "base architecture %qs differs from PRID preset %qs",
|
||||||
|
loongarch_isa_base_strings[tmp],
|
||||||
|
- loongarch_isa_base_strings[PRESET_ARCH]);
|
||||||
|
+ loongarch_isa_base_strings[preset.base]);
|
||||||
|
|
||||||
|
/* Use the native value anyways. */
|
||||||
|
- PRESET_ARCH = tmp;
|
||||||
|
+ preset.base = tmp;
|
||||||
|
|
||||||
|
/* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].fpu
|
||||||
|
With: FPU type (FP, FP_SP, FP_DP)
|
||||||
|
At: cpucfg_words[2][2:0] */
|
||||||
|
|
||||||
|
- #define PRESET_FPU (loongarch_cpu_default_isa[tgt->cpu_arch].fpu)
|
||||||
|
switch (cpucfg_cache[2] & 0x7)
|
||||||
|
{
|
||||||
|
case 0x07:
|
||||||
|
@@ -179,20 +175,19 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check consistency with PRID presets. */
|
||||||
|
- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_FPU)
|
||||||
|
+ if (native_cpu_type != CPU_NATIVE && tmp != preset.fpu)
|
||||||
|
warning (0, "floating-point unit %qs differs from PRID preset %qs",
|
||||||
|
loongarch_isa_ext_strings[tmp],
|
||||||
|
- loongarch_isa_ext_strings[PRESET_FPU]);
|
||||||
|
+ loongarch_isa_ext_strings[preset.fpu]);
|
||||||
|
|
||||||
|
/* Use the native value anyways. */
|
||||||
|
- PRESET_FPU = tmp;
|
||||||
|
+ preset.fpu = tmp;
|
||||||
|
|
||||||
|
|
||||||
|
/* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd
|
||||||
|
With: SIMD extension type (LSX, LASX)
|
||||||
|
At: cpucfg_words[2][7:6] */
|
||||||
|
|
||||||
|
- #define PRESET_SIMD (loongarch_cpu_default_isa[tgt->cpu_arch].simd)
|
||||||
|
switch (cpucfg_cache[2] & 0xc0)
|
||||||
|
{
|
||||||
|
case 0xc0:
|
||||||
|
@@ -219,14 +214,19 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
/* Check consistency with PRID presets. */
|
||||||
|
|
||||||
|
/*
|
||||||
|
- if (native_cpu_type != CPU_NATIVE && tmp != PRESET_SIMD)
|
||||||
|
+ if (native_cpu_type != CPU_NATIVE && tmp != preset.simd)
|
||||||
|
warning (0, "SIMD extension %qs differs from PRID preset %qs",
|
||||||
|
loongarch_isa_ext_strings[tmp],
|
||||||
|
- loongarch_isa_ext_strings[PRESET_SIMD]);
|
||||||
|
+ loongarch_isa_ext_strings[preset.simd]);
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Use the native value anyways. */
|
||||||
|
- PRESET_SIMD = tmp;
|
||||||
|
+ preset.simd = tmp;
|
||||||
|
+
|
||||||
|
+ /* Features added during ISA evolution. */
|
||||||
|
+ for (const auto &entry: cpucfg_map)
|
||||||
|
+ if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
|
||||||
|
+ preset.evolution |= entry.isa_evolution_bit;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tune_native_p)
|
||||||
|
@@ -237,7 +237,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
With: cache size info
|
||||||
|
At: cpucfg_words[16:20][31:0] */
|
||||||
|
|
||||||
|
- #define PRESET_CACHE (loongarch_cpu_cache[tgt->cpu_tune])
|
||||||
|
+ auto &preset_cache = loongarch_cpu_cache[tgt->cpu_tune];
|
||||||
|
struct loongarch_cache native_cache;
|
||||||
|
int l1d_present = 0, l1u_present = 0;
|
||||||
|
int l2d_present = 0;
|
||||||
|
@@ -268,8 +268,8 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
>> 10; /* in kibibytes */
|
||||||
|
|
||||||
|
/* Use the native value anyways. */
|
||||||
|
- PRESET_CACHE.l1d_line_size = native_cache.l1d_line_size;
|
||||||
|
- PRESET_CACHE.l1d_size = native_cache.l1d_size;
|
||||||
|
- PRESET_CACHE.l2d_size = native_cache.l2d_size;
|
||||||
|
+ preset_cache.l1d_line_size = native_cache.l1d_line_size;
|
||||||
|
+ preset_cache.l1d_size = native_cache.l1d_size;
|
||||||
|
+ preset_cache.l2d_size = native_cache.l2d_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..0c078c397
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
@@ -0,0 +1,48 @@
|
||||||
|
+/* Generated automatically by "genstr" from "isa-evolution.in".
|
||||||
|
+ Please do not edit this file directly.
|
||||||
|
+
|
||||||
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+This file is part of GCC.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify
|
||||||
|
+it under the terms of the GNU General Public License as published by
|
||||||
|
+the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+any later version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful,
|
||||||
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+GNU General Public License for more details.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License
|
||||||
|
+along with GCC; see the file COPYING3. If not see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#ifndef LOONGARCH_CPUCFG_MAP_H
|
||||||
|
+#define LOONGARCH_CPUCFG_MAP_H
|
||||||
|
+
|
||||||
|
+#include "options.h"
|
||||||
|
+
|
||||||
|
+static constexpr struct {
|
||||||
|
+ int cpucfg_word;
|
||||||
|
+ unsigned int cpucfg_bit;
|
||||||
|
+ HOST_WIDE_INT isa_evolution_bit;
|
||||||
|
+} cpucfg_map[] = {
|
||||||
|
+ { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
|
||||||
|
+ { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static constexpr int cpucfg_useful_idx[] = {
|
||||||
|
+ 0,
|
||||||
|
+ 1,
|
||||||
|
+ 2,
|
||||||
|
+ 3,
|
||||||
|
+ 16,
|
||||||
|
+ 17,
|
||||||
|
+ 18,
|
||||||
|
+ 19,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static constexpr int N_CPUCFG_WORDS = 20;
|
||||||
|
+#endif /* LOONGARCH_CPUCFG_MAP_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index 078d8607d..cb99caebe 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#ifndef LOONGARCH_DEF_H
|
||||||
|
#define LOONGARCH_DEF_H
|
||||||
|
|
||||||
|
+#include <stdint.h>
|
||||||
|
#include "loongarch-tune.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
@@ -121,6 +122,12 @@ struct loongarch_isa
|
||||||
|
int base; /* ISA_BASE_ */
|
||||||
|
int fpu; /* ISA_EXT_FPU_ */
|
||||||
|
int simd; /* ISA_EXT_SIMD_ */
|
||||||
|
+
|
||||||
|
+ /* ISA evolution features implied by -march=, for -march=native probed
|
||||||
|
+ via CPUCFG. The features implied by base may be not included here.
|
||||||
|
+
|
||||||
|
+ Using int64_t instead of HOST_WIDE_INT for C compatibility. */
|
||||||
|
+ int64_t evolution;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct loongarch_abi
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index 037e9e583..cd9dbb41b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
-/* Generated automatically by "genstr" from "loongarch-strings".
|
||||||
|
- Please do not edit this file directly.
|
||||||
|
+/* Generated automatically by "genstr" from "loongarch-strings" and
|
||||||
|
+ "isa-evolution.in". Please do not edit this file directly.
|
||||||
|
|
||||||
|
Copyright (C) 2021-2022 Free Software Foundation, Inc.
|
||||||
|
Contributed by Loongson Ltd.
|
||||||
|
@@ -69,4 +69,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_EXPLICIT_RELOCS_NONE "none"
|
||||||
|
#define STR_EXPLICIT_RELOCS_ALWAYS "always"
|
||||||
|
|
||||||
|
+#define OPTSTR_DIV32 "div32"
|
||||||
|
+#define OPTSTR_LD_SEQ_SA "ld-seq-sa"
|
||||||
|
#endif /* LOONGARCH_STR_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 7bb46a45d..8bd46da62 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -7451,6 +7451,10 @@ loongarch_option_override_internal (struct gcc_options *opts,
|
||||||
|
if (loongarch_branch_cost == 0)
|
||||||
|
loongarch_branch_cost = loongarch_cost->branch_cost;
|
||||||
|
|
||||||
|
+ /* If the user hasn't disabled a feature added during ISA evolution,
|
||||||
|
+ use the processor's default. */
|
||||||
|
+ isa_evolution |= (la_target.isa.evolution &
|
||||||
|
+ ~global_options_set.x_isa_evolution);
|
||||||
|
|
||||||
|
/* Enable sw prefetching at -O3 and higher. */
|
||||||
|
if (opts->x_flag_prefetch_loop_arrays < 0
|
||||||
|
@@ -11427,6 +11431,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
|
||||||
|
is_packed);
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* If -fverbose-asm, dump some info for debugging. */
|
||||||
|
+static void
|
||||||
|
+loongarch_asm_code_end (void)
|
||||||
|
+{
|
||||||
|
+#define DUMP_FEATURE(PRED) \
|
||||||
|
+ fprintf (asm_out_file, "%s %s: %s\n", ASM_COMMENT_START, #PRED, \
|
||||||
|
+ (PRED) ? "enabled" : "disabled")
|
||||||
|
+
|
||||||
|
+ if (flag_verbose_asm)
|
||||||
|
+ {
|
||||||
|
+ fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START,
|
||||||
|
+ loongarch_cpu_strings [la_target.cpu_arch]);
|
||||||
|
+ fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START,
|
||||||
|
+ loongarch_cpu_strings [la_target.cpu_tune]);
|
||||||
|
+ fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
|
||||||
|
+ loongarch_isa_base_strings [la_target.isa.base]);
|
||||||
|
+ DUMP_FEATURE (TARGET_DIV32);
|
||||||
|
+ DUMP_FEATURE (TARGET_LD_SEQ_SA);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ fputs ("\n\n", asm_out_file);
|
||||||
|
+#undef DUMP_FEATURE
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Initialize the GCC target structure. */
|
||||||
|
#undef TARGET_ASM_ALIGNED_HI_OP
|
||||||
|
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
|
||||||
|
@@ -11446,6 +11474,9 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
|
||||||
|
#undef TARGET_ASM_FUNCTION_RODATA_SECTION
|
||||||
|
#define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section
|
||||||
|
|
||||||
|
+#undef TARGET_ASM_CODE_END
|
||||||
|
+#define TARGET_ASM_CODE_END loongarch_asm_code_end
|
||||||
|
+
|
||||||
|
#undef TARGET_SCHED_INIT
|
||||||
|
#define TARGET_SCHED_INIT loongarch_sched_init
|
||||||
|
#undef TARGET_SCHED_REORDER
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index d936954b8..5251f705d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -1,9 +1,10 @@
|
||||||
|
; Generated by "genstr" from the template "loongarch.opt.in"
|
||||||
|
-; and definitions from "loongarch-strings".
|
||||||
|
+; and definitions from "loongarch-strings" and "isa-evolution.in".
|
||||||
|
;
|
||||||
|
; Please do not edit this file directly.
|
||||||
|
; It will be automatically updated during a gcc build
|
||||||
|
-; if you change "loongarch.opt.in" or "loongarch-strings".
|
||||||
|
+; if you change "loongarch.opt.in", "loongarch-strings", or
|
||||||
|
+; "isa-evolution.in".
|
||||||
|
;
|
||||||
|
; Copyright (C) 2021-2022 Free Software Foundation, Inc.
|
||||||
|
;
|
||||||
|
@@ -254,3 +255,18 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ
|
||||||
|
Indicate how many non memory access vector instructions can be issued per
|
||||||
|
cycle, it's used in unroll factor determination for autovectorizer. The
|
||||||
|
default value is 4.
|
||||||
|
+
|
||||||
|
+; Features added during ISA evolution. This concept is different from ISA
|
||||||
|
+; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the
|
||||||
|
+; explanation. These features may be implemented and enumerated with
|
||||||
|
+; CPUCFG independently, so we use bit flags to specify them.
|
||||||
|
+Variable
|
||||||
|
+HOST_WIDE_INT isa_evolution = 0
|
||||||
|
+
|
||||||
|
+mdiv32
|
||||||
|
+Target Mask(ISA_DIV32) Var(isa_evolution)
|
||||||
|
+Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
|
||||||
|
+
|
||||||
|
+mld-seq-sa
|
||||||
|
+Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution)
|
||||||
|
+Do not need load-load barriers (dbar 0x700).
|
||||||
|
diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
|
||||||
|
index 12734c37b..57b1176bc 100644
|
||||||
|
--- a/gcc/config/loongarch/t-loongarch
|
||||||
|
+++ b/gcc/config/loongarch/t-loongarch
|
||||||
|
@@ -18,8 +18,9 @@
|
||||||
|
|
||||||
|
|
||||||
|
GTM_H += loongarch-multilib.h
|
||||||
|
-OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
|
||||||
|
- $(srcdir)/config/loongarch/loongarch-tune.h
|
||||||
|
+OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
|
||||||
|
+ $(srcdir)/config/loongarch/loongarch-tune.h \
|
||||||
|
+ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
|
||||||
|
# Canonical target triplet from config.gcc
|
||||||
|
LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\
|
||||||
|
@@ -31,7 +32,8 @@ LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h
|
||||||
|
# String definition header
|
||||||
|
$(LA_STR_H): s-loongarch-str ; @true
|
||||||
|
s-loongarch-str: $(srcdir)/config/loongarch/genopts/genstr.sh \
|
||||||
|
- $(srcdir)/config/loongarch/genopts/loongarch-strings
|
||||||
|
+ $(srcdir)/config/loongarch/genopts/loongarch-strings \
|
||||||
|
+ $(srcdir)/config/loongarch/genopts/isa-evolution.in
|
||||||
|
$(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh header \
|
||||||
|
$(srcdir)/config/loongarch/genopts/loongarch-strings > \
|
||||||
|
tmp-loongarch-str.h
|
||||||
|
@@ -58,7 +60,8 @@ loongarch-driver.o : $(srcdir)/config/loongarch/loongarch-driver.cc $(LA_STR_H)
|
||||||
|
loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.cc $(LA_STR_H)
|
||||||
|
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||||
|
|
||||||
|
-loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H)
|
||||||
|
+loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \
|
||||||
|
+ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||||
|
|
||||||
|
loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H)
|
||||||
|
@@ -67,6 +70,7 @@ loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H)
|
||||||
|
$(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
|
||||||
|
s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
|
||||||
|
$(srcdir)/config/loongarch/genopts/loongarch.opt.in \
|
||||||
|
+ $(srcdir)/config/loongarch/genopts/isa-evolution.in \
|
||||||
|
$(srcdir)/config/loongarch/genopts/loongarch-strings $(LA_STR_H)
|
||||||
|
$(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \
|
||||||
|
$(srcdir)/config/loongarch/genopts/loongarch.opt.in \
|
||||||
|
@@ -74,3 +78,12 @@ s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
|
||||||
|
$(SHELL) $(srcdir)/../move-if-change tmp-loongarch.opt \
|
||||||
|
$(srcdir)/config/loongarch/loongarch.opt
|
||||||
|
$(STAMP) s-loongarch-opt
|
||||||
|
+
|
||||||
|
+$(srcdir)/config/loongarch/loongarch-cpucfg-map.h: s-loongarch-cpucfg-map
|
||||||
|
+ @true
|
||||||
|
+s-loongarch-cpucfg-map: $(srcdir)/config/loongarch/genopts/genstr.sh \
|
||||||
|
+ $(srcdir)/config/loongarch/genopts/isa-evolution.in
|
||||||
|
+ $(SHELL) $< cpucfg-map > tmp-cpucfg.h
|
||||||
|
+ $(SHELL) $(srcdir)/../move-if-change tmp-cpucfg.h \
|
||||||
|
+ $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
+ $(STAMP) $@
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
148
0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
Normal file
148
0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
From 24648180418affbaf044a58ae0b5f79a0cf71155 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 18 Nov 2023 03:19:07 +0800
|
||||||
|
Subject: [PATCH 036/188] LoongArch: Add evolution features of base ISA
|
||||||
|
revisions
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-def.h:
|
||||||
|
(loongarch_isa_base_features): Declare. Define it in ...
|
||||||
|
* config/loongarch/loongarch-cpu.cc
|
||||||
|
(loongarch_isa_base_features): ... here.
|
||||||
|
(fill_native_cpu_config): If we know the base ISA of the CPU
|
||||||
|
model from PRID, use it instead of la64 (v1.0). Check if all
|
||||||
|
expected features of this base ISA are available, emit a warning
|
||||||
|
if not.
|
||||||
|
* config/loongarch/loongarch-opts.cc (config_target_isa): Enable
|
||||||
|
the features implied by the base ISA if not -march=native.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-cpu.cc | 62 ++++++++++++++++++--------
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 5 +++
|
||||||
|
gcc/config/loongarch/loongarch-opts.cc | 3 ++
|
||||||
|
3 files changed, 52 insertions(+), 18 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
index e1cd85d02..76d66fa55 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
@@ -32,6 +32,19 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "loongarch-cpucfg-map.h"
|
||||||
|
#include "loongarch-str.h"
|
||||||
|
|
||||||
|
+/* loongarch_isa_base_features defined here instead of loongarch-def.c
|
||||||
|
+ because we need to use options.h. Pay attention to the order of elements
|
||||||
|
+ in the initializer because ISO C++ does not allow C99 designated
|
||||||
|
+ initializers! */
|
||||||
|
+
|
||||||
|
+#define ISA_BASE_LA64V110_FEATURES \
|
||||||
|
+ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
|
||||||
|
+
|
||||||
|
+int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
|
||||||
|
+ /* [ISA_BASE_LA64V100] = */ 0,
|
||||||
|
+ /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
/* Native CPU detection with "cpucfg" */
|
||||||
|
static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
|
||||||
|
|
||||||
|
@@ -127,24 +140,22 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
With: base architecture (ARCH)
|
||||||
|
At: cpucfg_words[1][1:0] */
|
||||||
|
|
||||||
|
- switch (cpucfg_cache[1] & 0x3)
|
||||||
|
- {
|
||||||
|
- case 0x02:
|
||||||
|
- tmp = ISA_BASE_LA64V100;
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
- default:
|
||||||
|
- fatal_error (UNKNOWN_LOCATION,
|
||||||
|
- "unknown native base architecture %<0x%x%>, "
|
||||||
|
- "%qs failed", (unsigned int) (cpucfg_cache[1] & 0x3),
|
||||||
|
- "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
|
||||||
|
- }
|
||||||
|
-
|
||||||
|
- /* Check consistency with PRID presets. */
|
||||||
|
- if (native_cpu_type != CPU_NATIVE && tmp != preset.base)
|
||||||
|
- warning (0, "base architecture %qs differs from PRID preset %qs",
|
||||||
|
- loongarch_isa_base_strings[tmp],
|
||||||
|
- loongarch_isa_base_strings[preset.base]);
|
||||||
|
+ if (native_cpu_type != CPU_NATIVE)
|
||||||
|
+ tmp = loongarch_cpu_default_isa[native_cpu_type].base;
|
||||||
|
+ else
|
||||||
|
+ switch (cpucfg_cache[1] & 0x3)
|
||||||
|
+ {
|
||||||
|
+ case 0x02:
|
||||||
|
+ tmp = ISA_BASE_LA64V100;
|
||||||
|
+ break;
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ fatal_error (UNKNOWN_LOCATION,
|
||||||
|
+ "unknown native base architecture %<0x%x%>, "
|
||||||
|
+ "%qs failed",
|
||||||
|
+ (unsigned int) (cpucfg_cache[1] & 0x3),
|
||||||
|
+ "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
|
||||||
|
+ }
|
||||||
|
|
||||||
|
/* Use the native value anyways. */
|
||||||
|
preset.base = tmp;
|
||||||
|
@@ -227,6 +238,21 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
for (const auto &entry: cpucfg_map)
|
||||||
|
if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
|
||||||
|
preset.evolution |= entry.isa_evolution_bit;
|
||||||
|
+
|
||||||
|
+ if (native_cpu_type != CPU_NATIVE)
|
||||||
|
+ {
|
||||||
|
+ /* Check if the local CPU really supports the features of the base
|
||||||
|
+ ISA of probed native_cpu_type. If any feature is not detected,
|
||||||
|
+ either GCC or the hardware is buggy. */
|
||||||
|
+ auto base_isa_feature = loongarch_isa_base_features[preset.base];
|
||||||
|
+ if ((preset.evolution & base_isa_feature) != base_isa_feature)
|
||||||
|
+ warning (0,
|
||||||
|
+ "detected base architecture %qs, but some of its "
|
||||||
|
+ "features are not detected; the detected base "
|
||||||
|
+ "architecture may be unreliable, only detected "
|
||||||
|
+ "features will be enabled",
|
||||||
|
+ loongarch_isa_base_strings[preset.base]);
|
||||||
|
+ }
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tune_native_p)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index cb99caebe..ca0a324dd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -55,12 +55,17 @@ extern "C" {
|
||||||
|
|
||||||
|
/* enum isa_base */
|
||||||
|
extern const char* loongarch_isa_base_strings[];
|
||||||
|
+
|
||||||
|
/* LoongArch V1.00. */
|
||||||
|
#define ISA_BASE_LA64V100 0
|
||||||
|
/* LoongArch V1.10. */
|
||||||
|
#define ISA_BASE_LA64V110 1
|
||||||
|
#define N_ISA_BASE_TYPES 2
|
||||||
|
|
||||||
|
+/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is
|
||||||
|
+ we cannot use the C++ header options.h in loongarch-def.c. */
|
||||||
|
+extern int64_t loongarch_isa_base_features[];
|
||||||
|
+
|
||||||
|
/* enum isa_ext_* */
|
||||||
|
extern const char* loongarch_isa_ext_strings[];
|
||||||
|
#define ISA_EXT_NONE 0
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
index f10a9d3ff..390720479 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
@@ -284,6 +284,9 @@ config_target_isa:
|
||||||
|
/* Get default ISA from "-march" or its default value. */
|
||||||
|
t.isa = loongarch_cpu_default_isa[t.cpu_arch];
|
||||||
|
|
||||||
|
+ if (t.cpu_arch != CPU_NATIVE)
|
||||||
|
+ t.isa.evolution |= loongarch_isa_base_features[t.isa.base];
|
||||||
|
+
|
||||||
|
/* Apply incremental changes. */
|
||||||
|
/* "-march=native" overrides the default FPU type. */
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
156
0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
Normal file
156
0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
From 6b483504c4fbb2a05a17d67e8f51b72149f1bbf9 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Thu, 16 Nov 2023 09:21:47 +0800
|
||||||
|
Subject: [PATCH 037/188] LoongArch: Take the advantage of -mdiv32 if it's
|
||||||
|
enabled
|
||||||
|
|
||||||
|
With -mdiv32, we can assume div.w[u] and mod.w[u] work on the low 32 bits
|
||||||
|
of a 64-bit GPR even if it's not sign-extended.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (DIV): New mode iterator.
|
||||||
|
(<optab:ANY_DIV><mode:GPR>3): Don't expand if TARGET_DIV32.
|
||||||
|
(<optab:ANY_DIV>di3_fake): Disable if TARGET_DIV32.
|
||||||
|
(*<optab:ANY_DIV><mode:GPR>3): Allow SImode if TARGET_DIV32.
|
||||||
|
(<optab:ANY_DIV>si3_extended): New insn if TARGET_DIV32.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/div-div32.c: New test.
|
||||||
|
* gcc.target/loongarch/div-no-div32.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 31 ++++++++++++++++---
|
||||||
|
.../gcc.target/loongarch/div-div32.c | 31 +++++++++++++++++++
|
||||||
|
.../gcc.target/loongarch/div-no-div32.c | 11 +++++++
|
||||||
|
3 files changed, 68 insertions(+), 5 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/div-div32.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/div-no-div32.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 52e40a208..c4e7af107 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -408,6 +408,10 @@
|
||||||
|
;; st.w.
|
||||||
|
(define_mode_iterator ST_ANY [QHWD ANYF])
|
||||||
|
|
||||||
|
+;; A mode for anything legal as a input of a div or mod instruction.
|
||||||
|
+(define_mode_iterator DIV [(DI "TARGET_64BIT")
|
||||||
|
+ (SI "!TARGET_64BIT || TARGET_DIV32")])
|
||||||
|
+
|
||||||
|
;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
|
||||||
|
;; 32-bit version and "mul.d" in the 64-bit version.
|
||||||
|
(define_mode_attr d [(SI "w") (DI "d")])
|
||||||
|
@@ -914,7 +918,7 @@
|
||||||
|
(match_operand:GPR 2 "register_operand")))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
|
||||||
|
+ if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !TARGET_DIV32)
|
||||||
|
{
|
||||||
|
rtx reg1 = gen_reg_rtx (DImode);
|
||||||
|
rtx reg2 = gen_reg_rtx (DImode);
|
||||||
|
@@ -934,9 +938,9 @@
|
||||||
|
})
|
||||||
|
|
||||||
|
(define_insn "*<optab><mode>3"
|
||||||
|
- [(set (match_operand:X 0 "register_operand" "=r,&r,&r")
|
||||||
|
- (any_div:X (match_operand:X 1 "register_operand" "r,r,0")
|
||||||
|
- (match_operand:X 2 "register_operand" "r,r,r")))]
|
||||||
|
+ [(set (match_operand:DIV 0 "register_operand" "=r,&r,&r")
|
||||||
|
+ (any_div:DIV (match_operand:DIV 1 "register_operand" "r,r,0")
|
||||||
|
+ (match_operand:DIV 2 "register_operand" "r,r,r")))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands);
|
||||||
|
@@ -949,6 +953,23 @@
|
||||||
|
(const_string "yes")
|
||||||
|
(const_string "no")))])
|
||||||
|
|
||||||
|
+(define_insn "<optab>si3_extended"
|
||||||
|
+ [(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
|
||||||
|
+ (sign_extend
|
||||||
|
+ (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0")
|
||||||
|
+ (match_operand:SI 2 "register_operand" "r,r,r"))))]
|
||||||
|
+ "TARGET_64BIT && TARGET_DIV32"
|
||||||
|
+{
|
||||||
|
+ return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
|
||||||
|
+}
|
||||||
|
+ [(set_attr "type" "idiv")
|
||||||
|
+ (set_attr "mode" "SI")
|
||||||
|
+ (set (attr "enabled")
|
||||||
|
+ (if_then_else
|
||||||
|
+ (match_test "!!which_alternative == loongarch_check_zero_div_p()")
|
||||||
|
+ (const_string "yes")
|
||||||
|
+ (const_string "no")))])
|
||||||
|
+
|
||||||
|
(define_insn "<optab>di3_fake"
|
||||||
|
[(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
|
||||||
|
(sign_extend:DI
|
||||||
|
@@ -957,7 +978,7 @@
|
||||||
|
(any_div:DI (match_operand:DI 1 "register_operand" "r,r,0")
|
||||||
|
(match_operand:DI 2 "register_operand" "r,r,r")) 0)]
|
||||||
|
UNSPEC_FAKE_ANY_DIV)))]
|
||||||
|
- "TARGET_64BIT"
|
||||||
|
+ "TARGET_64BIT && !TARGET_DIV32"
|
||||||
|
{
|
||||||
|
return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
|
||||||
|
}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/div-div32.c b/gcc/testsuite/gcc.target/loongarch/div-div32.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..8b1f686ec
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/div-div32.c
|
||||||
|
@@ -0,0 +1,31 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mdiv32" } */
|
||||||
|
+/* { dg-final { scan-assembler "div\.w" } } */
|
||||||
|
+/* { dg-final { scan-assembler "div\.wu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "mod\.w" } } */
|
||||||
|
+/* { dg-final { scan-assembler "mod\.wu" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "slli\.w.*,0" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+divw (long a, long b)
|
||||||
|
+{
|
||||||
|
+ return (int)a / (int)b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+divwu (long a, long b)
|
||||||
|
+{
|
||||||
|
+ return (unsigned int)a / (unsigned int)b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+modw (long a, long b)
|
||||||
|
+{
|
||||||
|
+ return (int)a % (int)b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned int
|
||||||
|
+modwu (long a, long b)
|
||||||
|
+{
|
||||||
|
+ return (unsigned int)a % (unsigned int)b;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/div-no-div32.c b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..f0f697ba5
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c
|
||||||
|
@@ -0,0 +1,11 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
|
||||||
|
+/* { dg-final { scan-assembler "div\.w" } } */
|
||||||
|
+/* { dg-final { scan-assembler "div\.wu" } } */
|
||||||
|
+/* { dg-final { scan-assembler "mod\.w" } } */
|
||||||
|
+/* { dg-final { scan-assembler "mod\.wu" } } */
|
||||||
|
+
|
||||||
|
+/* -mno-div32 should be implied by -march=loongarch64. */
|
||||||
|
+/* { dg-final { scan-assembler-times "slli\.w\[^\n\]*0" 8 } } */
|
||||||
|
+
|
||||||
|
+#include "div-div32.c"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
61
0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
Normal file
61
0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
From 42368d6ab1200c157ff473c37889b56b596040e2 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Thu, 16 Nov 2023 09:30:14 +0800
|
||||||
|
Subject: [PATCH 038/188] LoongArch: Don't emit dbar 0x700 if -mld-seq-sa
|
||||||
|
|
||||||
|
This option (CPUCFG word 0x3 bit 23) means "the hardware guarantees that
|
||||||
|
two loads on the same address won't be reordered with each other". Thus
|
||||||
|
we can omit the "load-load" barrier dbar 0x700.
|
||||||
|
|
||||||
|
This is only a micro-optimization because dbar 0x700 is already treated
|
||||||
|
as nop if the hardware supports LD_SEQ_SA.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_print_operand): Don't
|
||||||
|
print dbar 0x700 if TARGET_LD_SEQ_SA.
|
||||||
|
* config/loongarch/sync.md (atomic_load<mode>): Likewise.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 2 +-
|
||||||
|
gcc/config/loongarch/sync.md | 9 +++++----
|
||||||
|
2 files changed, 6 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 8bd46da62..c86b787c4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -6057,7 +6057,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||||
|
if (loongarch_cas_failure_memorder_needs_acquire (
|
||||||
|
memmodel_from_int (INTVAL (op))))
|
||||||
|
fputs ("dbar\t0b10100", file);
|
||||||
|
- else
|
||||||
|
+ else if (!TARGET_LD_SEQ_SA)
|
||||||
|
fputs ("dbar\t0x700", file);
|
||||||
|
break;
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
|
||||||
|
index f4673c856..65443c899 100644
|
||||||
|
--- a/gcc/config/loongarch/sync.md
|
||||||
|
+++ b/gcc/config/loongarch/sync.md
|
||||||
|
@@ -119,13 +119,14 @@
|
||||||
|
case MEMMODEL_SEQ_CST:
|
||||||
|
return "dbar\t0x11\\n\\t"
|
||||||
|
"ld.<size>\t%0,%1\\n\\t"
|
||||||
|
- "dbar\t0x14\\n\\t";
|
||||||
|
+ "dbar\t0x14";
|
||||||
|
case MEMMODEL_ACQUIRE:
|
||||||
|
return "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
- "dbar\t0x14\\n\\t";
|
||||||
|
+ "dbar\t0x14";
|
||||||
|
case MEMMODEL_RELAXED:
|
||||||
|
- return "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
- "dbar\t0x700\\n\\t";
|
||||||
|
+ return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
+ : "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
+ "dbar\t0x700";
|
||||||
|
|
||||||
|
default:
|
||||||
|
/* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
208
0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
Normal file
208
0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
Normal file
@ -0,0 +1,208 @@
|
|||||||
|
From 416bdd180a6c0dab4736a6da26de245cb0487c0e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 25 Oct 2024 02:13:53 +0000
|
||||||
|
Subject: [PATCH 039/188] LoongArch: Add fine-grained control for LAM_BH and
|
||||||
|
LAMCAS
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/genopts/isa-evolution.in: (lam-bh, lamcas):
|
||||||
|
Add.
|
||||||
|
* config/loongarch/loongarch-str.h: Regenerate.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* config/loongarch/loongarch-cpucfg-map.h: Regenerate.
|
||||||
|
* config/loongarch/loongarch-cpu.cc
|
||||||
|
(ISA_BASE_LA64V110_FEATURES): Include OPTION_MASK_ISA_LAM_BH
|
||||||
|
and OPTION_MASK_ISA_LAMCAS.
|
||||||
|
* config/loongarch/sync.md (atomic_add<mode:SHORT>): Use
|
||||||
|
TARGET_LAM_BH instead of ISA_BASE_IS_LA64V110. Remove empty
|
||||||
|
lines from assembly output.
|
||||||
|
(atomic_exchange<mode>_short): Likewise.
|
||||||
|
(atomic_exchange<mode:SHORT>): Likewise.
|
||||||
|
(atomic_fetch_add<mode>_short): Likewise.
|
||||||
|
(atomic_fetch_add<mode:SHORT>): Likewise.
|
||||||
|
(atomic_cas_value_strong<mode>_amcas): Use TARGET_LAMCAS instead
|
||||||
|
of ISA_BASE_IS_LA64V110.
|
||||||
|
(atomic_compare_and_swap<mode>): Likewise.
|
||||||
|
(atomic_compare_and_swap<mode:GPR>): Likewise.
|
||||||
|
(atomic_compare_and_swap<mode:SHORT>): Likewise.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump
|
||||||
|
status of -mlam-bh and -mlamcas if -fverbose-asm.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/genopts/isa-evolution.in | 2 ++
|
||||||
|
gcc/config/loongarch/loongarch-cpu.cc | 3 ++-
|
||||||
|
gcc/config/loongarch/loongarch-cpucfg-map.h | 2 ++
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 2 ++
|
||||||
|
gcc/config/loongarch/loongarch.cc | 2 ++
|
||||||
|
gcc/config/loongarch/loongarch.opt | 8 ++++++++
|
||||||
|
gcc/config/loongarch/sync.md | 18 +++++++++---------
|
||||||
|
7 files changed, 27 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
index e58f0d6a1..a6bc3f87f 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
@@ -1,2 +1,4 @@
|
||||||
|
2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
|
||||||
|
+2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions.
|
||||||
|
+2 28 lamcas Support amcas[_db].{b/h/w/d} instructions.
|
||||||
|
3 23 ld-seq-sa Do not need load-load barriers (dbar 0x700).
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
index 76d66fa55..bbce82c9c 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
initializers! */
|
||||||
|
|
||||||
|
#define ISA_BASE_LA64V110_FEATURES \
|
||||||
|
- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
|
||||||
|
+ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \
|
||||||
|
+ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)
|
||||||
|
|
||||||
|
int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
|
||||||
|
/* [ISA_BASE_LA64V100] = */ 0,
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
index 0c078c397..02ff16712 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
@@ -30,6 +30,8 @@ static constexpr struct {
|
||||||
|
HOST_WIDE_INT isa_evolution_bit;
|
||||||
|
} cpucfg_map[] = {
|
||||||
|
{ 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
|
||||||
|
+ { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
|
||||||
|
+ { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
|
||||||
|
{ 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
|
||||||
|
};
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index cd9dbb41b..0fee9abe5 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -70,5 +70,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_EXPLICIT_RELOCS_ALWAYS "always"
|
||||||
|
|
||||||
|
#define OPTSTR_DIV32 "div32"
|
||||||
|
+#define OPTSTR_LAM_BH "lam-bh"
|
||||||
|
+#define OPTSTR_LAMCAS "lamcas"
|
||||||
|
#define OPTSTR_LD_SEQ_SA "ld-seq-sa"
|
||||||
|
#endif /* LOONGARCH_STR_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index c86b787c4..33d23a731 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -11448,6 +11448,8 @@ loongarch_asm_code_end (void)
|
||||||
|
fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
|
||||||
|
loongarch_isa_base_strings [la_target.isa.base]);
|
||||||
|
DUMP_FEATURE (TARGET_DIV32);
|
||||||
|
+ DUMP_FEATURE (TARGET_LAM_BH);
|
||||||
|
+ DUMP_FEATURE (TARGET_LAMCAS);
|
||||||
|
DUMP_FEATURE (TARGET_LD_SEQ_SA);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index 5251f705d..ea0d5bb4e 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -267,6 +267,14 @@ mdiv32
|
||||||
|
Target Mask(ISA_DIV32) Var(isa_evolution)
|
||||||
|
Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
|
||||||
|
|
||||||
|
+mlam-bh
|
||||||
|
+Target Mask(ISA_LAM_BH) Var(isa_evolution)
|
||||||
|
+Support am{swap/add}[_db].{b/h} instructions.
|
||||||
|
+
|
||||||
|
+mlamcas
|
||||||
|
+Target Mask(ISA_LAMCAS) Var(isa_evolution)
|
||||||
|
+Support amcas[_db].{b/h/w/d} instructions.
|
||||||
|
+
|
||||||
|
mld-seq-sa
|
||||||
|
Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution)
|
||||||
|
Do not need load-load barriers (dbar 0x700).
|
||||||
|
diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
|
||||||
|
index 65443c899..a678e7131 100644
|
||||||
|
--- a/gcc/config/loongarch/sync.md
|
||||||
|
+++ b/gcc/config/loongarch/sync.md
|
||||||
|
@@ -124,7 +124,7 @@
|
||||||
|
return "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
"dbar\t0x14";
|
||||||
|
case MEMMODEL_RELAXED:
|
||||||
|
- return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
+ return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1"
|
||||||
|
: "ld.<size>\t%0,%1\\n\\t"
|
||||||
|
"dbar\t0x700";
|
||||||
|
|
||||||
|
@@ -193,7 +193,7 @@
|
||||||
|
(match_operand:SHORT 1 "reg_or_0_operand" "rJ"))
|
||||||
|
(match_operand:SI 2 "const_int_operand")] ;; model
|
||||||
|
UNSPEC_SYNC_OLD_OP))]
|
||||||
|
- "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "TARGET_LAM_BH"
|
||||||
|
"amadd%A2.<amo>\t$zero,%z1,%0"
|
||||||
|
[(set (attr "length") (const_int 4))])
|
||||||
|
|
||||||
|
@@ -230,7 +230,7 @@
|
||||||
|
UNSPEC_SYNC_EXCHANGE))
|
||||||
|
(set (match_dup 1)
|
||||||
|
(match_operand:SHORT 2 "register_operand" "r"))]
|
||||||
|
- "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "TARGET_LAM_BH"
|
||||||
|
"amswap%A3.<amo>\t%0,%z2,%1"
|
||||||
|
[(set (attr "length") (const_int 4))])
|
||||||
|
|
||||||
|
@@ -266,7 +266,7 @@
|
||||||
|
(match_operand:QHWD 3 "reg_or_0_operand" "rJ")
|
||||||
|
(match_operand:SI 4 "const_int_operand")] ;; mod_s
|
||||||
|
UNSPEC_COMPARE_AND_SWAP))]
|
||||||
|
- "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "TARGET_LAMCAS"
|
||||||
|
"ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
|
||||||
|
[(set (attr "length") (const_int 8))])
|
||||||
|
|
||||||
|
@@ -296,7 +296,7 @@
|
||||||
|
|
||||||
|
operands[6] = mod_s;
|
||||||
|
|
||||||
|
- if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ if (TARGET_LAMCAS)
|
||||||
|
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
|
||||||
|
operands[3], operands[4],
|
||||||
|
operands[6]));
|
||||||
|
@@ -422,7 +422,7 @@
|
||||||
|
|
||||||
|
operands[6] = mod_s;
|
||||||
|
|
||||||
|
- if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ if (TARGET_LAMCAS)
|
||||||
|
emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
|
||||||
|
operands[3], operands[4],
|
||||||
|
operands[6]));
|
||||||
|
@@ -642,7 +642,7 @@
|
||||||
|
(match_operand:SHORT 2 "register_operand"))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ if (TARGET_LAM_BH)
|
||||||
|
emit_insn (gen_atomic_exchange<mode>_short (operands[0], operands[1], operands[2], operands[3]));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
@@ -663,7 +663,7 @@
|
||||||
|
(match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
|
||||||
|
(match_operand:SI 3 "const_int_operand")] ;; model
|
||||||
|
UNSPEC_SYNC_OLD_OP))]
|
||||||
|
- "ISA_BASE_IS_LA64V110"
|
||||||
|
+ "TARGET_LAM_BH"
|
||||||
|
"amadd%A3.<amo>\t%0,%z2,%1"
|
||||||
|
[(set (attr "length") (const_int 4))])
|
||||||
|
|
||||||
|
@@ -678,7 +678,7 @@
|
||||||
|
UNSPEC_SYNC_OLD_OP))]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- if (ISA_BASE_IS_LA64V110)
|
||||||
|
+ if (TARGET_LAM_BH)
|
||||||
|
emit_insn (gen_atomic_fetch_add<mode>_short (operands[0], operands[1],
|
||||||
|
operands[2], operands[3]));
|
||||||
|
else
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,50 @@
|
|||||||
|
From 8ca46859ad70fb9473f6dbb1d3069e68ed43ef36 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 19 Nov 2023 01:41:12 +0800
|
||||||
|
Subject: [PATCH 040/188] LoongArch: Fix "-mexplicit-relocs=none
|
||||||
|
-mcmodel=medium" producing %call36 when the assembler does not support it
|
||||||
|
|
||||||
|
Even if !HAVE_AS_SUPPORT_CALL36, const_call_insn_operand should still
|
||||||
|
return false when -mexplicit-relocs=none -mcmodel=medium to make
|
||||||
|
loongarch_legitimize_call_address emit la.local or la.global.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/predicates.md (const_call_insn_operand):
|
||||||
|
Remove buggy "HAVE_AS_SUPPORT_CALL36" conditions. Change "1" to
|
||||||
|
"true" to make the coding style consistent.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/predicates.md | 6 ++----
|
||||||
|
1 file changed, 2 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 2aae87db4..30a0dee9f 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -444,21 +444,19 @@
|
||||||
|
case SYMBOL_PCREL:
|
||||||
|
if (TARGET_CMODEL_EXTREME
|
||||||
|
|| (TARGET_CMODEL_MEDIUM
|
||||||
|
- && HAVE_AS_SUPPORT_CALL36
|
||||||
|
&& (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
- return 1;
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
case SYMBOL_GOT_DISP:
|
||||||
|
if (TARGET_CMODEL_EXTREME
|
||||||
|
|| !flag_plt
|
||||||
|
|| (flag_plt && TARGET_CMODEL_MEDIUM
|
||||||
|
- && HAVE_AS_SUPPORT_CALL36
|
||||||
|
&& (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
|
||||||
|
return false;
|
||||||
|
else
|
||||||
|
- return 1;
|
||||||
|
+ return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
43
0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
Normal file
43
0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
From 4c24f920e52c0dddf4bbbc391d2e5d2524754b4a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Sat, 18 Nov 2023 11:04:42 +0800
|
||||||
|
Subject: [PATCH 041/188] LoongArch: Modify MUSL_DYNAMIC_LINKER.
|
||||||
|
|
||||||
|
Use no suffix at all in the musl dynamic linker name for hard
|
||||||
|
float ABI. Use -sf and -sp suffixes in musl dynamic linker name
|
||||||
|
for soft float and single precision ABIs. The following table
|
||||||
|
outlines the musl interpreter names for the LoongArch64 ABI names.
|
||||||
|
|
||||||
|
musl interpreter | LoongArch64 ABI
|
||||||
|
--------------------------- | -----------------
|
||||||
|
ld-musl-loongarch64.so.1 | loongarch64-lp64d
|
||||||
|
ld-musl-loongarch64-sp.so.1 | loongarch64-lp64f
|
||||||
|
ld-musl-loongarch64-sf.so.1 | loongarch64-lp64s
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/gnu-user.h (MUSL_ABI_SPEC): Modify suffix.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/gnu-user.h | 6 +++---
|
||||||
|
1 file changed, 3 insertions(+), 3 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
|
||||||
|
index 60ef75601..9fc49dc8f 100644
|
||||||
|
--- a/gcc/config/loongarch/gnu-user.h
|
||||||
|
+++ b/gcc/config/loongarch/gnu-user.h
|
||||||
|
@@ -34,9 +34,9 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
"/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
|
||||||
|
|
||||||
|
#define MUSL_ABI_SPEC \
|
||||||
|
- "%{mabi=lp64d:-lp64d}" \
|
||||||
|
- "%{mabi=lp64f:-lp64f}" \
|
||||||
|
- "%{mabi=lp64s:-lp64s}"
|
||||||
|
+ "%{mabi=lp64d:}" \
|
||||||
|
+ "%{mabi=lp64f:-sp}" \
|
||||||
|
+ "%{mabi=lp64s:-sf}"
|
||||||
|
|
||||||
|
#undef MUSL_DYNAMIC_LINKER
|
||||||
|
#define MUSL_DYNAMIC_LINKER \
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,85 @@
|
|||||||
|
From 0f65e5ebe60d9ad5141115661ed71c321156cd95 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 21 Nov 2023 09:09:25 +0800
|
||||||
|
Subject: [PATCH 042/188] LoongArch: Fix libgcc build failure when libc is not
|
||||||
|
available
|
||||||
|
|
||||||
|
To use int64_t we included <stdint.h> in loongarch-def.h.
|
||||||
|
Unfortunately, loongarch-def.h is also used by libgcc etc., causing a
|
||||||
|
build failure when building a "stage1" cross compiler at which the
|
||||||
|
target libc is not built yet.
|
||||||
|
|
||||||
|
As int64_t is used for a C-compatible replacement of HOST_WIDE_INT, it's
|
||||||
|
not directly or indirectly referred to by the target libraries. So
|
||||||
|
guard everything requiring stdint.h with #if then they'll not block
|
||||||
|
target libraries.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-def.h (stdint.h): Guard with #if to
|
||||||
|
exclude it for target libraries.
|
||||||
|
(loongarch_isa_base_features): Likewise.
|
||||||
|
(loongarch_isa): Likewise.
|
||||||
|
(loongarch_abi): Likewise.
|
||||||
|
(loongarch_target): Likewise.
|
||||||
|
(loongarch_cpu_default_isa): Likewise.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 10 +++++++++-
|
||||||
|
1 file changed, 9 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index ca0a324dd..ef848f606 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -46,7 +46,10 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#ifndef LOONGARCH_DEF_H
|
||||||
|
#define LOONGARCH_DEF_H
|
||||||
|
|
||||||
|
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
#include <stdint.h>
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#include "loongarch-tune.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
@@ -62,9 +65,11 @@ extern const char* loongarch_isa_base_strings[];
|
||||||
|
#define ISA_BASE_LA64V110 1
|
||||||
|
#define N_ISA_BASE_TYPES 2
|
||||||
|
|
||||||
|
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is
|
||||||
|
we cannot use the C++ header options.h in loongarch-def.c. */
|
||||||
|
extern int64_t loongarch_isa_base_features[];
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* enum isa_ext_* */
|
||||||
|
extern const char* loongarch_isa_ext_strings[];
|
||||||
|
@@ -121,6 +126,7 @@ extern const char* loongarch_cmodel_strings[];
|
||||||
|
#define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET)
|
||||||
|
|
||||||
|
|
||||||
|
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
/* Internal representation of the target. */
|
||||||
|
struct loongarch_isa
|
||||||
|
{
|
||||||
|
@@ -150,6 +156,9 @@ struct loongarch_target
|
||||||
|
int cmodel; /* CMODEL_ */
|
||||||
|
};
|
||||||
|
|
||||||
|
+extern struct loongarch_isa loongarch_cpu_default_isa[];
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* CPU properties. */
|
||||||
|
/* index */
|
||||||
|
#define CPU_NATIVE 0
|
||||||
|
@@ -162,7 +171,6 @@ struct loongarch_target
|
||||||
|
|
||||||
|
/* parallel tables. */
|
||||||
|
extern const char* loongarch_cpu_strings[];
|
||||||
|
-extern struct loongarch_isa loongarch_cpu_default_isa[];
|
||||||
|
extern int loongarch_cpu_issue_rate[];
|
||||||
|
extern int loongarch_cpu_multipass_dfa_lookahead[];
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
148
0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
Normal file
148
0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
From cdea7c114fa48012705d65134276619b5679fa35 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 19 Nov 2023 06:12:22 +0800
|
||||||
|
Subject: [PATCH 043/188] LoongArch: Optimize LSX vector shuffle on
|
||||||
|
floating-point vector
|
||||||
|
MIME-Version: 1.0
|
||||||
|
Content-Type: text/plain; charset=UTF-8
|
||||||
|
Content-Transfer-Encoding: 8bit
|
||||||
|
|
||||||
|
The vec_perm expander was wrongly defined. The GCC Internals manual says:
|
||||||
|
|
||||||
|
Operand 3 is the “selector”. It is an integral mode vector of the same
|
||||||
|
width and number of elements as mode M.
|
||||||
|
|
||||||
|
But we made operand 3 in the same mode as the shuffled vectors, so it
|
||||||
|
would be a FP mode vector if the shuffled vectors are FP mode.
|
||||||
|
|
||||||
|
With this mistake, the generic code manages to work around and it ends
|
||||||
|
up creating some very nasty code for a simple __builtin_shuffle (a, b,
|
||||||
|
c) where a and b are V4SF, c is V4SI:
|
||||||
|
|
||||||
|
la.local $r12,.LANCHOR0
|
||||||
|
la.local $r13,.LANCHOR1
|
||||||
|
vld $vr1,$r12,48
|
||||||
|
vslli.w $vr1,$vr1,2
|
||||||
|
vld $vr2,$r12,16
|
||||||
|
vld $vr0,$r13,0
|
||||||
|
vld $vr3,$r13,16
|
||||||
|
vshuf.b $vr0,$vr1,$vr1,$vr0
|
||||||
|
vld $vr1,$r12,32
|
||||||
|
vadd.b $vr0,$vr0,$vr3
|
||||||
|
vandi.b $vr0,$vr0,31
|
||||||
|
vshuf.b $vr0,$vr1,$vr2,$vr0
|
||||||
|
vst $vr0,$r12,0
|
||||||
|
jr $r1
|
||||||
|
|
||||||
|
This is obviously stupid. Fix the expander definition and adjust
|
||||||
|
loongarch_expand_vec_perm to handle it correctly.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the
|
||||||
|
selector VIMODE.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_expand_vec_perm):
|
||||||
|
Use the mode of the selector (instead of the shuffled vector)
|
||||||
|
for truncating it. Operate on subregs in the selector mode if
|
||||||
|
the shuffled vector has a different mode (i.e. it's a
|
||||||
|
floating-point vector).
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-shuf-fp.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 18 ++++++++++--------
|
||||||
|
gcc/config/loongarch/lsx.md | 2 +-
|
||||||
|
.../gcc.target/loongarch/vect-shuf-fp.c | 16 ++++++++++++++++
|
||||||
|
3 files changed, 27 insertions(+), 9 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 33d23a731..d95ac68e8 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -8603,8 +8603,9 @@ void
|
||||||
|
loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
|
||||||
|
{
|
||||||
|
machine_mode vmode = GET_MODE (target);
|
||||||
|
+ machine_mode vimode = GET_MODE (sel);
|
||||||
|
auto nelt = GET_MODE_NUNITS (vmode);
|
||||||
|
- auto round_reg = gen_reg_rtx (vmode);
|
||||||
|
+ auto round_reg = gen_reg_rtx (vimode);
|
||||||
|
rtx round_data[MAX_VECT_LEN];
|
||||||
|
|
||||||
|
for (int i = 0; i < nelt; i += 1)
|
||||||
|
@@ -8612,9 +8613,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
|
||||||
|
round_data[i] = GEN_INT (0x1f);
|
||||||
|
}
|
||||||
|
|
||||||
|
- rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data));
|
||||||
|
+ rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data));
|
||||||
|
emit_move_insn (round_reg, round_data_rtx);
|
||||||
|
|
||||||
|
+ if (vmode != vimode)
|
||||||
|
+ {
|
||||||
|
+ target = lowpart_subreg (vimode, target, vmode);
|
||||||
|
+ op0 = lowpart_subreg (vimode, op0, vmode);
|
||||||
|
+ op1 = lowpart_subreg (vimode, op1, vmode);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
switch (vmode)
|
||||||
|
{
|
||||||
|
case E_V16QImode:
|
||||||
|
@@ -8622,17 +8630,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
|
||||||
|
emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
|
||||||
|
break;
|
||||||
|
case E_V2DFmode:
|
||||||
|
- emit_insn (gen_andv2di3 (sel, sel, round_reg));
|
||||||
|
- emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
|
||||||
|
- break;
|
||||||
|
case E_V2DImode:
|
||||||
|
emit_insn (gen_andv2di3 (sel, sel, round_reg));
|
||||||
|
emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
|
||||||
|
break;
|
||||||
|
case E_V4SFmode:
|
||||||
|
- emit_insn (gen_andv4si3 (sel, sel, round_reg));
|
||||||
|
- emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
|
||||||
|
- break;
|
||||||
|
case E_V4SImode:
|
||||||
|
emit_insn (gen_andv4si3 (sel, sel, round_reg));
|
||||||
|
emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 8ea41c85b..5e8d8d74b 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -837,7 +837,7 @@
|
||||||
|
[(match_operand:LSX 0 "register_operand")
|
||||||
|
(match_operand:LSX 1 "register_operand")
|
||||||
|
(match_operand:LSX 2 "register_operand")
|
||||||
|
- (match_operand:LSX 3 "register_operand")]
|
||||||
|
+ (match_operand:<VIMODE> 3 "register_operand")]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
{
|
||||||
|
loongarch_expand_vec_perm (operands[0], operands[1],
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..7acc2113a
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
|
||||||
|
@@ -0,0 +1,16 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mlasx -O3" } */
|
||||||
|
+/* { dg-final { scan-assembler "vshuf\.w" } } */
|
||||||
|
+
|
||||||
|
+#define V __attribute__ ((vector_size (16)))
|
||||||
|
+
|
||||||
|
+int a V;
|
||||||
|
+float b V;
|
||||||
|
+float c V;
|
||||||
|
+float d V;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test (void)
|
||||||
|
+{
|
||||||
|
+ d = __builtin_shuffle (b, c, a);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
112
0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
Normal file
112
0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
From aaf58efe8414a4eaceb6721d9c242df710d1762c Mon Sep 17 00:00:00 2001
|
||||||
|
From: Guo Jie <guojie@loongson.cn>
|
||||||
|
Date: Thu, 23 Nov 2023 11:04:17 +0800
|
||||||
|
Subject: [PATCH 044/188] LoongArch: Optimize the loading of immediate numbers
|
||||||
|
with the same high and low 32-bit values
|
||||||
|
|
||||||
|
For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
|
||||||
|
|
||||||
|
long long r = 0x0101010101010101;
|
||||||
|
|
||||||
|
Before this patch:
|
||||||
|
|
||||||
|
lu12i.w $r15,16842752>>12
|
||||||
|
ori $r15,$r15,257
|
||||||
|
lu32i.d $r15,0x1010100000000>>32
|
||||||
|
lu52i.d $r15,$r15,0x100000000000000>>52
|
||||||
|
|
||||||
|
After this patch:
|
||||||
|
|
||||||
|
lu12i.w $r15,16842752>>12
|
||||||
|
ori $r15,$r15,257
|
||||||
|
bstrins.d $r15,$r15,63,32
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(enum loongarch_load_imm_method): Add new method.
|
||||||
|
(loongarch_build_integer): Add relevant implementations for
|
||||||
|
new method.
|
||||||
|
(loongarch_move_integer): Ditto.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/imm-load1.c: Change old check.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 22 ++++++++++++++++++-
|
||||||
|
.../gcc.target/loongarch/imm-load1.c | 3 ++-
|
||||||
|
2 files changed, 23 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index d95ac68e8..048d3802b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -142,12 +142,16 @@ struct loongarch_address_info
|
||||||
|
|
||||||
|
METHOD_LU52I:
|
||||||
|
Load 52-63 bit of the immediate number.
|
||||||
|
+
|
||||||
|
+ METHOD_MIRROR:
|
||||||
|
+ Copy 0-31 bit of the immediate number to 32-63bit.
|
||||||
|
*/
|
||||||
|
enum loongarch_load_imm_method
|
||||||
|
{
|
||||||
|
METHOD_NORMAL,
|
||||||
|
METHOD_LU32I,
|
||||||
|
- METHOD_LU52I
|
||||||
|
+ METHOD_LU52I,
|
||||||
|
+ METHOD_MIRROR
|
||||||
|
};
|
||||||
|
|
||||||
|
struct loongarch_integer_op
|
||||||
|
@@ -1553,11 +1557,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
|
||||||
|
|
||||||
|
int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
|
||||||
|
int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
|
||||||
|
+
|
||||||
|
+ uint32_t hival = (uint32_t) (value >> 32);
|
||||||
|
+ uint32_t loval = (uint32_t) value;
|
||||||
|
+
|
||||||
|
/* Determine whether the upper 32 bits are sign-extended from the lower
|
||||||
|
32 bits. If it is, the instructions to load the high order can be
|
||||||
|
ommitted. */
|
||||||
|
if (lu32i[sign31] && lu52i[sign31])
|
||||||
|
return cost;
|
||||||
|
+ /* If the lower 32 bits are the same as the upper 32 bits, just copy
|
||||||
|
+ the lower 32 bits to the upper 32 bits. */
|
||||||
|
+ else if (loval == hival)
|
||||||
|
+ {
|
||||||
|
+ codes[cost].method = METHOD_MIRROR;
|
||||||
|
+ codes[cost].curr_value = value;
|
||||||
|
+ return cost + 1;
|
||||||
|
+ }
|
||||||
|
/* Determine whether bits 32-51 are sign-extended from the lower 32
|
||||||
|
bits. If so, directly load 52-63 bits. */
|
||||||
|
else if (lu32i[sign31])
|
||||||
|
@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
|
||||||
|
gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
|
||||||
|
GEN_INT (codes[i].value));
|
||||||
|
break;
|
||||||
|
+ case METHOD_MIRROR:
|
||||||
|
+ gcc_assert (mode == DImode);
|
||||||
|
+ emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
|
||||||
|
+ break;
|
||||||
|
default:
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
|
||||||
|
index 2ff029712..f64cc2956 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mabi=lp64d -O2" } */
|
||||||
|
-/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
|
||||||
|
|
||||||
|
|
||||||
|
extern long long b[10];
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,30 @@
|
|||||||
|
From fa28ce4ac91691595e14838be49c9dd42b153b7f Mon Sep 17 00:00:00 2001
|
||||||
|
From: Guo Jie <guojie@loongson.cn>
|
||||||
|
Date: Thu, 23 Nov 2023 11:05:56 +0800
|
||||||
|
Subject: [PATCH 045/188] LoongArch: Fix runtime error in a gcc build with
|
||||||
|
--with-build-config=bootstrap-ubsan
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_split_plus_constant):
|
||||||
|
Avoid left shift of negative value -0x8000.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 048d3802b..ecceca22d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -4265,7 +4265,7 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
|
||||||
|
else if (loongarch_addu16i_imm12_operand_p (v, mode))
|
||||||
|
a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
|
||||||
|
else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
|
||||||
|
- a = (v > 0 ? 0x7fff : -0x8000) << 16;
|
||||||
|
+ a = (v > 0 ? 0x7fff0000 : ~0x7fffffff);
|
||||||
|
else
|
||||||
|
gcc_unreachable ();
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
1295
0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
Normal file
1295
0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
Normal file
File diff suppressed because it is too large
Load Diff
268
0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Normal file
268
0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Normal file
@ -0,0 +1,268 @@
|
|||||||
|
From 4c13256ea34b4169ceb3f9c7826843b754c6a6e0 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 19 Nov 2023 16:28:59 +0800
|
||||||
|
Subject: [PATCH 047/188] LoongArch: Use standard pattern name and RTX code for
|
||||||
|
LSX/LASX muh instructions
|
||||||
|
|
||||||
|
Remove unnecessary UNSPECs and make the muh instructions useful with
|
||||||
|
GNU vectors or auto vectorization.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/simd.md (muh): New code attribute mapping
|
||||||
|
any_extend to smul_highpart or umul_highpart.
|
||||||
|
(<su>mul<mode>3_highpart): New define_insn.
|
||||||
|
* config/loongarch/lsx.md (UNSPEC_LSX_VMUH_S): Remove.
|
||||||
|
(UNSPEC_LSX_VMUH_U): Remove.
|
||||||
|
(lsx_vmuh_s_<lsxfmt>): Remove.
|
||||||
|
(lsx_vmuh_u_<lsxfmt>): Remove.
|
||||||
|
* config/loongarch/lasx.md (UNSPEC_LASX_XVMUH_S): Remove.
|
||||||
|
(UNSPEC_LASX_XVMUH_U): Remove.
|
||||||
|
(lasx_xvmuh_s_<lasxfmt>): Remove.
|
||||||
|
(lasx_xvmuh_u_<lasxfmt>): Remove.
|
||||||
|
* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vmuh_b):
|
||||||
|
Redefine to standard pattern name.
|
||||||
|
(CODE_FOR_lsx_vmuh_h): Likewise.
|
||||||
|
(CODE_FOR_lsx_vmuh_w): Likewise.
|
||||||
|
(CODE_FOR_lsx_vmuh_d): Likewise.
|
||||||
|
(CODE_FOR_lsx_vmuh_bu): Likewise.
|
||||||
|
(CODE_FOR_lsx_vmuh_hu): Likewise.
|
||||||
|
(CODE_FOR_lsx_vmuh_wu): Likewise.
|
||||||
|
(CODE_FOR_lsx_vmuh_du): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_b): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_h): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_w): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_d): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_bu): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_hu): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_wu): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvmuh_du): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-muh.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 22 ------------
|
||||||
|
gcc/config/loongarch/loongarch-builtins.cc | 32 ++++++++---------
|
||||||
|
gcc/config/loongarch/lsx.md | 22 ------------
|
||||||
|
gcc/config/loongarch/simd.md | 16 +++++++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/vect-muh.c | 36 +++++++++++++++++++
|
||||||
|
5 files changed, 68 insertions(+), 60 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-muh.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index d4a56c307..023a023b4 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -68,8 +68,6 @@
|
||||||
|
UNSPEC_LASX_BRANCH
|
||||||
|
UNSPEC_LASX_BRANCH_V
|
||||||
|
|
||||||
|
- UNSPEC_LASX_XVMUH_S
|
||||||
|
- UNSPEC_LASX_XVMUH_U
|
||||||
|
UNSPEC_LASX_MXVEXTW_U
|
||||||
|
UNSPEC_LASX_XVSLLWIL_S
|
||||||
|
UNSPEC_LASX_XVSLLWIL_U
|
||||||
|
@@ -2823,26 +2821,6 @@
|
||||||
|
[(set_attr "type" "simd_logic")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lasx_xvmuh_s_<lasxfmt>"
|
||||||
|
- [(set (match_operand:ILASX 0 "register_operand" "=f")
|
||||||
|
- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:ILASX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVMUH_S))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvmuh.<lasxfmt>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-(define_insn "lasx_xvmuh_u_<lasxfmt_u>"
|
||||||
|
- [(set (match_operand:ILASX 0 "register_operand" "=f")
|
||||||
|
- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:ILASX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVMUH_U))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvmuh.<lasxfmt_u>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
(define_insn "lasx_xvsllwil_s_<dlasxfmt>_<lasxfmt>"
|
||||||
|
[(set (match_operand:<VDMODE256> 0 "register_operand" "=f")
|
||||||
|
(unspec:<VDMODE256> [(match_operand:ILASX_WHB 1 "register_operand" "f")
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
index fb458feac..41ea357cf 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
@@ -319,6 +319,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
#define CODE_FOR_lsx_vmod_hu CODE_FOR_umodv8hi3
|
||||||
|
#define CODE_FOR_lsx_vmod_wu CODE_FOR_umodv4si3
|
||||||
|
#define CODE_FOR_lsx_vmod_du CODE_FOR_umodv2di3
|
||||||
|
+#define CODE_FOR_lsx_vmuh_b CODE_FOR_smulv16qi3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_h CODE_FOR_smulv8hi3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_w CODE_FOR_smulv4si3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_d CODE_FOR_smulv2di3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_bu CODE_FOR_umulv16qi3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_hu CODE_FOR_umulv8hi3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_wu CODE_FOR_umulv4si3_highpart
|
||||||
|
+#define CODE_FOR_lsx_vmuh_du CODE_FOR_umulv2di3_highpart
|
||||||
|
#define CODE_FOR_lsx_vmul_b CODE_FOR_mulv16qi3
|
||||||
|
#define CODE_FOR_lsx_vmul_h CODE_FOR_mulv8hi3
|
||||||
|
#define CODE_FOR_lsx_vmul_w CODE_FOR_mulv4si3
|
||||||
|
@@ -439,14 +447,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
#define CODE_FOR_lsx_vfnmsub_s CODE_FOR_vfnmsubv4sf4_nmsub4
|
||||||
|
#define CODE_FOR_lsx_vfnmsub_d CODE_FOR_vfnmsubv2df4_nmsub4
|
||||||
|
|
||||||
|
-#define CODE_FOR_lsx_vmuh_b CODE_FOR_lsx_vmuh_s_b
|
||||||
|
-#define CODE_FOR_lsx_vmuh_h CODE_FOR_lsx_vmuh_s_h
|
||||||
|
-#define CODE_FOR_lsx_vmuh_w CODE_FOR_lsx_vmuh_s_w
|
||||||
|
-#define CODE_FOR_lsx_vmuh_d CODE_FOR_lsx_vmuh_s_d
|
||||||
|
-#define CODE_FOR_lsx_vmuh_bu CODE_FOR_lsx_vmuh_u_bu
|
||||||
|
-#define CODE_FOR_lsx_vmuh_hu CODE_FOR_lsx_vmuh_u_hu
|
||||||
|
-#define CODE_FOR_lsx_vmuh_wu CODE_FOR_lsx_vmuh_u_wu
|
||||||
|
-#define CODE_FOR_lsx_vmuh_du CODE_FOR_lsx_vmuh_u_du
|
||||||
|
#define CODE_FOR_lsx_vsllwil_h_b CODE_FOR_lsx_vsllwil_s_h_b
|
||||||
|
#define CODE_FOR_lsx_vsllwil_w_h CODE_FOR_lsx_vsllwil_s_w_h
|
||||||
|
#define CODE_FOR_lsx_vsllwil_d_w CODE_FOR_lsx_vsllwil_s_d_w
|
||||||
|
@@ -588,6 +588,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
#define CODE_FOR_lasx_xvmul_h CODE_FOR_mulv16hi3
|
||||||
|
#define CODE_FOR_lasx_xvmul_w CODE_FOR_mulv8si3
|
||||||
|
#define CODE_FOR_lasx_xvmul_d CODE_FOR_mulv4di3
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_b CODE_FOR_smulv32qi3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_h CODE_FOR_smulv16hi3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_w CODE_FOR_smulv8si3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_d CODE_FOR_smulv4di3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_umulv32qi3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_umulv16hi3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_umulv8si3_highpart
|
||||||
|
+#define CODE_FOR_lasx_xvmuh_du CODE_FOR_umulv4di3_highpart
|
||||||
|
#define CODE_FOR_lasx_xvclz_b CODE_FOR_clzv32qi2
|
||||||
|
#define CODE_FOR_lasx_xvclz_h CODE_FOR_clzv16hi2
|
||||||
|
#define CODE_FOR_lasx_xvclz_w CODE_FOR_clzv8si2
|
||||||
|
@@ -697,14 +705,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
#define CODE_FOR_lasx_xvavgr_hu CODE_FOR_lasx_xvavgr_u_hu
|
||||||
|
#define CODE_FOR_lasx_xvavgr_wu CODE_FOR_lasx_xvavgr_u_wu
|
||||||
|
#define CODE_FOR_lasx_xvavgr_du CODE_FOR_lasx_xvavgr_u_du
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_b CODE_FOR_lasx_xvmuh_s_b
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_h CODE_FOR_lasx_xvmuh_s_h
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_w CODE_FOR_lasx_xvmuh_s_w
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_d CODE_FOR_lasx_xvmuh_s_d
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_lasx_xvmuh_u_bu
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_lasx_xvmuh_u_hu
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_lasx_xvmuh_u_wu
|
||||||
|
-#define CODE_FOR_lasx_xvmuh_du CODE_FOR_lasx_xvmuh_u_du
|
||||||
|
#define CODE_FOR_lasx_xvssran_b_h CODE_FOR_lasx_xvssran_s_b_h
|
||||||
|
#define CODE_FOR_lasx_xvssran_h_w CODE_FOR_lasx_xvssran_s_h_w
|
||||||
|
#define CODE_FOR_lasx_xvssran_w_d CODE_FOR_lasx_xvssran_s_w_d
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index c1c3719e3..537afaf96 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -64,8 +64,6 @@
|
||||||
|
UNSPEC_LSX_VSRLR
|
||||||
|
UNSPEC_LSX_VSRLRI
|
||||||
|
UNSPEC_LSX_VSHUF
|
||||||
|
- UNSPEC_LSX_VMUH_S
|
||||||
|
- UNSPEC_LSX_VMUH_U
|
||||||
|
UNSPEC_LSX_VEXTW_S
|
||||||
|
UNSPEC_LSX_VEXTW_U
|
||||||
|
UNSPEC_LSX_VSLLWIL_S
|
||||||
|
@@ -2506,26 +2504,6 @@
|
||||||
|
[(set_attr "type" "simd_logic")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lsx_vmuh_s_<lsxfmt>"
|
||||||
|
- [(set (match_operand:ILSX 0 "register_operand" "=f")
|
||||||
|
- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:ILSX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VMUH_S))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vmuh.<lsxfmt>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-(define_insn "lsx_vmuh_u_<lsxfmt_u>"
|
||||||
|
- [(set (match_operand:ILSX 0 "register_operand" "=f")
|
||||||
|
- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:ILSX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VMUH_U))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vmuh.<lsxfmt_u>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
(define_insn "lsx_vextw_s_d"
|
||||||
|
[(set (match_operand:V2DI 0 "register_operand" "=f")
|
||||||
|
(unspec:V2DI [(match_operand:V4SI 1 "register_operand" "f")]
|
||||||
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
||||||
|
index 27d1ffecd..a0e8db3c0 100644
|
||||||
|
--- a/gcc/config/loongarch/simd.md
|
||||||
|
+++ b/gcc/config/loongarch/simd.md
|
||||||
|
@@ -206,6 +206,22 @@
|
||||||
|
[(set_attr "type" "simd_fcvt")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; <x>vmuh.{b/h/w/d}
|
||||||
|
+
|
||||||
|
+(define_code_attr muh
|
||||||
|
+ [(sign_extend "smul_highpart")
|
||||||
|
+ (zero_extend "umul_highpart")])
|
||||||
|
+
|
||||||
|
+(define_insn "<su>mul<mode>3_highpart"
|
||||||
|
+ [(set (match_operand:IVEC 0 "register_operand" "=f")
|
||||||
|
+ (<muh>:IVEC (match_operand:IVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:IVEC 2 "register_operand" "f")))
|
||||||
|
+ (any_extend (const_int 0))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vmuh.<simdfmt><u>\t%<wu>0,%<wu>1,%<wu>2"
|
||||||
|
+ [(set_attr "type" "simd_int_arith")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
; The LoongArch SX Instructions.
|
||||||
|
(include "lsx.md")
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-muh.c b/gcc/testsuite/gcc.target/loongarch/vect-muh.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a788840b2
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-muh.c
|
||||||
|
@@ -0,0 +1,36 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mlasx -O3" } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvmuh\.w\t" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvmuh\.wu\t" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvmuh\.w\t" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvmuh\.wu\t" } } */
|
||||||
|
+
|
||||||
|
+int a[8], b[8], c[8];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test1 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 4; i++)
|
||||||
|
+ c[i] = ((long)a[i] * (long)b[i]) >> 32;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test2 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 4; i++)
|
||||||
|
+ c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test3 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 8; i++)
|
||||||
|
+ c[i] = ((long)a[i] * (long)b[i]) >> 32;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test4 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 8; i++)
|
||||||
|
+ c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
285
0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Normal file
285
0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
Normal file
@ -0,0 +1,285 @@
|
|||||||
|
From 9dde2178e64893e4c46b1c375a658f8ab6d34fdd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 19 Nov 2023 17:28:06 +0800
|
||||||
|
Subject: [PATCH 048/188] LoongArch: Use standard pattern name and RTX code for
|
||||||
|
LSX/LASX rotate shift
|
||||||
|
|
||||||
|
Remove unnecessary UNSPECs and make the [x]vrotr[i] instructions useful
|
||||||
|
with GNU vectors and auto vectorization.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lsx.md (bitimm): Move to ...
|
||||||
|
(UNSPEC_LSX_VROTR): Remove.
|
||||||
|
(lsx_vrotr_<lsxfmt>): Remove.
|
||||||
|
(lsx_vrotri_<lsxfmt>): Remove.
|
||||||
|
* config/loongarch/lasx.md (UNSPEC_LASX_XVROTR): Remove.
|
||||||
|
(lasx_xvrotr_<lasxfmt>): Remove.
|
||||||
|
(lasx_xvrotri_<lasxfmt>): Remove.
|
||||||
|
* config/loongarch/simd.md (bitimm): ... here. Expand it to
|
||||||
|
cover LASX modes.
|
||||||
|
(vrotr<mode>3): New define_insn.
|
||||||
|
(vrotri<mode>3): New define_insn.
|
||||||
|
* config/loongarch/loongarch-builtins.cc:
|
||||||
|
(CODE_FOR_lsx_vrotr_b): Use standard pattern name.
|
||||||
|
(CODE_FOR_lsx_vrotr_h): Likewise.
|
||||||
|
(CODE_FOR_lsx_vrotr_w): Likewise.
|
||||||
|
(CODE_FOR_lsx_vrotr_d): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotr_b): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotr_h): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotr_w): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotr_d): Likewise.
|
||||||
|
(CODE_FOR_lsx_vrotri_b): Define to standard pattern name.
|
||||||
|
(CODE_FOR_lsx_vrotri_h): Likewise.
|
||||||
|
(CODE_FOR_lsx_vrotri_w): Likewise.
|
||||||
|
(CODE_FOR_lsx_vrotri_d): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotri_b): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotri_h): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotri_w): Likewise.
|
||||||
|
(CODE_FOR_lasx_xvrotri_d): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-rotr.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 22 ------------
|
||||||
|
gcc/config/loongarch/loongarch-builtins.cc | 16 +++++++++
|
||||||
|
gcc/config/loongarch/lsx.md | 28 ---------------
|
||||||
|
gcc/config/loongarch/simd.md | 29 +++++++++++++++
|
||||||
|
.../gcc.target/loongarch/vect-rotr.c | 36 +++++++++++++++++++
|
||||||
|
5 files changed, 81 insertions(+), 50 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-rotr.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 023a023b4..116b30c07 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -138,7 +138,6 @@
|
||||||
|
UNSPEC_LASX_XVHSUBW_Q_D
|
||||||
|
UNSPEC_LASX_XVHADDW_QU_DU
|
||||||
|
UNSPEC_LASX_XVHSUBW_QU_DU
|
||||||
|
- UNSPEC_LASX_XVROTR
|
||||||
|
UNSPEC_LASX_XVADD_Q
|
||||||
|
UNSPEC_LASX_XVSUB_Q
|
||||||
|
UNSPEC_LASX_XVREPLVE
|
||||||
|
@@ -4232,18 +4231,6 @@
|
||||||
|
[(set_attr "type" "simd_int_arith")
|
||||||
|
(set_attr "mode" "V4DI")])
|
||||||
|
|
||||||
|
-;;XVROTR.B XVROTR.H XVROTR.W XVROTR.D
|
||||||
|
-;;TODO-478
|
||||||
|
-(define_insn "lasx_xvrotr_<lasxfmt>"
|
||||||
|
- [(set (match_operand:ILASX 0 "register_operand" "=f")
|
||||||
|
- (unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:ILASX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVROTR))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvrotr.<lasxfmt>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
;;XVADD.Q
|
||||||
|
;;TODO2
|
||||||
|
(define_insn "lasx_xvadd_q"
|
||||||
|
@@ -4426,15 +4413,6 @@
|
||||||
|
[(set_attr "type" "simd_fcvt")
|
||||||
|
(set_attr "mode" "V4DI")])
|
||||||
|
|
||||||
|
-(define_insn "lasx_xvrotri_<lasxfmt>"
|
||||||
|
- [(set (match_operand:ILASX 0 "register_operand" "=f")
|
||||||
|
- (rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f")
|
||||||
|
- (match_operand 2 "const_<bitimm256>_operand" "")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvrotri.<lasxfmt>\t%u0,%u1,%2"
|
||||||
|
- [(set_attr "type" "simd_shf")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
(define_insn "lasx_xvextl_q_d"
|
||||||
|
[(set (match_operand:V4DI 0 "register_operand" "=f")
|
||||||
|
(unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
index 41ea357cf..f4523c8bf 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
@@ -369,6 +369,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
#define CODE_FOR_lsx_vsrli_h CODE_FOR_vlshrv8hi3
|
||||||
|
#define CODE_FOR_lsx_vsrli_w CODE_FOR_vlshrv4si3
|
||||||
|
#define CODE_FOR_lsx_vsrli_d CODE_FOR_vlshrv2di3
|
||||||
|
+#define CODE_FOR_lsx_vrotr_b CODE_FOR_vrotrv16qi3
|
||||||
|
+#define CODE_FOR_lsx_vrotr_h CODE_FOR_vrotrv8hi3
|
||||||
|
+#define CODE_FOR_lsx_vrotr_w CODE_FOR_vrotrv4si3
|
||||||
|
+#define CODE_FOR_lsx_vrotr_d CODE_FOR_vrotrv2di3
|
||||||
|
+#define CODE_FOR_lsx_vrotri_b CODE_FOR_rotrv16qi3
|
||||||
|
+#define CODE_FOR_lsx_vrotri_h CODE_FOR_rotrv8hi3
|
||||||
|
+#define CODE_FOR_lsx_vrotri_w CODE_FOR_rotrv4si3
|
||||||
|
+#define CODE_FOR_lsx_vrotri_d CODE_FOR_rotrv2di3
|
||||||
|
#define CODE_FOR_lsx_vsub_b CODE_FOR_subv16qi3
|
||||||
|
#define CODE_FOR_lsx_vsub_h CODE_FOR_subv8hi3
|
||||||
|
#define CODE_FOR_lsx_vsub_w CODE_FOR_subv4si3
|
||||||
|
@@ -634,6 +642,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
#define CODE_FOR_lasx_xvsrli_h CODE_FOR_vlshrv16hi3
|
||||||
|
#define CODE_FOR_lasx_xvsrli_w CODE_FOR_vlshrv8si3
|
||||||
|
#define CODE_FOR_lasx_xvsrli_d CODE_FOR_vlshrv4di3
|
||||||
|
+#define CODE_FOR_lasx_xvrotr_b CODE_FOR_vrotrv32qi3
|
||||||
|
+#define CODE_FOR_lasx_xvrotr_h CODE_FOR_vrotrv16hi3
|
||||||
|
+#define CODE_FOR_lasx_xvrotr_w CODE_FOR_vrotrv8si3
|
||||||
|
+#define CODE_FOR_lasx_xvrotr_d CODE_FOR_vrotrv4di3
|
||||||
|
+#define CODE_FOR_lasx_xvrotri_b CODE_FOR_rotrv32qi3
|
||||||
|
+#define CODE_FOR_lasx_xvrotri_h CODE_FOR_rotrv16hi3
|
||||||
|
+#define CODE_FOR_lasx_xvrotri_w CODE_FOR_rotrv8si3
|
||||||
|
+#define CODE_FOR_lasx_xvrotri_d CODE_FOR_rotrv4di3
|
||||||
|
#define CODE_FOR_lasx_xvsub_b CODE_FOR_subv32qi3
|
||||||
|
#define CODE_FOR_lasx_xvsub_h CODE_FOR_subv16hi3
|
||||||
|
#define CODE_FOR_lasx_xvsub_w CODE_FOR_subv8si3
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 537afaf96..232399934 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -141,7 +141,6 @@
|
||||||
|
UNSPEC_LSX_VMADDWOD
|
||||||
|
UNSPEC_LSX_VMADDWOD2
|
||||||
|
UNSPEC_LSX_VMADDWOD3
|
||||||
|
- UNSPEC_LSX_VROTR
|
||||||
|
UNSPEC_LSX_VADD_Q
|
||||||
|
UNSPEC_LSX_VSUB_Q
|
||||||
|
UNSPEC_LSX_VEXTH_Q_D
|
||||||
|
@@ -363,14 +362,6 @@
|
||||||
|
(V8HI "exp_8")
|
||||||
|
(V16QI "exp_16")])
|
||||||
|
|
||||||
|
-;; This attribute is used to form an immediate operand constraint using
|
||||||
|
-;; "const_<bitimm>_operand".
|
||||||
|
-(define_mode_attr bitimm
|
||||||
|
- [(V16QI "uimm3")
|
||||||
|
- (V8HI "uimm4")
|
||||||
|
- (V4SI "uimm5")
|
||||||
|
- (V2DI "uimm6")])
|
||||||
|
-
|
||||||
|
(define_expand "vec_init<mode><unitmode>"
|
||||||
|
[(match_operand:LSX 0 "register_operand")
|
||||||
|
(match_operand:LSX 1 "")]
|
||||||
|
@@ -4152,16 +4143,6 @@
|
||||||
|
[(set_attr "type" "simd_int_arith")
|
||||||
|
(set_attr "mode" "V2DI")])
|
||||||
|
|
||||||
|
-(define_insn "lsx_vrotr_<lsxfmt>"
|
||||||
|
- [(set (match_operand:ILSX 0 "register_operand" "=f")
|
||||||
|
- (unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:ILSX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VROTR))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vrotr.<lsxfmt>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_int_arith")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
(define_insn "lsx_vadd_q"
|
||||||
|
[(set (match_operand:V2DI 0 "register_operand" "=f")
|
||||||
|
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")
|
||||||
|
@@ -4255,15 +4236,6 @@
|
||||||
|
[(set_attr "type" "simd_fcvt")
|
||||||
|
(set_attr "mode" "V2DI")])
|
||||||
|
|
||||||
|
-(define_insn "lsx_vrotri_<lsxfmt>"
|
||||||
|
- [(set (match_operand:ILSX 0 "register_operand" "=f")
|
||||||
|
- (rotatert:ILSX (match_operand:ILSX 1 "register_operand" "f")
|
||||||
|
- (match_operand 2 "const_<bitimm>_operand" "")))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vrotri.<lsxfmt>\t%w0,%w1,%2"
|
||||||
|
- [(set_attr "type" "simd_shf")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
(define_insn "lsx_vextl_q_d"
|
||||||
|
[(set (match_operand:V2DI 0 "register_operand" "=f")
|
||||||
|
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")]
|
||||||
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
||||||
|
index a0e8db3c0..4ecf7a55e 100644
|
||||||
|
--- a/gcc/config/loongarch/simd.md
|
||||||
|
+++ b/gcc/config/loongarch/simd.md
|
||||||
|
@@ -91,6 +91,13 @@
|
||||||
|
(V8HI "16") (V16HI "16")
|
||||||
|
(V16QI "8") (V32QI "8")])
|
||||||
|
|
||||||
|
+;; This attribute is used to form an immediate operand constraint using
|
||||||
|
+;; "const_<bitimm>_operand".
|
||||||
|
+(define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3")
|
||||||
|
+ (V8HI "uimm4") (V16HI "uimm4")
|
||||||
|
+ (V4SI "uimm5") (V8SI "uimm5")
|
||||||
|
+ (V2DI "uimm6") (V4DI "uimm6")])
|
||||||
|
+
|
||||||
|
;; =======================================================================
|
||||||
|
;; For many LASX instructions, the only difference of it from the LSX
|
||||||
|
;; counterpart is the length of vector operands. Describe these LSX/LASX
|
||||||
|
@@ -222,6 +229,28 @@
|
||||||
|
[(set_attr "type" "simd_int_arith")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; <x>vrotr.{b/h/w/d}
|
||||||
|
+
|
||||||
|
+(define_insn "vrotr<mode>3"
|
||||||
|
+ [(set (match_operand:IVEC 0 "register_operand" "=f")
|
||||||
|
+ (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:IVEC 2 "register_operand" "f")))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vrotr.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
|
||||||
|
+ [(set_attr "type" "simd_int_arith")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
+;; <x>vrotri.{b/h/w/d}
|
||||||
|
+
|
||||||
|
+(define_insn "rotr<mode>3"
|
||||||
|
+ [(set (match_operand:IVEC 0 "register_operand" "=f")
|
||||||
|
+ (rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:SI 2 "const_<bitimm>_operand")))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2";
|
||||||
|
+ [(set_attr "type" "simd_int_arith")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
; The LoongArch SX Instructions.
|
||||||
|
(include "lsx.md")
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-rotr.c b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..733c36334
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c
|
||||||
|
@@ -0,0 +1,36 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvrotr\.w\t" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvrotr\.w\t" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvrotri\.w\t\[^\n\]*7\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvrotri\.w\t\[^\n\]*7\n" } } */
|
||||||
|
+
|
||||||
|
+unsigned int a[8], b[8];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test1 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 4; i++)
|
||||||
|
+ a[i] = a[i] >> b[i] | a[i] << (32 - b[i]);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test2 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 8; i++)
|
||||||
|
+ a[i] = a[i] >> b[i] | a[i] << (32 - b[i]);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test3 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 4; i++)
|
||||||
|
+ a[i] = a[i] >> 7 | a[i] << 25;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test4 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 8; i++)
|
||||||
|
+ a[i] = a[i] >> 7 | a[i] << 25;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
42
0049-LoongArch-Remove-lrint_allow_inexact.patch
Normal file
42
0049-LoongArch-Remove-lrint_allow_inexact.patch
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
From c898e4a85c04a72f08db9ba2a454130f15f6f280 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 20 Nov 2023 01:34:26 +0800
|
||||||
|
Subject: [PATCH 049/188] LoongArch: Remove lrint_allow_inexact
|
||||||
|
|
||||||
|
No functional change, just a cleanup.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (lrint_allow_inexact): Remove.
|
||||||
|
(<lrint_pattern><ANYF:mode><ANYFI:mode>2): Check if <LRINT>
|
||||||
|
== UNSPEC_FTINT instead of <lrint_allow_inexact>.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 5 +----
|
||||||
|
1 file changed, 1 insertion(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index d1c766cbf..11577f407 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -585,9 +585,6 @@
|
||||||
|
(define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
|
||||||
|
(UNSPEC_FTINTRM "rm")
|
||||||
|
(UNSPEC_FTINTRP "rp")])
|
||||||
|
-(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
|
||||||
|
- (UNSPEC_FTINTRM "0")
|
||||||
|
- (UNSPEC_FTINTRP "0")])
|
||||||
|
|
||||||
|
;; Iterator and attributes for bytepick.d
|
||||||
|
(define_int_iterator bytepick_w_ashift_amount [8 16 24])
|
||||||
|
@@ -2384,7 +2381,7 @@
|
||||||
|
(unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
|
||||||
|
LRINT))]
|
||||||
|
"TARGET_HARD_FLOAT &&
|
||||||
|
- (<lrint_allow_inexact>
|
||||||
|
+ (<LRINT> == UNSPEC_FTINT
|
||||||
|
|| flag_fp_int_builtin_inexact
|
||||||
|
|| !flag_trapping_math)"
|
||||||
|
"ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
150
0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
Normal file
150
0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
Normal file
@ -0,0 +1,150 @@
|
|||||||
|
From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Mon, 20 Nov 2023 03:51:56 +0800
|
||||||
|
Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with
|
||||||
|
explicit rounding mode
|
||||||
|
|
||||||
|
In LoongArch FP base ISA there is only the frint.{s/d} instruction which
|
||||||
|
reads the global rounding mode. Utilize LSX for explicit rounding mode
|
||||||
|
even if the operand is scalar. This seems to waste CPU power, but it is
|
||||||
|
still much faster than calling the library function.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator.
|
||||||
|
(VLSX_FOR_FMODE): New mode attribute.
|
||||||
|
(<simd_for_scalar_frint_pattern><mode>2): New expander,
|
||||||
|
expanding to vreplvei.{w/d} + vfrint{rp/rz/rm/rne}.{s/d}.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-frint-scalar.c: New test.
|
||||||
|
* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/simd.md | 28 ++++++++++++
|
||||||
|
.../loongarch/vect-frint-scalar-no-inexact.c | 23 ++++++++++
|
||||||
|
.../gcc.target/loongarch/vect-frint-scalar.c | 43 +++++++++++++++++++
|
||||||
|
3 files changed, 94 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
||||||
|
index 4ecf7a55e..843b1a41f 100644
|
||||||
|
--- a/gcc/config/loongarch/simd.md
|
||||||
|
+++ b/gcc/config/loongarch/simd.md
|
||||||
|
@@ -169,6 +169,34 @@
|
||||||
|
UNSPEC_SIMD_FRINTRZ))]
|
||||||
|
"")
|
||||||
|
|
||||||
|
+;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int-
|
||||||
|
+;; builtin-inexact. The base FP instruction set lacks these operations.
|
||||||
|
+;; Yes we are wasting 50% or even 75% of the CPU horsepower, but it's still
|
||||||
|
+;; much faster than calling a libc function: on LA464 and LA664 there is a
|
||||||
|
+;; 3x ~ 5x speed up.
|
||||||
|
+;;
|
||||||
|
+;; Note that a vreplvei instruction is needed or we'll also operate on the
|
||||||
|
+;; junk in high bits of the vector register and produce random FP exceptions.
|
||||||
|
+
|
||||||
|
+(define_int_iterator LSX_SCALAR_FRINT
|
||||||
|
+ [UNSPEC_SIMD_FRINTRP
|
||||||
|
+ UNSPEC_SIMD_FRINTRZ
|
||||||
|
+ UNSPEC_SIMD_FRINTRM
|
||||||
|
+ UNSPEC_SIMD_FRINTRNE])
|
||||||
|
+
|
||||||
|
+(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")])
|
||||||
|
+
|
||||||
|
+(define_expand "<simd_frint_pattern><mode>2"
|
||||||
|
+ [(set (match_dup 2)
|
||||||
|
+ (vec_duplicate:<VLSX_FOR_FMODE>
|
||||||
|
+ (match_operand:ANYF 1 "register_operand")))
|
||||||
|
+ (set (match_dup 2)
|
||||||
|
+ (unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
|
||||||
|
+ (set (match_operand:ANYF 0 "register_operand")
|
||||||
|
+ (vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
|
||||||
|
+ "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
|
||||||
|
+ "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
|
||||||
|
+
|
||||||
|
;; <x>vftint.{/rp/rz/rm}
|
||||||
|
(define_insn
|
||||||
|
"<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..002e3b92d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
|
||||||
|
+
|
||||||
|
+#include "vect-frint-scalar.c"
|
||||||
|
+
|
||||||
|
+/* cannot use LSX for these with -fno-fp-int-builtin-inexact,
|
||||||
|
+ call library function. */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */
|
||||||
|
+
|
||||||
|
+/* nearbyint has not been allowed to raise FE_INEXACT for decades */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
|
||||||
|
+
|
||||||
|
+/* rint should just use basic FP operation */
|
||||||
|
+/* { dg-final { scan-assembler "\tfrint\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tfrint\.d" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..c7cb40be7
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
|
||||||
|
@@ -0,0 +1,43 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx" } */
|
||||||
|
+
|
||||||
|
+#define test(func, suffix) \
|
||||||
|
+__typeof__ (1.##suffix) \
|
||||||
|
+_##func##suffix (__typeof__ (1.##suffix) x) \
|
||||||
|
+{ \
|
||||||
|
+ return __builtin_##func##suffix (x); \
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+test (ceil, f)
|
||||||
|
+test (ceil, )
|
||||||
|
+test (floor, f)
|
||||||
|
+test (floor, )
|
||||||
|
+test (trunc, f)
|
||||||
|
+test (trunc, )
|
||||||
|
+test (roundeven, f)
|
||||||
|
+test (roundeven, )
|
||||||
|
+test (nearbyint, f)
|
||||||
|
+test (nearbyint, )
|
||||||
|
+test (rint, f)
|
||||||
|
+test (rint, )
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */
|
||||||
|
+
|
||||||
|
+/* must do vreplvei first */
|
||||||
|
+/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */
|
||||||
|
+
|
||||||
|
+/* nearbyint has not been allowed to raise FE_INEXACT for decades */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
|
||||||
|
+
|
||||||
|
+/* rint should just use basic FP operation */
|
||||||
|
+/* { dg-final { scan-assembler "\tfrint\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tfrint\.d" } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,49 @@
|
|||||||
|
From 21bb4f07db53df717d02e9115dcdb7b5475ede2a Mon Sep 17 00:00:00 2001
|
||||||
|
From: Li Wei <liwei@loongson.cn>
|
||||||
|
Date: Tue, 28 Nov 2023 15:56:35 +0800
|
||||||
|
Subject: [PATCH 051/188] LoongArch: Remove duplicate definition of
|
||||||
|
CLZ_DEFINED_VALUE_AT_ZERO.
|
||||||
|
|
||||||
|
In the r14-5547 commit, C[LT]Z_DEFINED_VALUE_AT_ZERO were defined at
|
||||||
|
the same time, but in fact, CLZ_DEFINED_VALUE_AT_ZERO has already been
|
||||||
|
defined, so remove the duplicate definition.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.h (CTZ_DEFINED_VALUE_AT_ZERO): Add
|
||||||
|
description.
|
||||||
|
(CLZ_DEFINED_VALUE_AT_ZERO): Remove duplicate definition.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.h | 9 +++------
|
||||||
|
1 file changed, 3 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
|
||||||
|
index 19cf6fd33..8b28be0e4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.h
|
||||||
|
@@ -288,10 +288,12 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
/* Define if loading short immediate values into registers sign extends. */
|
||||||
|
#define SHORT_IMMEDIATES_SIGN_EXTEND 1
|
||||||
|
|
||||||
|
-/* The clz.{w/d} instructions have the natural values at 0. */
|
||||||
|
+/* The clz.{w/d}, ctz.{w/d} instructions have the natural values at 0. */
|
||||||
|
|
||||||
|
#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||||
|
((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||||
|
+#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||||
|
+ ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||||
|
|
||||||
|
/* Standard register usage. */
|
||||||
|
|
||||||
|
@@ -1239,8 +1241,3 @@ struct GTY (()) machine_function
|
||||||
|
|
||||||
|
#define TARGET_EXPLICIT_RELOCS \
|
||||||
|
(la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
|
||||||
|
-
|
||||||
|
-#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||||
|
- ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||||
|
-#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
|
||||||
|
- ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
4375
0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
Normal file
4375
0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
Normal file
File diff suppressed because it is too large
Load Diff
148
0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
Normal file
148
0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Li Wei <liwei@loongson.cn>
|
||||||
|
Date: Tue, 28 Nov 2023 15:38:37 +0800
|
||||||
|
Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
|
||||||
|
signed/unsigned popcount.
|
||||||
|
|
||||||
|
In LoongArch, the vector popcount has corresponding instructions, while
|
||||||
|
the scalar does not. Currently, the scalar popcount is calculated
|
||||||
|
through a loop, and the value of a non-power of two needs to be iterated
|
||||||
|
several times, so the vector popcount instruction is considered for
|
||||||
|
optimization.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (cntmap): Used to simplify the
|
||||||
|
following templates.
|
||||||
|
(popcount<mode>2): New.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/popcnt.c: New test.
|
||||||
|
* gcc.target/loongarch/popcount.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 27 +++++++++++-
|
||||||
|
gcc/testsuite/gcc.target/loongarch/popcnt.c | 41 +++++++++++++++++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
|
||||||
|
3 files changed, 83 insertions(+), 2 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 11577f407..cfd7a8ec6 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -1512,7 +1512,30 @@
|
||||||
|
(set_attr "cnv_mode" "D2S")
|
||||||
|
(set_attr "mode" "SF")])
|
||||||
|
|
||||||
|
-
|
||||||
|
+;; In vector registers, popcount can be implemented directly through
|
||||||
|
+;; the vector instruction [X]VPCNT. For GP registers, we can implement
|
||||||
|
+;; it through the following method. Compared with loop implementation
|
||||||
|
+;; of popcount, the following method has better performance.
|
||||||
|
+
|
||||||
|
+;; This attribute is used to map a scalar mode to the corresponding
|
||||||
|
+;; vector mode.
|
||||||
|
+(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
|
||||||
|
+
|
||||||
|
+(define_expand "popcount<mode>2"
|
||||||
|
+ [(set (match_operand:GPR 0 "register_operand")
|
||||||
|
+ (popcount:GPR (match_operand:GPR 1 "register_operand")))]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ rtx in = operands[1];
|
||||||
|
+ rtx out = operands[0];
|
||||||
|
+ rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
|
||||||
|
+ gen_reg_rtx (V2DImode);
|
||||||
|
+ emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
|
||||||
|
+ emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
|
||||||
|
+ emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
;;
|
||||||
|
;; ....................
|
||||||
|
;;
|
||||||
|
@@ -3879,7 +3902,7 @@
|
||||||
|
(any_extend:SI (match_dup 3)))])]
|
||||||
|
"")
|
||||||
|
|
||||||
|
-
|
||||||
|
+
|
||||||
|
|
||||||
|
(define_mode_iterator QHSD [QI HI SI DI])
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a10fca420
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c
|
||||||
|
@@ -0,0 +1,41 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx" } */
|
||||||
|
+/* { dg-final { scan-assembler-not {popcount} } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+foo (int x)
|
||||||
|
+{
|
||||||
|
+ return __builtin_popcount (x);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+long
|
||||||
|
+foo1 (long x)
|
||||||
|
+{
|
||||||
|
+ return __builtin_popcountl (x);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+long long
|
||||||
|
+foo2 (long long x)
|
||||||
|
+{
|
||||||
|
+ return __builtin_popcountll (x);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+foo3 (int *p)
|
||||||
|
+{
|
||||||
|
+ return __builtin_popcount (*p);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+foo4 (int x)
|
||||||
|
+{
|
||||||
|
+ return __builtin_popcount (x);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned long
|
||||||
|
+foo5 (int x)
|
||||||
|
+{
|
||||||
|
+ return __builtin_popcount (x);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..390ff0676
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/popcount.c
|
||||||
|
@@ -0,0 +1,17 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+PopCount (long b)
|
||||||
|
+{
|
||||||
|
+ int c = 0;
|
||||||
|
+
|
||||||
|
+ while (b)
|
||||||
|
+ {
|
||||||
|
+ b &= b - 1;
|
||||||
|
+ c++;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return c;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
163
0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
Normal file
163
0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
Normal file
@ -0,0 +1,163 @@
|
|||||||
|
From 19282fbb0dab42c3553326a1ed01ad9a599622dd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Li Wei <liwei@loongson.cn>
|
||||||
|
Date: Tue, 28 Nov 2023 15:39:00 +0800
|
||||||
|
Subject: [PATCH 054/188] LoongArch: Optimize vector constant
|
||||||
|
extract-{even/odd} permutation.
|
||||||
|
|
||||||
|
For vector constant extract-{even/odd} permutation replace the default
|
||||||
|
[x]vshuf instruction combination with [x]vilv{l/h} instruction, which
|
||||||
|
can reduce the instruction count and improve performance.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_is_odd_extraction):
|
||||||
|
Supplementary function prototype.
|
||||||
|
(loongarch_is_even_extraction): Adjust.
|
||||||
|
(loongarch_try_expand_lsx_vshuf_const): Adjust.
|
||||||
|
(loongarch_is_extraction_permutation): Adjust.
|
||||||
|
(loongarch_expand_vec_perm_const_2): Adjust.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/lasx-extract-even_odd-opt.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 33 +++++++++++-
|
||||||
|
.../loongarch/lasx-extract-even_odd-opt.c | 54 +++++++++++++++++++
|
||||||
|
2 files changed, 85 insertions(+), 2 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index ecceca22d..3ef7e3605 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -8668,6 +8668,12 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+static bool
|
||||||
|
+loongarch_is_odd_extraction (struct expand_vec_perm_d *);
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+loongarch_is_even_extraction (struct expand_vec_perm_d *);
|
||||||
|
+
|
||||||
|
static bool
|
||||||
|
loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
@@ -8690,6 +8696,24 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
|
||||||
|
if (d->testing_p)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
+ /* If the selector matches an extract-even or extract-odd permutation,
|
||||||
|
+ * using vselect is much better than vshuf. */
|
||||||
|
+ if (loongarch_is_odd_extraction (d)
|
||||||
|
+ || loongarch_is_even_extraction (d))
|
||||||
|
+ {
|
||||||
|
+ if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
|
||||||
|
+ d->perm, d->nelt))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
+ unsigned char perm2[MAX_VECT_LEN];
|
||||||
|
+ for (i = 0; i < d->nelt; ++i)
|
||||||
|
+ perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
|
||||||
|
+
|
||||||
|
+ if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
|
||||||
|
+ perm2, d->nelt))
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
for (i = 0; i < d->nelt; i += 1)
|
||||||
|
{
|
||||||
|
rperm[i] = GEN_INT (d->perm[i]);
|
||||||
|
@@ -8874,7 +8898,7 @@ loongarch_is_even_extraction (struct expand_vec_perm_d *d)
|
||||||
|
result = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
- buf += 1;
|
||||||
|
+ buf += 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
@@ -8896,7 +8920,7 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
|
||||||
|
result = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
- buf += 2;
|
||||||
|
+ buf += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
@@ -9373,6 +9397,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
Selector after: { 1, 3, 1, 3 }.
|
||||||
|
Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
|
||||||
|
Selector after: { 0, 2, 0, 2 }. */
|
||||||
|
+
|
||||||
|
+ /* Better implementation of extract-even and extract-odd permutations. */
|
||||||
|
+ if (loongarch_expand_vec_perm_even_odd (d))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
for (i = 0; i < d->nelt / 2; i += 1)
|
||||||
|
{
|
||||||
|
idx = d->perm[i];
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..515f0c862
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
|
||||||
|
@@ -0,0 +1,54 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvilvl.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvilvh.d" } } */
|
||||||
|
+
|
||||||
|
+#define CMUL(a, b, c) \
|
||||||
|
+ { \
|
||||||
|
+ (c).ai = (a).ai * (b).ai - (a).bi * (b).bi; \
|
||||||
|
+ (c).bi = (a).ai * (b).bi + (a).bi * (b).ai; \
|
||||||
|
+ (c).ci = (a).ci * (b).ci - (a).di * (b).di; \
|
||||||
|
+ (c).di = (a).ci * (b).di + (a).di * (b).ci; \
|
||||||
|
+ }
|
||||||
|
+#define CSUM(a, b) \
|
||||||
|
+ { \
|
||||||
|
+ (a).ai += (b).ai; \
|
||||||
|
+ (a).bi += (b).bi; \
|
||||||
|
+ (a).ci += (b).ci; \
|
||||||
|
+ (a).di += (b).di; \
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+typedef struct
|
||||||
|
+{
|
||||||
|
+ double ai;
|
||||||
|
+ double bi;
|
||||||
|
+ double ci;
|
||||||
|
+ double di;
|
||||||
|
+} complex;
|
||||||
|
+
|
||||||
|
+typedef struct
|
||||||
|
+{
|
||||||
|
+ complex e[6][6];
|
||||||
|
+} matrix;
|
||||||
|
+
|
||||||
|
+typedef struct
|
||||||
|
+{
|
||||||
|
+ complex c[6];
|
||||||
|
+} vector;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+mult_adj_mat_vec (matrix *a, vector *b, vector *c)
|
||||||
|
+{
|
||||||
|
+ register int i, j;
|
||||||
|
+ register complex x, y;
|
||||||
|
+ for (i = 0; i < 6; i++)
|
||||||
|
+ {
|
||||||
|
+ x.ai = x.bi = x.ci = x.di = 0.0;
|
||||||
|
+ for (j = 0; j < 6; j++)
|
||||||
|
+ {
|
||||||
|
+ CMUL (a->e[j][i], b->c[j], y);
|
||||||
|
+ CSUM (x, y);
|
||||||
|
+ }
|
||||||
|
+ c->c[i] = x;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
1697
0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
Normal file
1697
0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
Normal file
File diff suppressed because it is too large
Load Diff
925
0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
Normal file
925
0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
Normal file
@ -0,0 +1,925 @@
|
|||||||
|
From 6c85d03940f87770a7e8b7195ffe45f99afef411 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 1 Dec 2023 10:09:33 +0800
|
||||||
|
Subject: [PATCH 056/188] LoongArch: Switch loongarch-def from C to C++ to make
|
||||||
|
it possible.
|
||||||
|
|
||||||
|
We'll use HOST_WIDE_INT in LoongArch static properties in the following patches.
|
||||||
|
|
||||||
|
To keep the same readability as C99 designated initializers, create a
|
||||||
|
std::array like data structure with position setter function, and add
|
||||||
|
field setter functions for structs used in loongarch-def.cc.
|
||||||
|
|
||||||
|
Remove unneeded guards #if
|
||||||
|
!defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
in loongarch-def.h and loongarch-opts.h.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-def.h: Remove extern "C".
|
||||||
|
(loongarch_isa_base_strings): Declare as loongarch_def_array
|
||||||
|
instead of plain array.
|
||||||
|
(loongarch_isa_ext_strings): Likewise.
|
||||||
|
(loongarch_abi_base_strings): Likewise.
|
||||||
|
(loongarch_abi_ext_strings): Likewise.
|
||||||
|
(loongarch_cmodel_strings): Likewise.
|
||||||
|
(loongarch_cpu_strings): Likewise.
|
||||||
|
(loongarch_cpu_default_isa): Likewise.
|
||||||
|
(loongarch_cpu_issue_rate): Likewise.
|
||||||
|
(loongarch_cpu_multipass_dfa_lookahead): Likewise.
|
||||||
|
(loongarch_cpu_cache): Likewise.
|
||||||
|
(loongarch_cpu_align): Likewise.
|
||||||
|
(loongarch_cpu_rtx_cost_data): Likewise.
|
||||||
|
(loongarch_isa): Add a constructor and field setter functions.
|
||||||
|
* config/loongarch/loongarch-opts.h (loongarch-def.h): Do not
|
||||||
|
include for target libraries.
|
||||||
|
* config/loongarch/loongarch-opts.cc: Comment out code that doesn't
|
||||||
|
run and causes compilation errors.
|
||||||
|
* config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise.
|
||||||
|
(struct loongarch_rtx_cost_data): Likewise.
|
||||||
|
(struct loongarch_cache): Likewise.
|
||||||
|
(struct loongarch_align): Likewise.
|
||||||
|
* config/loongarch/t-loongarch: Compile loongarch-def.cc with the
|
||||||
|
C++ compiler.
|
||||||
|
* config/loongarch/loongarch-def-array.h: New file for a
|
||||||
|
std::array like data structure with position setter function.
|
||||||
|
* config/loongarch/loongarch-def.c: Rename to ...
|
||||||
|
* config/loongarch/loongarch-def.cc: ... here.
|
||||||
|
(loongarch_cpu_strings): Define as loongarch_def_array instead
|
||||||
|
of plain array.
|
||||||
|
(loongarch_cpu_default_isa): Likewise.
|
||||||
|
(loongarch_cpu_cache): Likewise.
|
||||||
|
(loongarch_cpu_align): Likewise.
|
||||||
|
(loongarch_cpu_rtx_cost_data): Likewise.
|
||||||
|
(loongarch_cpu_issue_rate): Likewise.
|
||||||
|
(loongarch_cpu_multipass_dfa_lookahead): Likewise.
|
||||||
|
(loongarch_isa_base_strings): Likewise.
|
||||||
|
(loongarch_isa_ext_strings): Likewise.
|
||||||
|
(loongarch_abi_base_strings): Likewise.
|
||||||
|
(loongarch_abi_ext_strings): Likewise.
|
||||||
|
(loongarch_cmodel_strings): Likewise.
|
||||||
|
(abi_minimal_isa): Likewise.
|
||||||
|
(loongarch_rtx_cost_optimize_size): Use field setter functions
|
||||||
|
instead of designated initializers.
|
||||||
|
(loongarch_rtx_cost_data): Implement default constructor.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-def-array.h | 40 ++++
|
||||||
|
gcc/config/loongarch/loongarch-def.c | 227 ---------------------
|
||||||
|
gcc/config/loongarch/loongarch-def.cc | 187 +++++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 55 ++---
|
||||||
|
gcc/config/loongarch/loongarch-opts.cc | 7 +
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 5 +-
|
||||||
|
gcc/config/loongarch/loongarch-tune.h | 123 ++++++++++-
|
||||||
|
gcc/config/loongarch/t-loongarch | 4 +-
|
||||||
|
8 files changed, 390 insertions(+), 258 deletions(-)
|
||||||
|
create mode 100644 gcc/config/loongarch/loongarch-def-array.h
|
||||||
|
delete mode 100644 gcc/config/loongarch/loongarch-def.c
|
||||||
|
create mode 100644 gcc/config/loongarch/loongarch-def.cc
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..bdb3e9c6a
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def-array.h
|
||||||
|
@@ -0,0 +1,40 @@
|
||||||
|
+/* A std::array like data structure for LoongArch static properties.
|
||||||
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+This file is part of GCC.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify
|
||||||
|
+it under the terms of the GNU General Public License as published by
|
||||||
|
+the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+any later version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful,
|
||||||
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+GNU General Public License for more details.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License
|
||||||
|
+along with GCC; see the file COPYING3. If not see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#ifndef _LOONGARCH_DEF_ARRAY_H
|
||||||
|
+#define _LOONGARCH_DEF_ARRAY_H 1
|
||||||
|
+
|
||||||
|
+template <class T, int N>
|
||||||
|
+class loongarch_def_array {
|
||||||
|
+private:
|
||||||
|
+ T arr[N];
|
||||||
|
+public:
|
||||||
|
+ loongarch_def_array () : arr{} {}
|
||||||
|
+
|
||||||
|
+ T &operator[] (int n) { return arr[n]; }
|
||||||
|
+ const T &operator[] (int n) const { return arr[n]; }
|
||||||
|
+
|
||||||
|
+ loongarch_def_array set (int idx, T &&value)
|
||||||
|
+ {
|
||||||
|
+ (*this)[idx] = value;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+#endif
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||||
|
deleted file mode 100644
|
||||||
|
index fe4474e77..000000000
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.c
|
||||||
|
+++ /dev/null
|
||||||
|
@@ -1,227 +0,0 @@
|
||||||
|
-/* LoongArch static properties.
|
||||||
|
- Copyright (C) 2021-2022 Free Software Foundation, Inc.
|
||||||
|
- Contributed by Loongson Ltd.
|
||||||
|
-
|
||||||
|
-This file is part of GCC.
|
||||||
|
-
|
||||||
|
-GCC is free software; you can redistribute it and/or modify
|
||||||
|
-it under the terms of the GNU General Public License as published by
|
||||||
|
-the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
-any later version.
|
||||||
|
-
|
||||||
|
-GCC is distributed in the hope that it will be useful,
|
||||||
|
-but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
-GNU General Public License for more details.
|
||||||
|
-
|
||||||
|
-You should have received a copy of the GNU General Public License
|
||||||
|
-along with GCC; see the file COPYING3. If not see
|
||||||
|
-<http://www.gnu.org/licenses/>. */
|
||||||
|
-
|
||||||
|
-#include "loongarch-def.h"
|
||||||
|
-#include "loongarch-str.h"
|
||||||
|
-
|
||||||
|
-/* CPU property tables. */
|
||||||
|
-const char*
|
||||||
|
-loongarch_cpu_strings[N_TUNE_TYPES] = {
|
||||||
|
- [CPU_NATIVE] = STR_CPU_NATIVE,
|
||||||
|
- [CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT,
|
||||||
|
- [CPU_LOONGARCH64] = STR_CPU_LOONGARCH64,
|
||||||
|
- [CPU_LA464] = STR_CPU_LA464,
|
||||||
|
- [CPU_LA664] = STR_CPU_LA664,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-struct loongarch_isa
|
||||||
|
-loongarch_cpu_default_isa[N_ARCH_TYPES] = {
|
||||||
|
- [CPU_LOONGARCH64] = {
|
||||||
|
- .base = ISA_BASE_LA64V100,
|
||||||
|
- .fpu = ISA_EXT_FPU64,
|
||||||
|
- .simd = 0,
|
||||||
|
- },
|
||||||
|
- [CPU_LA464] = {
|
||||||
|
- .base = ISA_BASE_LA64V100,
|
||||||
|
- .fpu = ISA_EXT_FPU64,
|
||||||
|
- .simd = ISA_EXT_SIMD_LASX,
|
||||||
|
- },
|
||||||
|
- [CPU_LA664] = {
|
||||||
|
- .base = ISA_BASE_LA64V110,
|
||||||
|
- .fpu = ISA_EXT_FPU64,
|
||||||
|
- .simd = ISA_EXT_SIMD_LASX,
|
||||||
|
- },
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-struct loongarch_cache
|
||||||
|
-loongarch_cpu_cache[N_TUNE_TYPES] = {
|
||||||
|
- [CPU_LOONGARCH64] = {
|
||||||
|
- .l1d_line_size = 64,
|
||||||
|
- .l1d_size = 64,
|
||||||
|
- .l2d_size = 256,
|
||||||
|
- .simultaneous_prefetches = 4,
|
||||||
|
- },
|
||||||
|
- [CPU_LA464] = {
|
||||||
|
- .l1d_line_size = 64,
|
||||||
|
- .l1d_size = 64,
|
||||||
|
- .l2d_size = 256,
|
||||||
|
- .simultaneous_prefetches = 4,
|
||||||
|
- },
|
||||||
|
- [CPU_LA664] = {
|
||||||
|
- .l1d_line_size = 64,
|
||||||
|
- .l1d_size = 64,
|
||||||
|
- .l2d_size = 256,
|
||||||
|
- .simultaneous_prefetches = 4,
|
||||||
|
- },
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-struct loongarch_align
|
||||||
|
-loongarch_cpu_align[N_TUNE_TYPES] = {
|
||||||
|
- [CPU_LOONGARCH64] = {
|
||||||
|
- .function = "32",
|
||||||
|
- .label = "16",
|
||||||
|
- },
|
||||||
|
- [CPU_LA464] = {
|
||||||
|
- .function = "32",
|
||||||
|
- .label = "16",
|
||||||
|
- },
|
||||||
|
- [CPU_LA664] = {
|
||||||
|
- .function = "32",
|
||||||
|
- .label = "16",
|
||||||
|
- },
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-/* Default RTX cost initializer. */
|
||||||
|
-#define COSTS_N_INSNS(N) ((N) * 4)
|
||||||
|
-#define DEFAULT_COSTS \
|
||||||
|
- .fp_add = COSTS_N_INSNS (1), \
|
||||||
|
- .fp_mult_sf = COSTS_N_INSNS (2), \
|
||||||
|
- .fp_mult_df = COSTS_N_INSNS (4), \
|
||||||
|
- .fp_div_sf = COSTS_N_INSNS (6), \
|
||||||
|
- .fp_div_df = COSTS_N_INSNS (8), \
|
||||||
|
- .int_mult_si = COSTS_N_INSNS (1), \
|
||||||
|
- .int_mult_di = COSTS_N_INSNS (1), \
|
||||||
|
- .int_div_si = COSTS_N_INSNS (4), \
|
||||||
|
- .int_div_di = COSTS_N_INSNS (6), \
|
||||||
|
- .branch_cost = 6, \
|
||||||
|
- .memory_latency = 4
|
||||||
|
-
|
||||||
|
-/* The following properties cannot be looked up directly using "cpucfg".
|
||||||
|
- So it is necessary to provide a default value for "unknown native"
|
||||||
|
- tune targets (i.e. -mtune=native while PRID does not correspond to
|
||||||
|
- any known "-mtune" type). */
|
||||||
|
-
|
||||||
|
-struct loongarch_rtx_cost_data
|
||||||
|
-loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
|
||||||
|
- [CPU_NATIVE] = {
|
||||||
|
- DEFAULT_COSTS
|
||||||
|
- },
|
||||||
|
- [CPU_LOONGARCH64] = {
|
||||||
|
- DEFAULT_COSTS
|
||||||
|
- },
|
||||||
|
- [CPU_LA464] = {
|
||||||
|
- DEFAULT_COSTS
|
||||||
|
- },
|
||||||
|
- [CPU_LA664] = {
|
||||||
|
- DEFAULT_COSTS
|
||||||
|
- },
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-/* RTX costs to use when optimizing for size. */
|
||||||
|
-const struct loongarch_rtx_cost_data
|
||||||
|
-loongarch_rtx_cost_optimize_size = {
|
||||||
|
- .fp_add = 4,
|
||||||
|
- .fp_mult_sf = 4,
|
||||||
|
- .fp_mult_df = 4,
|
||||||
|
- .fp_div_sf = 4,
|
||||||
|
- .fp_div_df = 4,
|
||||||
|
- .int_mult_si = 4,
|
||||||
|
- .int_mult_di = 4,
|
||||||
|
- .int_div_si = 4,
|
||||||
|
- .int_div_di = 4,
|
||||||
|
- .branch_cost = 6,
|
||||||
|
- .memory_latency = 4,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-int
|
||||||
|
-loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
|
||||||
|
- [CPU_NATIVE] = 4,
|
||||||
|
- [CPU_LOONGARCH64] = 4,
|
||||||
|
- [CPU_LA464] = 4,
|
||||||
|
- [CPU_LA664] = 6,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-int
|
||||||
|
-loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
|
||||||
|
- [CPU_NATIVE] = 4,
|
||||||
|
- [CPU_LOONGARCH64] = 4,
|
||||||
|
- [CPU_LA464] = 4,
|
||||||
|
- [CPU_LA664] = 6,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-/* Wiring string definitions from loongarch-str.h to global arrays
|
||||||
|
- with standard index values from loongarch-opts.h, so we can
|
||||||
|
- print config-related messages and do ABI self-spec filtering
|
||||||
|
- from the driver in a self-consistent manner. */
|
||||||
|
-
|
||||||
|
-const char*
|
||||||
|
-loongarch_isa_base_strings[N_ISA_BASE_TYPES] = {
|
||||||
|
- [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100,
|
||||||
|
- [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-const char*
|
||||||
|
-loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = {
|
||||||
|
- [ISA_EXT_NONE] = STR_NONE,
|
||||||
|
- [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32,
|
||||||
|
- [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64,
|
||||||
|
- [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX,
|
||||||
|
- [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-const char*
|
||||||
|
-loongarch_abi_base_strings[N_ABI_BASE_TYPES] = {
|
||||||
|
- [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D,
|
||||||
|
- [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F,
|
||||||
|
- [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-const char*
|
||||||
|
-loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = {
|
||||||
|
- [ABI_EXT_BASE] = STR_ABI_EXT_BASE,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-const char*
|
||||||
|
-loongarch_cmodel_strings[] = {
|
||||||
|
- [CMODEL_NORMAL] = STR_CMODEL_NORMAL,
|
||||||
|
- [CMODEL_TINY] = STR_CMODEL_TINY,
|
||||||
|
- [CMODEL_TINY_STATIC] = STR_CMODEL_TS,
|
||||||
|
- [CMODEL_MEDIUM] = STR_CMODEL_MEDIUM,
|
||||||
|
- [CMODEL_LARGE] = STR_CMODEL_LARGE,
|
||||||
|
- [CMODEL_EXTREME] = STR_CMODEL_EXTREME,
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-/* ABI-related definitions. */
|
||||||
|
-const struct loongarch_isa
|
||||||
|
-abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = {
|
||||||
|
- [ABI_BASE_LP64D] = {
|
||||||
|
- [ABI_EXT_BASE] = {
|
||||||
|
- .base = ISA_BASE_LA64V100,
|
||||||
|
- .fpu = ISA_EXT_FPU64,
|
||||||
|
- .simd = 0
|
||||||
|
- },
|
||||||
|
- },
|
||||||
|
- [ABI_BASE_LP64F] = {
|
||||||
|
- [ABI_EXT_BASE] = {
|
||||||
|
- .base = ISA_BASE_LA64V100,
|
||||||
|
- .fpu = ISA_EXT_FPU32,
|
||||||
|
- .simd = 0
|
||||||
|
- },
|
||||||
|
- },
|
||||||
|
- [ABI_BASE_LP64S] = {
|
||||||
|
- [ABI_EXT_BASE] = {
|
||||||
|
- .base = ISA_BASE_LA64V100,
|
||||||
|
- .fpu = ISA_EXT_NONE,
|
||||||
|
- .simd = 0
|
||||||
|
- },
|
||||||
|
- },
|
||||||
|
-};
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..6990c86c2
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
@@ -0,0 +1,187 @@
|
||||||
|
+/* LoongArch static properties.
|
||||||
|
+ Copyright (C) 2021-2023 Free Software Foundation, Inc.
|
||||||
|
+ Contributed by Loongson Ltd.
|
||||||
|
+
|
||||||
|
+This file is part of GCC.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify
|
||||||
|
+it under the terms of the GNU General Public License as published by
|
||||||
|
+the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+any later version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful,
|
||||||
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+GNU General Public License for more details.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License
|
||||||
|
+along with GCC; see the file COPYING3. If not see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#include "loongarch-def.h"
|
||||||
|
+#include "loongarch-str.h"
|
||||||
|
+
|
||||||
|
+template <class T, int N>
|
||||||
|
+using array = loongarch_def_array<T, N>;
|
||||||
|
+
|
||||||
|
+template <class T>
|
||||||
|
+using array_tune = array<T, N_TUNE_TYPES>;
|
||||||
|
+
|
||||||
|
+template <class T>
|
||||||
|
+using array_arch = array<T, N_ARCH_TYPES>;
|
||||||
|
+
|
||||||
|
+/* CPU property tables. */
|
||||||
|
+array_tune<const char *> loongarch_cpu_strings = array_tune<const char *> ()
|
||||||
|
+ .set (CPU_NATIVE, STR_CPU_NATIVE)
|
||||||
|
+ .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT)
|
||||||
|
+ .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64)
|
||||||
|
+ .set (CPU_LA464, STR_CPU_LA464)
|
||||||
|
+ .set (CPU_LA664, STR_CPU_LA664);
|
||||||
|
+
|
||||||
|
+array_arch<loongarch_isa> loongarch_cpu_default_isa =
|
||||||
|
+ array_arch<loongarch_isa> ()
|
||||||
|
+ .set (CPU_LOONGARCH64,
|
||||||
|
+ loongarch_isa ()
|
||||||
|
+ .base_ (ISA_BASE_LA64V100)
|
||||||
|
+ .fpu_ (ISA_EXT_FPU64))
|
||||||
|
+ .set (CPU_LA464,
|
||||||
|
+ loongarch_isa ()
|
||||||
|
+ .base_ (ISA_BASE_LA64V100)
|
||||||
|
+ .fpu_ (ISA_EXT_FPU64)
|
||||||
|
+ .simd_ (ISA_EXT_SIMD_LASX))
|
||||||
|
+ .set (CPU_LA664,
|
||||||
|
+ loongarch_isa ()
|
||||||
|
+ .base_ (ISA_BASE_LA64V110)
|
||||||
|
+ .fpu_ (ISA_EXT_FPU64)
|
||||||
|
+ .simd_ (ISA_EXT_SIMD_LASX));
|
||||||
|
+
|
||||||
|
+static inline loongarch_cache la464_cache ()
|
||||||
|
+{
|
||||||
|
+ return loongarch_cache ()
|
||||||
|
+ .l1d_line_size_ (64)
|
||||||
|
+ .l1d_size_ (64)
|
||||||
|
+ .l2d_size_ (256)
|
||||||
|
+ .simultaneous_prefetches_ (4);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+array_tune<loongarch_cache> loongarch_cpu_cache =
|
||||||
|
+ array_tune<loongarch_cache> ()
|
||||||
|
+ .set (CPU_LOONGARCH64, la464_cache ())
|
||||||
|
+ .set (CPU_LA464, la464_cache ())
|
||||||
|
+ .set (CPU_LA664, la464_cache ());
|
||||||
|
+
|
||||||
|
+static inline loongarch_align la464_align ()
|
||||||
|
+{
|
||||||
|
+ return loongarch_align ().function_ ("32").label_ ("16");
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+array_tune<loongarch_align> loongarch_cpu_align =
|
||||||
|
+ array_tune<loongarch_align> ()
|
||||||
|
+ .set (CPU_LOONGARCH64, la464_align ())
|
||||||
|
+ .set (CPU_LA464, la464_align ())
|
||||||
|
+ .set (CPU_LA664, la464_align ());
|
||||||
|
+
|
||||||
|
+#define COSTS_N_INSNS(N) ((N) * 4)
|
||||||
|
+
|
||||||
|
+/* Default RTX cost initializer. */
|
||||||
|
+loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
||||||
|
+ : fp_add (COSTS_N_INSNS (1)),
|
||||||
|
+ fp_mult_sf (COSTS_N_INSNS (2)),
|
||||||
|
+ fp_mult_df (COSTS_N_INSNS (4)),
|
||||||
|
+ fp_div_sf (COSTS_N_INSNS (6)),
|
||||||
|
+ fp_div_df (COSTS_N_INSNS (8)),
|
||||||
|
+ int_mult_si (COSTS_N_INSNS (1)),
|
||||||
|
+ int_mult_di (COSTS_N_INSNS (1)),
|
||||||
|
+ int_div_si (COSTS_N_INSNS (4)),
|
||||||
|
+ int_div_di (COSTS_N_INSNS (6)),
|
||||||
|
+ branch_cost (6),
|
||||||
|
+ memory_latency (4) {}
|
||||||
|
+
|
||||||
|
+/* The following properties cannot be looked up directly using "cpucfg".
|
||||||
|
+ So it is necessary to provide a default value for "unknown native"
|
||||||
|
+ tune targets (i.e. -mtune=native while PRID does not correspond to
|
||||||
|
+ any known "-mtune" type). Currently all numbers are default. */
|
||||||
|
+array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
|
||||||
|
+ array_tune<loongarch_rtx_cost_data> ();
|
||||||
|
+
|
||||||
|
+/* RTX costs to use when optimizing for size. */
|
||||||
|
+const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
|
||||||
|
+ loongarch_rtx_cost_data ()
|
||||||
|
+ .fp_add_ (4)
|
||||||
|
+ .fp_mult_sf_ (4)
|
||||||
|
+ .fp_mult_df_ (4)
|
||||||
|
+ .fp_div_sf_ (4)
|
||||||
|
+ .fp_div_df_ (4)
|
||||||
|
+ .int_mult_si_ (4)
|
||||||
|
+ .int_mult_di_ (4)
|
||||||
|
+ .int_div_si_ (4)
|
||||||
|
+ .int_div_di_ (4);
|
||||||
|
+
|
||||||
|
+array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
|
||||||
|
+ .set (CPU_NATIVE, 4)
|
||||||
|
+ .set (CPU_LOONGARCH64, 4)
|
||||||
|
+ .set (CPU_LA464, 4)
|
||||||
|
+ .set (CPU_LA664, 6);
|
||||||
|
+
|
||||||
|
+array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> ()
|
||||||
|
+ .set (CPU_NATIVE, 4)
|
||||||
|
+ .set (CPU_LOONGARCH64, 4)
|
||||||
|
+ .set (CPU_LA464, 4)
|
||||||
|
+ .set (CPU_LA664, 6);
|
||||||
|
+
|
||||||
|
+/* Wiring string definitions from loongarch-str.h to global arrays
|
||||||
|
+ with standard index values from loongarch-opts.h, so we can
|
||||||
|
+ print config-related messages and do ABI self-spec filtering
|
||||||
|
+ from the driver in a self-consistent manner. */
|
||||||
|
+
|
||||||
|
+array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings =
|
||||||
|
+ array<const char *, N_ISA_BASE_TYPES> ()
|
||||||
|
+ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100)
|
||||||
|
+ .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110);
|
||||||
|
+
|
||||||
|
+array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings =
|
||||||
|
+ array<const char *, N_ISA_EXT_TYPES> ()
|
||||||
|
+ .set (ISA_EXT_NONE, STR_NONE)
|
||||||
|
+ .set (ISA_EXT_FPU32, STR_ISA_EXT_FPU32)
|
||||||
|
+ .set (ISA_EXT_FPU64, STR_ISA_EXT_FPU64)
|
||||||
|
+ .set (ISA_EXT_SIMD_LSX, STR_ISA_EXT_LSX)
|
||||||
|
+ .set (ISA_EXT_SIMD_LASX, STR_ISA_EXT_LASX);
|
||||||
|
+
|
||||||
|
+array<const char *, N_ABI_BASE_TYPES> loongarch_abi_base_strings =
|
||||||
|
+ array<const char *, N_ABI_BASE_TYPES> ()
|
||||||
|
+ .set (ABI_BASE_LP64D, STR_ABI_BASE_LP64D)
|
||||||
|
+ .set (ABI_BASE_LP64F, STR_ABI_BASE_LP64F)
|
||||||
|
+ .set (ABI_BASE_LP64S, STR_ABI_BASE_LP64S);
|
||||||
|
+
|
||||||
|
+array<const char *, N_ABI_EXT_TYPES> loongarch_abi_ext_strings =
|
||||||
|
+ array<const char *, N_ABI_EXT_TYPES> ()
|
||||||
|
+ .set (ABI_EXT_BASE, STR_ABI_EXT_BASE);
|
||||||
|
+
|
||||||
|
+array<const char *, N_CMODEL_TYPES> loongarch_cmodel_strings =
|
||||||
|
+ array<const char *, N_CMODEL_TYPES> ()
|
||||||
|
+ .set (CMODEL_NORMAL, STR_CMODEL_NORMAL)
|
||||||
|
+ .set (CMODEL_TINY, STR_CMODEL_TINY)
|
||||||
|
+ .set (CMODEL_TINY_STATIC, STR_CMODEL_TS)
|
||||||
|
+ .set (CMODEL_MEDIUM, STR_CMODEL_MEDIUM)
|
||||||
|
+ .set (CMODEL_LARGE, STR_CMODEL_LARGE)
|
||||||
|
+ .set (CMODEL_EXTREME, STR_CMODEL_EXTREME);
|
||||||
|
+
|
||||||
|
+array<array<loongarch_isa, N_ABI_EXT_TYPES>, N_ABI_BASE_TYPES>
|
||||||
|
+ abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>,
|
||||||
|
+ N_ABI_BASE_TYPES> ()
|
||||||
|
+ .set (ABI_BASE_LP64D,
|
||||||
|
+ array<loongarch_isa, N_ABI_EXT_TYPES> ()
|
||||||
|
+ .set (ABI_EXT_BASE,
|
||||||
|
+ loongarch_isa ()
|
||||||
|
+ .base_ (ISA_BASE_LA64V100)
|
||||||
|
+ .fpu_ (ISA_EXT_FPU64)))
|
||||||
|
+ .set (ABI_BASE_LP64F,
|
||||||
|
+ array<loongarch_isa, N_ABI_EXT_TYPES> ()
|
||||||
|
+ .set (ABI_EXT_BASE,
|
||||||
|
+ loongarch_isa ()
|
||||||
|
+ .base_ (ISA_BASE_LA64V100)
|
||||||
|
+ .fpu_ (ISA_EXT_FPU32)))
|
||||||
|
+ .set (ABI_BASE_LP64S,
|
||||||
|
+ array<loongarch_isa, N_ABI_EXT_TYPES> ()
|
||||||
|
+ .set (ABI_EXT_BASE,
|
||||||
|
+ loongarch_isa ().base_ (ISA_BASE_LA64V100)));
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index ef848f606..5ac70dfdd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -50,20 +50,18 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include <stdint.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#include "loongarch-def-array.h"
|
||||||
|
#include "loongarch-tune.h"
|
||||||
|
|
||||||
|
-#ifdef __cplusplus
|
||||||
|
-extern "C" {
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
/* enum isa_base */
|
||||||
|
-extern const char* loongarch_isa_base_strings[];
|
||||||
|
|
||||||
|
/* LoongArch V1.00. */
|
||||||
|
#define ISA_BASE_LA64V100 0
|
||||||
|
/* LoongArch V1.10. */
|
||||||
|
#define ISA_BASE_LA64V110 1
|
||||||
|
#define N_ISA_BASE_TYPES 2
|
||||||
|
+extern loongarch_def_array<const char *, N_ISA_BASE_TYPES>
|
||||||
|
+ loongarch_isa_base_strings;
|
||||||
|
|
||||||
|
#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is
|
||||||
|
@@ -72,7 +70,6 @@ extern int64_t loongarch_isa_base_features[];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* enum isa_ext_* */
|
||||||
|
-extern const char* loongarch_isa_ext_strings[];
|
||||||
|
#define ISA_EXT_NONE 0
|
||||||
|
#define ISA_EXT_FPU32 1
|
||||||
|
#define ISA_EXT_FPU64 2
|
||||||
|
@@ -80,13 +77,16 @@ extern const char* loongarch_isa_ext_strings[];
|
||||||
|
#define ISA_EXT_SIMD_LSX 3
|
||||||
|
#define ISA_EXT_SIMD_LASX 4
|
||||||
|
#define N_ISA_EXT_TYPES 5
|
||||||
|
+extern loongarch_def_array<const char *, N_ISA_EXT_TYPES>
|
||||||
|
+ loongarch_isa_ext_strings;
|
||||||
|
|
||||||
|
/* enum abi_base */
|
||||||
|
-extern const char* loongarch_abi_base_strings[];
|
||||||
|
#define ABI_BASE_LP64D 0
|
||||||
|
#define ABI_BASE_LP64F 1
|
||||||
|
#define ABI_BASE_LP64S 2
|
||||||
|
#define N_ABI_BASE_TYPES 3
|
||||||
|
+extern loongarch_def_array<const char *, N_ABI_BASE_TYPES>
|
||||||
|
+ loongarch_abi_base_strings;
|
||||||
|
|
||||||
|
#define TO_LP64_ABI_BASE(C) (C)
|
||||||
|
|
||||||
|
@@ -99,12 +99,12 @@ extern const char* loongarch_abi_base_strings[];
|
||||||
|
|
||||||
|
|
||||||
|
/* enum abi_ext */
|
||||||
|
-extern const char* loongarch_abi_ext_strings[];
|
||||||
|
#define ABI_EXT_BASE 0
|
||||||
|
#define N_ABI_EXT_TYPES 1
|
||||||
|
+extern loongarch_def_array<const char *, N_ABI_EXT_TYPES>
|
||||||
|
+ loongarch_abi_ext_strings;
|
||||||
|
|
||||||
|
/* enum cmodel */
|
||||||
|
-extern const char* loongarch_cmodel_strings[];
|
||||||
|
#define CMODEL_NORMAL 0
|
||||||
|
#define CMODEL_TINY 1
|
||||||
|
#define CMODEL_TINY_STATIC 2
|
||||||
|
@@ -112,6 +112,8 @@ extern const char* loongarch_cmodel_strings[];
|
||||||
|
#define CMODEL_LARGE 4
|
||||||
|
#define CMODEL_EXTREME 5
|
||||||
|
#define N_CMODEL_TYPES 6
|
||||||
|
+extern loongarch_def_array<const char *, N_CMODEL_TYPES>
|
||||||
|
+ loongarch_cmodel_strings;
|
||||||
|
|
||||||
|
/* enum explicit_relocs */
|
||||||
|
#define EXPLICIT_RELOCS_AUTO 0
|
||||||
|
@@ -126,7 +128,6 @@ extern const char* loongarch_cmodel_strings[];
|
||||||
|
#define M_OPT_ABSENT(opt_enum) ((opt_enum) == M_OPT_UNSET)
|
||||||
|
|
||||||
|
|
||||||
|
-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
/* Internal representation of the target. */
|
||||||
|
struct loongarch_isa
|
||||||
|
{
|
||||||
|
@@ -139,6 +140,13 @@ struct loongarch_isa
|
||||||
|
|
||||||
|
Using int64_t instead of HOST_WIDE_INT for C compatibility. */
|
||||||
|
int64_t evolution;
|
||||||
|
+
|
||||||
|
+ loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {}
|
||||||
|
+ loongarch_isa base_ (int _base) { base = _base; return *this; }
|
||||||
|
+ loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; }
|
||||||
|
+ loongarch_isa simd_ (int _simd) { simd = _simd; return *this; }
|
||||||
|
+ loongarch_isa evolution_ (int64_t _evolution)
|
||||||
|
+ { evolution = _evolution; return *this; }
|
||||||
|
};
|
||||||
|
|
||||||
|
struct loongarch_abi
|
||||||
|
@@ -156,9 +164,6 @@ struct loongarch_target
|
||||||
|
int cmodel; /* CMODEL_ */
|
||||||
|
};
|
||||||
|
|
||||||
|
-extern struct loongarch_isa loongarch_cpu_default_isa[];
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
/* CPU properties. */
|
||||||
|
/* index */
|
||||||
|
#define CPU_NATIVE 0
|
||||||
|
@@ -170,15 +175,19 @@ extern struct loongarch_isa loongarch_cpu_default_isa[];
|
||||||
|
#define N_TUNE_TYPES 5
|
||||||
|
|
||||||
|
/* parallel tables. */
|
||||||
|
-extern const char* loongarch_cpu_strings[];
|
||||||
|
-extern int loongarch_cpu_issue_rate[];
|
||||||
|
-extern int loongarch_cpu_multipass_dfa_lookahead[];
|
||||||
|
+extern loongarch_def_array<const char *, N_ARCH_TYPES>
|
||||||
|
+ loongarch_cpu_strings;
|
||||||
|
+extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES>
|
||||||
|
+ loongarch_cpu_default_isa;
|
||||||
|
+extern loongarch_def_array<int, N_TUNE_TYPES>
|
||||||
|
+ loongarch_cpu_issue_rate;
|
||||||
|
+extern loongarch_def_array<int, N_TUNE_TYPES>
|
||||||
|
+ loongarch_cpu_multipass_dfa_lookahead;
|
||||||
|
+extern loongarch_def_array<loongarch_cache, N_TUNE_TYPES>
|
||||||
|
+ loongarch_cpu_cache;
|
||||||
|
+extern loongarch_def_array<loongarch_align, N_TUNE_TYPES>
|
||||||
|
+ loongarch_cpu_align;
|
||||||
|
+extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES>
|
||||||
|
+ loongarch_cpu_rtx_cost_data;
|
||||||
|
|
||||||
|
-extern struct loongarch_cache loongarch_cpu_cache[];
|
||||||
|
-extern struct loongarch_align loongarch_cpu_align[];
|
||||||
|
-extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[];
|
||||||
|
-
|
||||||
|
-#ifdef __cplusplus
|
||||||
|
-}
|
||||||
|
-#endif
|
||||||
|
#endif /* LOONGARCH_DEF_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
index 390720479..45fc521e4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
@@ -163,6 +163,7 @@ loongarch_config_target (struct loongarch_target *target,
|
||||||
|
int follow_multilib_list_p)
|
||||||
|
{
|
||||||
|
struct loongarch_target t;
|
||||||
|
+
|
||||||
|
if (!target)
|
||||||
|
return;
|
||||||
|
|
||||||
|
@@ -657,12 +658,18 @@ abi_str (struct loongarch_abi abi)
|
||||||
|
strlen (loongarch_abi_base_strings[abi.base]));
|
||||||
|
else
|
||||||
|
{
|
||||||
|
+ /* This situation has not yet occurred, so in order to avoid the
|
||||||
|
+ -Warray-bounds warning during C++ syntax checking, this part
|
||||||
|
+ of the code is commented first. */
|
||||||
|
+ /*
|
||||||
|
APPEND_STRING (loongarch_abi_base_strings[abi.base])
|
||||||
|
APPEND1 ('/')
|
||||||
|
APPEND_STRING (loongarch_abi_ext_strings[abi.ext])
|
||||||
|
APPEND1 ('\0')
|
||||||
|
|
||||||
|
return XOBFINISH (&msg_obstack, const char *);
|
||||||
|
+ */
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 9b3d023ac..0dabf1551 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -21,7 +21,10 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#ifndef LOONGARCH_OPTS_H
|
||||||
|
#define LOONGARCH_OPTS_H
|
||||||
|
|
||||||
|
+/* This is a C++ header and it shouldn't be used by target libraries. */
|
||||||
|
+#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
#include "loongarch-def.h"
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
/* Target configuration */
|
||||||
|
extern struct loongarch_target la_target;
|
||||||
|
@@ -33,7 +36,6 @@ struct loongarch_flags {
|
||||||
|
int sx[2];
|
||||||
|
};
|
||||||
|
|
||||||
|
-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
|
||||||
|
/* Initialize loongarch_target from separate option variables. */
|
||||||
|
void
|
||||||
|
@@ -54,7 +56,6 @@ void
|
||||||
|
loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
struct gcc_options *opts,
|
||||||
|
struct gcc_options *opts_set);
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Macros for common conditional expressions used in loongarch.{c,h,md} */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
|
||||||
|
index d961963f0..616b94e87 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-tune.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-tune.h
|
||||||
|
@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#ifndef LOONGARCH_TUNE_H
|
||||||
|
#define LOONGARCH_TUNE_H
|
||||||
|
|
||||||
|
+#include "loongarch-def-array.h"
|
||||||
|
+
|
||||||
|
/* RTX costs of various operations on the different architectures. */
|
||||||
|
struct loongarch_rtx_cost_data
|
||||||
|
{
|
||||||
|
@@ -35,6 +37,76 @@ struct loongarch_rtx_cost_data
|
||||||
|
unsigned short int_div_di;
|
||||||
|
unsigned short branch_cost;
|
||||||
|
unsigned short memory_latency;
|
||||||
|
+
|
||||||
|
+ /* Default RTX cost initializer, implemented in loongarch-def.cc. */
|
||||||
|
+ loongarch_rtx_cost_data ();
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data fp_add_ (unsigned short _fp_add)
|
||||||
|
+ {
|
||||||
|
+ fp_add = _fp_add;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data fp_mult_sf_ (unsigned short _fp_mult_sf)
|
||||||
|
+ {
|
||||||
|
+ fp_mult_sf = _fp_mult_sf;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data fp_mult_df_ (unsigned short _fp_mult_df)
|
||||||
|
+ {
|
||||||
|
+ fp_mult_df = _fp_mult_df;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data fp_div_sf_ (unsigned short _fp_div_sf)
|
||||||
|
+ {
|
||||||
|
+ fp_div_sf = _fp_div_sf;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data fp_div_df_ (unsigned short _fp_div_df)
|
||||||
|
+ {
|
||||||
|
+ fp_div_df = _fp_div_df;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data int_mult_si_ (unsigned short _int_mult_si)
|
||||||
|
+ {
|
||||||
|
+ int_mult_si = _int_mult_si;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data int_mult_di_ (unsigned short _int_mult_di)
|
||||||
|
+ {
|
||||||
|
+ int_mult_di = _int_mult_di;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data int_div_si_ (unsigned short _int_div_si)
|
||||||
|
+ {
|
||||||
|
+ int_div_si = _int_div_si;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data int_div_di_ (unsigned short _int_div_di)
|
||||||
|
+ {
|
||||||
|
+ int_div_di = _int_div_di;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost)
|
||||||
|
+ {
|
||||||
|
+ branch_cost = _branch_cost;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data memory_latency_ (unsigned short _memory_latency)
|
||||||
|
+ {
|
||||||
|
+ memory_latency = _memory_latency;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Costs to use when optimizing for size. */
|
||||||
|
@@ -42,10 +114,39 @@ extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size;
|
||||||
|
|
||||||
|
/* Cache size record of known processor models. */
|
||||||
|
struct loongarch_cache {
|
||||||
|
- int l1d_line_size; /* bytes */
|
||||||
|
- int l1d_size; /* KiB */
|
||||||
|
- int l2d_size; /* kiB */
|
||||||
|
- int simultaneous_prefetches; /* number of parallel prefetch */
|
||||||
|
+ int l1d_line_size; /* bytes */
|
||||||
|
+ int l1d_size; /* KiB */
|
||||||
|
+ int l2d_size; /* kiB */
|
||||||
|
+ int simultaneous_prefetches; /* number of parallel prefetch */
|
||||||
|
+
|
||||||
|
+ loongarch_cache () : l1d_line_size (0),
|
||||||
|
+ l1d_size (0),
|
||||||
|
+ l2d_size (0),
|
||||||
|
+ simultaneous_prefetches (0) {}
|
||||||
|
+
|
||||||
|
+ loongarch_cache l1d_line_size_ (int _l1d_line_size)
|
||||||
|
+ {
|
||||||
|
+ l1d_line_size = _l1d_line_size;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_cache l1d_size_ (int _l1d_size)
|
||||||
|
+ {
|
||||||
|
+ l1d_size = _l1d_size;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_cache l2d_size_ (int _l2d_size)
|
||||||
|
+ {
|
||||||
|
+ l2d_size = _l2d_size;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_cache simultaneous_prefetches_ (int _simultaneous_prefetches)
|
||||||
|
+ {
|
||||||
|
+ simultaneous_prefetches = _simultaneous_prefetches;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Alignment for functions and labels for best performance. For new uarchs
|
||||||
|
@@ -54,6 +155,20 @@ struct loongarch_cache {
|
||||||
|
struct loongarch_align {
|
||||||
|
const char *function; /* default value for -falign-functions */
|
||||||
|
const char *label; /* default value for -falign-labels */
|
||||||
|
+
|
||||||
|
+ loongarch_align () : function (nullptr), label (nullptr) {}
|
||||||
|
+
|
||||||
|
+ loongarch_align function_ (const char *_function)
|
||||||
|
+ {
|
||||||
|
+ function = _function;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_align label_ (const char *_label)
|
||||||
|
+ {
|
||||||
|
+ label = _label;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* LOONGARCH_TUNE_H */
|
||||||
|
diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
|
||||||
|
index 57b1176bc..a1a40431f 100644
|
||||||
|
--- a/gcc/config/loongarch/t-loongarch
|
||||||
|
+++ b/gcc/config/loongarch/t-loongarch
|
||||||
|
@@ -64,8 +64,8 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \
|
||||||
|
$(srcdir)/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||||
|
|
||||||
|
-loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H)
|
||||||
|
- $(CC) -c $(ALL_CFLAGS) $(INCLUDES) $<
|
||||||
|
+loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H)
|
||||||
|
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||||
|
|
||||||
|
$(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
|
||||||
|
s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
261
0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
Normal file
261
0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
Normal file
@ -0,0 +1,261 @@
|
|||||||
|
From 1ec35f153636077760b65dc3e0385d0a4d383486 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Fri, 1 Dec 2023 11:51:51 +0800
|
||||||
|
Subject: [PATCH 057/188] LoongArch: Remove the definition of ISA_BASE_LA64V110
|
||||||
|
from the code.
|
||||||
|
|
||||||
|
The instructions defined in LoongArch Reference Manual v1.1 are not the instruction
|
||||||
|
set v1.1 version. The CPU defined later may only support some instructions in
|
||||||
|
LoongArch Reference Manual v1.1. Therefore, the macro ISA_BASE_LA64V110 and
|
||||||
|
related definitions are removed here.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110.
|
||||||
|
* config/loongarch/genopts/loongarch.opt.in: Likewise.
|
||||||
|
* config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro.
|
||||||
|
(fill_native_cpu_config): Define a new variable hw_isa_evolution to record the
|
||||||
|
extended instruction set support read from cpucfg.
|
||||||
|
* config/loongarch/loongarch-def.cc: Set evolution at initialization.
|
||||||
|
* config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete.
|
||||||
|
(ISA_BASE_LA64V110): Likewise.
|
||||||
|
(N_ISA_BASE_TYPES): Likewise.
|
||||||
|
(defined): Likewise.
|
||||||
|
* config/loongarch/loongarch-opts.cc: Likewise.
|
||||||
|
* config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise.
|
||||||
|
(ISA_BASE_IS_LA64V110): Likewise.
|
||||||
|
* config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
---
|
||||||
|
.../loongarch/genopts/loongarch-strings | 1 -
|
||||||
|
gcc/config/loongarch/genopts/loongarch.opt.in | 3 ---
|
||||||
|
gcc/config/loongarch/loongarch-cpu.cc | 23 +++++--------------
|
||||||
|
gcc/config/loongarch/loongarch-def.cc | 14 +++++++----
|
||||||
|
gcc/config/loongarch/loongarch-def.h | 12 ++--------
|
||||||
|
gcc/config/loongarch/loongarch-opts.cc | 3 ---
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 +---
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 1 -
|
||||||
|
gcc/config/loongarch/loongarch.opt | 3 ---
|
||||||
|
9 files changed, 19 insertions(+), 45 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
index 6c8a42af2..411ad5696 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch-strings
|
||||||
|
@@ -30,7 +30,6 @@ STR_CPU_LA664 la664
|
||||||
|
|
||||||
|
# Base architecture
|
||||||
|
STR_ISA_BASE_LA64V100 la64
|
||||||
|
-STR_ISA_BASE_LA64V110 la64v1.1
|
||||||
|
|
||||||
|
# -mfpu
|
||||||
|
OPTSTR_ISA_EXT_FPU fpu
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
index a49de07c9..cd5e75e4f 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||||
|
@@ -32,9 +32,6 @@ Basic ISAs of LoongArch:
|
||||||
|
EnumValue
|
||||||
|
Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100)
|
||||||
|
|
||||||
|
-EnumValue
|
||||||
|
-Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110)
|
||||||
|
-
|
||||||
|
;; ISA extensions / adjustments
|
||||||
|
Enum
|
||||||
|
Name(isa_ext_fpu) Type(int)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
index bbce82c9c..7e0625835 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpu.cc
|
||||||
|
@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "config.h"
|
||||||
|
#include "system.h"
|
||||||
|
#include "coretypes.h"
|
||||||
|
-#include "tm.h"
|
||||||
|
#include "diagnostic-core.h"
|
||||||
|
|
||||||
|
#include "loongarch-def.h"
|
||||||
|
@@ -32,19 +31,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "loongarch-cpucfg-map.h"
|
||||||
|
#include "loongarch-str.h"
|
||||||
|
|
||||||
|
-/* loongarch_isa_base_features defined here instead of loongarch-def.c
|
||||||
|
- because we need to use options.h. Pay attention on the order of elements
|
||||||
|
- in the initializer becaue ISO C++ does not allow C99 designated
|
||||||
|
- initializers! */
|
||||||
|
-
|
||||||
|
-#define ISA_BASE_LA64V110_FEATURES \
|
||||||
|
- (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \
|
||||||
|
- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)
|
||||||
|
-
|
||||||
|
-int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
|
||||||
|
- /* [ISA_BASE_LA64V100] = */ 0,
|
||||||
|
- /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES,
|
||||||
|
-};
|
||||||
|
|
||||||
|
/* Native CPU detection with "cpucfg" */
|
||||||
|
static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
|
||||||
|
@@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
/* Use the native value anyways. */
|
||||||
|
preset.simd = tmp;
|
||||||
|
|
||||||
|
+
|
||||||
|
+ int64_t hw_isa_evolution = 0;
|
||||||
|
+
|
||||||
|
/* Features added during ISA evolution. */
|
||||||
|
for (const auto &entry: cpucfg_map)
|
||||||
|
if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
|
||||||
|
- preset.evolution |= entry.isa_evolution_bit;
|
||||||
|
+ hw_isa_evolution |= entry.isa_evolution_bit;
|
||||||
|
|
||||||
|
if (native_cpu_type != CPU_NATIVE)
|
||||||
|
{
|
||||||
|
/* Check if the local CPU really supports the features of the base
|
||||||
|
ISA of probed native_cpu_type. If any feature is not detected,
|
||||||
|
either GCC or the hardware is buggy. */
|
||||||
|
- auto base_isa_feature = loongarch_isa_base_features[preset.base];
|
||||||
|
- if ((preset.evolution & base_isa_feature) != base_isa_feature)
|
||||||
|
+ if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution)
|
||||||
|
warning (0,
|
||||||
|
"detected base architecture %qs, but some of its "
|
||||||
|
"features are not detected; the detected base "
|
||||||
|
@@ -254,6 +242,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||||
|
"features will be enabled",
|
||||||
|
loongarch_isa_base_strings[preset.base]);
|
||||||
|
}
|
||||||
|
+ preset.evolution = hw_isa_evolution;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (tune_native_p)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
index 6990c86c2..bc6997e45 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
@@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License
|
||||||
|
along with GCC; see the file COPYING3. If not see
|
||||||
|
<http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
+#include "config.h"
|
||||||
|
+#include "system.h"
|
||||||
|
+#include "coretypes.h"
|
||||||
|
+#include "tm.h"
|
||||||
|
+
|
||||||
|
#include "loongarch-def.h"
|
||||||
|
#include "loongarch-str.h"
|
||||||
|
|
||||||
|
@@ -51,9 +56,11 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
|
||||||
|
.simd_ (ISA_EXT_SIMD_LASX))
|
||||||
|
.set (CPU_LA664,
|
||||||
|
loongarch_isa ()
|
||||||
|
- .base_ (ISA_BASE_LA64V110)
|
||||||
|
+ .base_ (ISA_BASE_LA64V100)
|
||||||
|
.fpu_ (ISA_EXT_FPU64)
|
||||||
|
- .simd_ (ISA_EXT_SIMD_LASX));
|
||||||
|
+ .simd_ (ISA_EXT_SIMD_LASX)
|
||||||
|
+ .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
|
||||||
|
+ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS));
|
||||||
|
|
||||||
|
static inline loongarch_cache la464_cache ()
|
||||||
|
{
|
||||||
|
@@ -136,8 +143,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> ()
|
||||||
|
|
||||||
|
array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings =
|
||||||
|
array<const char *, N_ISA_BASE_TYPES> ()
|
||||||
|
- .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100)
|
||||||
|
- .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110);
|
||||||
|
+ .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100);
|
||||||
|
|
||||||
|
array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings =
|
||||||
|
array<const char *, N_ISA_EXT_TYPES> ()
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
index 5ac70dfdd..f8f36f0e2 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||||
|
@@ -56,19 +56,11 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
/* enum isa_base */
|
||||||
|
|
||||||
|
/* LoongArch V1.00. */
|
||||||
|
-#define ISA_BASE_LA64V100 0
|
||||||
|
-/* LoongArch V1.10. */
|
||||||
|
-#define ISA_BASE_LA64V110 1
|
||||||
|
-#define N_ISA_BASE_TYPES 2
|
||||||
|
+#define ISA_BASE_LA64V100 0
|
||||||
|
+#define N_ISA_BASE_TYPES 1
|
||||||
|
extern loongarch_def_array<const char *, N_ISA_BASE_TYPES>
|
||||||
|
loongarch_isa_base_strings;
|
||||||
|
|
||||||
|
-#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
-/* Unlike other arrays, this is defined in loongarch-cpu.cc. The problem is
|
||||||
|
- we cannot use the C++ header options.h in loongarch-def.c. */
|
||||||
|
-extern int64_t loongarch_isa_base_features[];
|
||||||
|
-#endif
|
||||||
|
-
|
||||||
|
/* enum isa_ext_* */
|
||||||
|
#define ISA_EXT_NONE 0
|
||||||
|
#define ISA_EXT_FPU32 1
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
index 45fc521e4..d31becc67 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||||
|
@@ -285,9 +285,6 @@ config_target_isa:
|
||||||
|
/* Get default ISA from "-march" or its default value. */
|
||||||
|
t.isa = loongarch_cpu_default_isa[t.cpu_arch];
|
||||||
|
|
||||||
|
- if (t.cpu_arch != CPU_NATIVE)
|
||||||
|
- t.isa.evolution |= loongarch_isa_base_features[t.isa.base];
|
||||||
|
-
|
||||||
|
/* Apply incremental changes. */
|
||||||
|
/* "-march=native" overrides the default FPU type. */
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 0dabf1551..7010ddfec 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -77,8 +77,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
#define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64)
|
||||||
|
#define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D)
|
||||||
|
|
||||||
|
-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \
|
||||||
|
- || la_target.isa.base == ISA_BASE_LA64V110)
|
||||||
|
+#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100)
|
||||||
|
#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \
|
||||||
|
|| la_target.abi.base == ABI_BASE_LP64F \
|
||||||
|
|| la_target.abi.base == ABI_BASE_LP64S)
|
||||||
|
@@ -90,7 +89,6 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
/* TARGET_ macros for use in *.md template conditionals */
|
||||||
|
#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464)
|
||||||
|
#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664)
|
||||||
|
-#define ISA_BASE_IS_LA64V110 (la_target.isa.base == ISA_BASE_LA64V110)
|
||||||
|
|
||||||
|
/* Note: optimize_size may vary across functions,
|
||||||
|
while -m[no]-memcpy imposes a global constraint. */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index 0fee9abe5..7144bbe28 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -33,7 +33,6 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_CPU_LA664 "la664"
|
||||||
|
|
||||||
|
#define STR_ISA_BASE_LA64V100 "la64"
|
||||||
|
-#define STR_ISA_BASE_LA64V110 "la64v1.1"
|
||||||
|
|
||||||
|
#define OPTSTR_ISA_EXT_FPU "fpu"
|
||||||
|
#define STR_NONE "none"
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index ea0d5bb4e..7fe36feb9 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -40,9 +40,6 @@ Basic ISAs of LoongArch:
|
||||||
|
EnumValue
|
||||||
|
Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100)
|
||||||
|
|
||||||
|
-EnumValue
|
||||||
|
-Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110)
|
||||||
|
-
|
||||||
|
;; ISA extensions / adjustments
|
||||||
|
Enum
|
||||||
|
Name(isa_ext_fpu) Type(int)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
412
0058-LoongArch-Add-support-for-xorsign.patch
Normal file
412
0058-LoongArch-Add-support-for-xorsign.patch
Normal file
@ -0,0 +1,412 @@
|
|||||||
|
From dac02bbb72cae374ddc905fffcc6c94c901f9b26 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Fri, 17 Nov 2023 17:00:21 +0800
|
||||||
|
Subject: [PATCH 058/188] LoongArch: Add support for xorsign.
|
||||||
|
|
||||||
|
This patch adds support for xorsign pattern to scalar fp and vector. With the
|
||||||
|
new expanders, xorsign is handled uniformly with vector bitwise logical operations.
|
||||||
|
|
||||||
|
On LoongArch64, floating-point registers and vector registers share the same register file,
|
||||||
|
so this patch also allows conversion between LSX vector mode and scalar fp mode to
|
||||||
|
avoid unnecessary instruction generation.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md (xorsign<mode>3): New expander.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow
|
||||||
|
conversion between LSX vector mode and scalar fp mode.
|
||||||
|
* config/loongarch/loongarch.md (@xorsign<mode>3): New expander.
|
||||||
|
* config/loongarch/lsx.md (@xorsign<mode>3): Ditto.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test.
|
||||||
|
* gcc.target/loongarch/xorsign-run.c: New test.
|
||||||
|
* gcc.target/loongarch/xorsign.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 22 +++++--
|
||||||
|
gcc/config/loongarch/loongarch.cc | 5 ++
|
||||||
|
gcc/config/loongarch/loongarch.md | 17 ++++++
|
||||||
|
gcc/config/loongarch/lsx.md | 23 +++++--
|
||||||
|
.../loongarch/vector/lasx/lasx-xorsign-run.c | 60 +++++++++++++++++++
|
||||||
|
.../loongarch/vector/lasx/lasx-xorsign.c | 19 ++++++
|
||||||
|
.../loongarch/vector/lsx/lsx-xorsign-run.c | 60 +++++++++++++++++++
|
||||||
|
.../loongarch/vector/lsx/lsx-xorsign.c | 19 ++++++
|
||||||
|
.../gcc.target/loongarch/xorsign-run.c | 25 ++++++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/xorsign.c | 18 ++++++
|
||||||
|
10 files changed, 260 insertions(+), 8 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 116b30c07..de7c88f14 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -1065,10 +1065,10 @@
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_insn "xor<mode>3"
|
||||||
|
- [(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
|
||||||
|
- (xor:ILASX
|
||||||
|
- (match_operand:ILASX 1 "register_operand" "f,f,f")
|
||||||
|
- (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
|
||||||
|
+ [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
|
||||||
|
+ (xor:LASX
|
||||||
|
+ (match_operand:LASX 1 "register_operand" "f,f,f")
|
||||||
|
+ (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
|
||||||
|
"ISA_HAS_LASX"
|
||||||
|
"@
|
||||||
|
xvxor.v\t%u0,%u1,%u2
|
||||||
|
@@ -3061,6 +3061,20 @@
|
||||||
|
operands[5] = gen_reg_rtx (<MODE>mode);
|
||||||
|
})
|
||||||
|
|
||||||
|
+(define_expand "xorsign<mode>3"
|
||||||
|
+ [(set (match_dup 4)
|
||||||
|
+ (and:FLASX (match_dup 3)
|
||||||
|
+ (match_operand:FLASX 2 "register_operand")))
|
||||||
|
+ (set (match_operand:FLASX 0 "register_operand")
|
||||||
|
+ (xor:FLASX (match_dup 4)
|
||||||
|
+ (match_operand:FLASX 1 "register_operand")))]
|
||||||
|
+ "ISA_HAS_LASX"
|
||||||
|
+{
|
||||||
|
+ operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
|
||||||
|
+
|
||||||
|
+ operands[4] = gen_reg_rtx (<MODE>mode);
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
|
||||||
|
(define_insn "absv4df2"
|
||||||
|
[(set (match_operand:V4DF 0 "register_operand" "=f")
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 3ef7e3605..3c8ae9a42 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -6703,6 +6703,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to,
|
||||||
|
if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
+ /* Allow conversion between LSX vector mode and scalar fp mode. */
|
||||||
|
+ if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to))
|
||||||
|
+ || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))))
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
return !reg_classes_intersect_p (FP_REGS, rclass);
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index cfd7a8ec6..afc3c591f 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -1164,6 +1164,23 @@
|
||||||
|
"fcopysign.<fmt>\t%0,%1,%2"
|
||||||
|
[(set_attr "type" "fcopysign")
|
||||||
|
(set_attr "mode" "<UNITMODE>")])
|
||||||
|
+
|
||||||
|
+(define_expand "@xorsign<mode>3"
|
||||||
|
+ [(match_operand:ANYF 0 "register_operand")
|
||||||
|
+ (match_operand:ANYF 1 "register_operand")
|
||||||
|
+ (match_operand:ANYF 2 "register_operand")]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ machine_mode lsx_mode
|
||||||
|
+ = <MODE>mode == SFmode ? V4SFmode : V2DFmode;
|
||||||
|
+ rtx tmp = gen_reg_rtx (lsx_mode);
|
||||||
|
+ rtx op1 = lowpart_subreg (lsx_mode, operands[1], <MODE>mode);
|
||||||
|
+ rtx op2 = lowpart_subreg (lsx_mode, operands[2], <MODE>mode);
|
||||||
|
+ emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2));
|
||||||
|
+ emit_move_insn (operands[0],
|
||||||
|
+ lowpart_subreg (<MODE>mode, tmp, lsx_mode));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; ....................
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 232399934..ce6ec6d69 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -957,10 +957,10 @@
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_insn "xor<mode>3"
|
||||||
|
- [(set (match_operand:ILSX 0 "register_operand" "=f,f,f")
|
||||||
|
- (xor:ILSX
|
||||||
|
- (match_operand:ILSX 1 "register_operand" "f,f,f")
|
||||||
|
- (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
|
||||||
|
+ [(set (match_operand:LSX 0 "register_operand" "=f,f,f")
|
||||||
|
+ (xor:LSX
|
||||||
|
+ (match_operand:LSX 1 "register_operand" "f,f,f")
|
||||||
|
+ (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
"@
|
||||||
|
vxor.v\t%w0,%w1,%w2
|
||||||
|
@@ -2786,6 +2786,21 @@
|
||||||
|
operands[5] = gen_reg_rtx (<MODE>mode);
|
||||||
|
})
|
||||||
|
|
||||||
|
+(define_expand "@xorsign<mode>3"
|
||||||
|
+ [(set (match_dup 4)
|
||||||
|
+ (and:FLSX (match_dup 3)
|
||||||
|
+ (match_operand:FLSX 2 "register_operand")))
|
||||||
|
+ (set (match_operand:FLSX 0 "register_operand")
|
||||||
|
+ (xor:FLSX (match_dup 4)
|
||||||
|
+ (match_operand:FLSX 1 "register_operand")))]
|
||||||
|
+ "ISA_HAS_LSX"
|
||||||
|
+{
|
||||||
|
+ operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
|
||||||
|
+
|
||||||
|
+ operands[4] = gen_reg_rtx (<MODE>mode);
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+
|
||||||
|
(define_insn "absv2df2"
|
||||||
|
[(set (match_operand:V2DF 0 "register_operand" "=f")
|
||||||
|
(abs:V2DF (match_operand:V2DF 1 "register_operand" "f")))]
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..2295503d4
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c
|
||||||
|
@@ -0,0 +1,60 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -mlasx" } */
|
||||||
|
+/* { dg-require-effective-target loongarch_asx_hw } */
|
||||||
|
+
|
||||||
|
+#include "lasx-xorsign.c"
|
||||||
|
+
|
||||||
|
+extern void abort ();
|
||||||
|
+
|
||||||
|
+#define N 16
|
||||||
|
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
|
||||||
|
+ -12.5f, -15.6f, -18.7f, -21.8f,
|
||||||
|
+ 24.9f, 27.1f, 30.2f, 33.3f,
|
||||||
|
+ 36.4f, 39.5f, 42.6f, 45.7f};
|
||||||
|
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
|
||||||
|
+ -9.0f, 1.0f, -2.0f, 3.0f,
|
||||||
|
+ -4.0f, -5.0f, 6.0f, 7.0f,
|
||||||
|
+ -8.0f, -9.0f, 10.0f, 11.0f};
|
||||||
|
+float r[N];
|
||||||
|
+
|
||||||
|
+double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d,
|
||||||
|
+ -12.5d, -15.6d, -18.7d, -21.8d,
|
||||||
|
+ 24.9d, 27.1d, 30.2d, 33.3d,
|
||||||
|
+ 36.4d, 39.5d, 42.6d, 45.7d};
|
||||||
|
+double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d,
|
||||||
|
+ -9.0d, 1.0d, -2.0d, 3.0d,
|
||||||
|
+ -4.0d, -5.0d, 6.0d, 7.0d,
|
||||||
|
+ -8.0d, -9.0d, 10.0d, 11.0d};
|
||||||
|
+double rd[N];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+__attribute__ ((optimize ("-O0")))
|
||||||
|
+check_xorsignf (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < N; i++)
|
||||||
|
+ if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
|
||||||
|
+ abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+__attribute__ ((optimize ("-O0")))
|
||||||
|
+check_xorsign (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < N; i++)
|
||||||
|
+ if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
|
||||||
|
+ abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main (void)
|
||||||
|
+{
|
||||||
|
+ my_xorsignf (r, a, b, N);
|
||||||
|
+ /* check results: */
|
||||||
|
+ check_xorsignf ();
|
||||||
|
+
|
||||||
|
+ my_xorsign (rd, ad, bd, N);
|
||||||
|
+ /* check results: */
|
||||||
|
+ check_xorsign ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..190a9239b
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c
|
||||||
|
@@ -0,0 +1,19 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvand\\.v" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvxor\\.v" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "xvfmul" } } */
|
||||||
|
+
|
||||||
|
+double
|
||||||
|
+my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < n; i++)
|
||||||
|
+ a[i] = b[i] * __builtin_copysign (1.0d, c[i]);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+float
|
||||||
|
+my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < n; i++)
|
||||||
|
+ a[i] = b[i] * __builtin_copysignf (1.0f, c[i]);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..22c5c03cc
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c
|
||||||
|
@@ -0,0 +1,60 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -mlsx" } */
|
||||||
|
+/* { dg-require-effective-target loongarch_sx_hw } */
|
||||||
|
+
|
||||||
|
+#include "lsx-xorsign.c"
|
||||||
|
+
|
||||||
|
+extern void abort ();
|
||||||
|
+
|
||||||
|
+#define N 16
|
||||||
|
+float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
|
||||||
|
+ -12.5f, -15.6f, -18.7f, -21.8f,
|
||||||
|
+ 24.9f, 27.1f, 30.2f, 33.3f,
|
||||||
|
+ 36.4f, 39.5f, 42.6f, 45.7f};
|
||||||
|
+float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
|
||||||
|
+ -9.0f, 1.0f, -2.0f, 3.0f,
|
||||||
|
+ -4.0f, -5.0f, 6.0f, 7.0f,
|
||||||
|
+ -8.0f, -9.0f, 10.0f, 11.0f};
|
||||||
|
+float r[N];
|
||||||
|
+
|
||||||
|
+double ad[N] = {-0.1d, -3.2d, -6.3d, -9.4d,
|
||||||
|
+ -12.5d, -15.6d, -18.7d, -21.8d,
|
||||||
|
+ 24.9d, 27.1d, 30.2d, 33.3d,
|
||||||
|
+ 36.4d, 39.5d, 42.6d, 45.7d};
|
||||||
|
+double bd[N] = {-1.2d, 3.4d, -5.6d, 7.8d,
|
||||||
|
+ -9.0d, 1.0d, -2.0d, 3.0d,
|
||||||
|
+ -4.0d, -5.0d, 6.0d, 7.0d,
|
||||||
|
+ -8.0d, -9.0d, 10.0d, 11.0d};
|
||||||
|
+double rd[N];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+__attribute__ ((optimize ("-O0")))
|
||||||
|
+check_xorsignf (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < N; i++)
|
||||||
|
+ if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
|
||||||
|
+ abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+__attribute__ ((optimize ("-O0")))
|
||||||
|
+check_xorsign (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < N; i++)
|
||||||
|
+ if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
|
||||||
|
+ abort ();
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main (void)
|
||||||
|
+{
|
||||||
|
+ my_xorsignf (r, a, b, N);
|
||||||
|
+ /* check results: */
|
||||||
|
+ check_xorsignf ();
|
||||||
|
+
|
||||||
|
+ my_xorsign (rd, ad, bd, N);
|
||||||
|
+ /* check results: */
|
||||||
|
+ check_xorsign ();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..c2694c11e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c
|
||||||
|
@@ -0,0 +1,19 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -ftree-vectorize -mlsx" } */
|
||||||
|
+/* { dg-final { scan-assembler "vand\\.v" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vxor\\.v" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "vfmul" } } */
|
||||||
|
+
|
||||||
|
+double
|
||||||
|
+my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < n; i++)
|
||||||
|
+ a[i] = b[i] * __builtin_copysign (1.0d, c[i]);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+float
|
||||||
|
+my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < n; i++)
|
||||||
|
+ a[i] = b[i] * __builtin_copysignf (1.0f, c[i]);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..b4f28adf8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c
|
||||||
|
@@ -0,0 +1,25 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2 -mlsx" } */
|
||||||
|
+/* { dg-require-effective-target loongarch_sx_hw } */
|
||||||
|
+
|
||||||
|
+extern void abort(void);
|
||||||
|
+
|
||||||
|
+static double x = 2.0;
|
||||||
|
+static float y = 2.0;
|
||||||
|
+
|
||||||
|
+int main()
|
||||||
|
+{
|
||||||
|
+ if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5)
|
||||||
|
+ abort();
|
||||||
|
+
|
||||||
|
+ if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5)
|
||||||
|
+ abort();
|
||||||
|
+
|
||||||
|
+ if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5)
|
||||||
|
+ abort();
|
||||||
|
+
|
||||||
|
+ if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5)
|
||||||
|
+ abort();
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign.c b/gcc/testsuite/gcc.target/loongarch/xorsign.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..ca80603d4
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/xorsign.c
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx" } */
|
||||||
|
+/* { dg-final { scan-assembler "vand\\.v" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vxor\\.v" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "fcopysign" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "fmul" } } */
|
||||||
|
+
|
||||||
|
+double
|
||||||
|
+my_xorsign (double a, double b)
|
||||||
|
+{
|
||||||
|
+ return a * __builtin_copysign (1.0d, b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+float
|
||||||
|
+my_xorsignf (float a, float b)
|
||||||
|
+{
|
||||||
|
+ return a * __builtin_copysignf (1.0f, b);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
730
0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
Normal file
730
0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
Normal file
@ -0,0 +1,730 @@
|
|||||||
|
From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 6 Dec 2023 15:04:49 +0800
|
||||||
|
Subject: [PATCH 059/188] LoongArch: Add support for LoongArch V1.1 approximate
|
||||||
|
instructions.
|
||||||
|
|
||||||
|
 This patch adds define_insn/builtins/intrinsics for these instructions, and adds the option
|
||||||
|
-mfrecipe to control instruction generation.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
 * config/loongarch/genopts/isa-evolution.in (frecipe): Add.
|
||||||
|
* config/loongarch/larchintrin.h (__frecipe_s): New intrinsic.
|
||||||
|
(__frecipe_d): Ditto.
|
||||||
|
(__frsqrte_s): Ditto.
|
||||||
|
(__frsqrte_d): Ditto.
|
||||||
|
* config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern.
|
||||||
|
(lasx_xvfrsqrte_<flasxfmt>): Ditto.
|
||||||
|
* config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic.
|
||||||
|
(__lasx_xvfrecipe_d): Ditto.
|
||||||
|
(__lasx_xvfrsqrte_s): Ditto.
|
||||||
|
(__lasx_xvfrsqrte_d): Ditto.
|
||||||
|
* config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates.
|
||||||
|
(LSX_EXT_BUILTIN): New macro.
|
||||||
|
(LASX_EXT_BUILTIN): Ditto.
|
||||||
|
* config/loongarch/loongarch-cpucfg-map.h: Regenerate.
|
||||||
|
* config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe".
|
||||||
|
* config/loongarch/loongarch-def.cc: Regenerate.
|
||||||
|
* config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE.
|
||||||
|
* config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern.
|
||||||
|
(loongarch_frsqrte_<fmt>): Ditto.
|
||||||
|
* config/loongarch/loongarch.opt: Regenerate.
|
||||||
|
* config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern.
|
||||||
|
(lsx_vfrsqrte_<flsxfmt>): Ditto.
|
||||||
|
* config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic.
|
||||||
|
(__lsx_vfrecipe_d): Ditto.
|
||||||
|
(__lsx_vfrsqrte_s): Ditto.
|
||||||
|
(__lsx_vfrsqrte_d): Ditto.
|
||||||
|
* doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/larch-frecipe-builtin.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/genopts/isa-evolution.in | 1 +
|
||||||
|
gcc/config/loongarch/larchintrin.h | 38 +++++++++++++++++
|
||||||
|
gcc/config/loongarch/lasx.md | 24 +++++++++++
|
||||||
|
gcc/config/loongarch/lasxintrin.h | 34 +++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch-builtins.cc | 42 +++++++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch-c.cc | 3 ++
|
||||||
|
gcc/config/loongarch/loongarch-cpucfg-map.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch-def.cc | 3 +-
|
||||||
|
gcc/config/loongarch/loongarch-str.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.cc | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.md | 35 +++++++++++++++-
|
||||||
|
gcc/config/loongarch/loongarch.opt | 4 ++
|
||||||
|
gcc/config/loongarch/lsx.md | 24 +++++++++++
|
||||||
|
gcc/config/loongarch/lsxintrin.h | 34 +++++++++++++++
|
||||||
|
gcc/doc/extend.texi | 35 ++++++++++++++++
|
||||||
|
.../loongarch/larch-frecipe-builtin.c | 28 +++++++++++++
|
||||||
|
.../vector/lasx/lasx-frecipe-builtin.c | 30 +++++++++++++
|
||||||
|
.../vector/lsx/lsx-frecipe-builtin.c | 30 +++++++++++++
|
||||||
|
18 files changed, 365 insertions(+), 3 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
index a6bc3f87f..11a198b64 100644
|
||||||
|
--- a/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
+++ b/gcc/config/loongarch/genopts/isa-evolution.in
|
||||||
|
@@ -1,3 +1,4 @@
|
||||||
|
+2 25 frecipe Support frecipe.{s/d} and frsqrte.{s/d} instructions.
|
||||||
|
2 26 div32 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
|
||||||
|
2 27 lam-bh Support am{swap/add}[_db].{b/h} instructions.
|
||||||
|
2 28 lamcas Support amcas[_db].{b/h/w/d} instructions.
|
||||||
|
diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
|
||||||
|
index 2833f1487..22035e767 100644
|
||||||
|
--- a/gcc/config/loongarch/larchintrin.h
|
||||||
|
+++ b/gcc/config/loongarch/larchintrin.h
|
||||||
|
@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifdef __loongarch_frecipe
|
||||||
|
+/* Assembly instruction format: fd, fj. */
|
||||||
|
+/* Data types in instruction templates: SF, SF. */
|
||||||
|
+extern __inline void
|
||||||
|
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__frecipe_s (float _1)
|
||||||
|
+{
|
||||||
|
+ __builtin_loongarch_frecipe_s ((float) _1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: fd, fj. */
|
||||||
|
+/* Data types in instruction templates: DF, DF. */
|
||||||
|
+extern __inline void
|
||||||
|
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__frecipe_d (double _1)
|
||||||
|
+{
|
||||||
|
+ __builtin_loongarch_frecipe_d ((double) _1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: fd, fj. */
|
||||||
|
+/* Data types in instruction templates: SF, SF. */
|
||||||
|
+extern __inline void
|
||||||
|
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__frsqrte_s (float _1)
|
||||||
|
+{
|
||||||
|
+ __builtin_loongarch_frsqrte_s ((float) _1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: fd, fj. */
|
||||||
|
+/* Data types in instruction templates: DF, DF. */
|
||||||
|
+extern __inline void
|
||||||
|
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__frsqrte_d (double _1)
|
||||||
|
+{
|
||||||
|
+ __builtin_loongarch_frsqrte_d ((double) _1);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* Assembly instruction format: ui15. */
|
||||||
|
/* Data types in instruction templates: USI. */
|
||||||
|
#define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1))
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index de7c88f14..b1416f6c3 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -40,8 +40,10 @@
|
||||||
|
UNSPEC_LASX_XVFCVTL
|
||||||
|
UNSPEC_LASX_XVFLOGB
|
||||||
|
UNSPEC_LASX_XVFRECIP
|
||||||
|
+ UNSPEC_LASX_XVFRECIPE
|
||||||
|
UNSPEC_LASX_XVFRINT
|
||||||
|
UNSPEC_LASX_XVFRSQRT
|
||||||
|
+ UNSPEC_LASX_XVFRSQRTE
|
||||||
|
UNSPEC_LASX_XVFCMP_SAF
|
||||||
|
UNSPEC_LASX_XVFCMP_SEQ
|
||||||
|
UNSPEC_LASX_XVFCMP_SLE
|
||||||
|
@@ -1633,6 +1635,17 @@
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; Approximate Reciprocal Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn "lasx_xvfrecipe_<flasxfmt>"
|
||||||
|
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
|
||||||
|
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_LASX_XVFRECIPE))]
|
||||||
|
+ "ISA_HAS_LASX && TARGET_FRECIPE"
|
||||||
|
+ "xvfrecipe.<flasxfmt>\t%u0,%u1"
|
||||||
|
+ [(set_attr "type" "simd_fdiv")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
(define_insn "lasx_xvfrsqrt_<flasxfmt>"
|
||||||
|
[(set (match_operand:FLASX 0 "register_operand" "=f")
|
||||||
|
(unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
@@ -1642,6 +1655,17 @@
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; Approximate Reciprocal Square Root Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn "lasx_xvfrsqrte_<flasxfmt>"
|
||||||
|
+ [(set (match_operand:FLASX 0 "register_operand" "=f")
|
||||||
|
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_LASX_XVFRSQRTE))]
|
||||||
|
+ "ISA_HAS_LASX && TARGET_FRECIPE"
|
||||||
|
+ "xvfrsqrte.<flasxfmt>\t%u0,%u1"
|
||||||
|
+ [(set_attr "type" "simd_fdiv")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
(define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>"
|
||||||
|
[(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
|
||||||
|
(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h
|
||||||
|
index 7bce2c757..5e65e76e7 100644
|
||||||
|
--- a/gcc/config/loongarch/lasxintrin.h
|
||||||
|
+++ b/gcc/config/loongarch/lasxintrin.h
|
||||||
|
@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1)
|
||||||
|
return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1);
|
||||||
|
}
|
||||||
|
|
||||||
|
+#if defined(__loongarch_frecipe)
|
||||||
|
+/* Assembly instruction format: xd, xj. */
|
||||||
|
+/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m256 __lasx_xvfrecipe_s (__m256 _1)
|
||||||
|
+{
|
||||||
|
+ return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: xd, xj. */
|
||||||
|
+/* Data types in instruction templates: V4DF, V4DF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m256d __lasx_xvfrecipe_d (__m256d _1)
|
||||||
|
+{
|
||||||
|
+ return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: xd, xj. */
|
||||||
|
+/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m256 __lasx_xvfrsqrte_s (__m256 _1)
|
||||||
|
+{
|
||||||
|
+ return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: xd, xj. */
|
||||||
|
+/* Data types in instruction templates: V4DF, V4DF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m256d __lasx_xvfrsqrte_d (__m256d _1)
|
||||||
|
+{
|
||||||
|
+ return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* Assembly instruction format: xd, xj. */
|
||||||
|
/* Data types in instruction templates: V8SF, V8SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
index f4523c8bf..bc156bd36 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
@@ -120,6 +120,9 @@ struct loongarch_builtin_description
|
||||||
|
AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI)
|
||||||
|
AVAIL_ALL (lsx, ISA_HAS_LSX)
|
||||||
|
AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
+AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI)
|
||||||
|
+AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE)
|
||||||
|
+AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
|
||||||
|
|
||||||
|
/* Construct a loongarch_builtin_description from the given arguments.
|
||||||
|
|
||||||
|
@@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
"__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \
|
||||||
|
FUNCTION_TYPE, loongarch_builtin_avail_lsx }
|
||||||
|
|
||||||
|
+ /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_<INSN>
|
||||||
|
+ for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description
|
||||||
|
+ field. AVAIL is the name of the availability predicate, without the leading
|
||||||
|
+ loongarch_builtin_avail_. */
|
||||||
|
+#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
|
||||||
|
+ { CODE_FOR_lsx_ ## INSN, \
|
||||||
|
+ "__builtin_lsx_" #INSN, LARCH_BUILTIN_DIRECT, \
|
||||||
|
+ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL }
|
||||||
|
+
|
||||||
|
|
||||||
|
/* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_<INSN>
|
||||||
|
for instruction CODE_FOR_lsx_<INSN>. FUNCTION_TYPE is a builtin_description
|
||||||
|
@@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
|
||||||
|
"__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \
|
||||||
|
FUNCTION_TYPE, loongarch_builtin_avail_lasx }
|
||||||
|
|
||||||
|
+/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_<INSN>
|
||||||
|
+ for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description
|
||||||
|
+ field. AVAIL is the name of the availability predicate, without the leading
|
||||||
|
+ loongarch_builtin_avail_. */
|
||||||
|
+#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL) \
|
||||||
|
+ { CODE_FOR_lasx_ ## INSN, \
|
||||||
|
+ "__builtin_lasx_" #INSN, LARCH_BUILTIN_LASX, \
|
||||||
|
+ FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL }
|
||||||
|
+
|
||||||
|
/* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_<INSN>
|
||||||
|
for instruction CODE_FOR_lasx_<INSN>. FUNCTION_TYPE is a builtin_description
|
||||||
|
field. */
|
||||||
|
@@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
|
||||||
|
DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default),
|
||||||
|
DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default),
|
||||||
|
|
||||||
|
+ /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}. */
|
||||||
|
+
|
||||||
|
+ DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe),
|
||||||
|
+ DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe),
|
||||||
|
+ DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe),
|
||||||
|
+ DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe),
|
||||||
|
+
|
||||||
|
+ /* Built-in functions for new LSX instructions. */
|
||||||
|
+
|
||||||
|
+ LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe),
|
||||||
|
+ LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe),
|
||||||
|
+ LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe),
|
||||||
|
+ LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe),
|
||||||
|
+
|
||||||
|
+ /* Built-in functions for new LASX instructions. */
|
||||||
|
+
|
||||||
|
+ LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe),
|
||||||
|
+ LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe),
|
||||||
|
+ LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe),
|
||||||
|
+ LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe),
|
||||||
|
+
|
||||||
|
/* Built-in functions for LSX. */
|
||||||
|
LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI),
|
||||||
|
LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI),
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
|
||||||
|
index 76c8ea8db..a89477a74 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-c.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-c.cc
|
||||||
|
@@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
|
||||||
|
else
|
||||||
|
builtin_define ("__loongarch_frlen=0");
|
||||||
|
|
||||||
|
+ if (TARGET_HARD_FLOAT && TARGET_FRECIPE)
|
||||||
|
+ builtin_define ("__loongarch_frecipe");
|
||||||
|
+
|
||||||
|
if (ISA_HAS_LSX)
|
||||||
|
{
|
||||||
|
builtin_define ("__loongarch_simd");
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
index 02ff16712..148333c24 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
|
||||||
|
@@ -29,6 +29,7 @@ static constexpr struct {
|
||||||
|
unsigned int cpucfg_bit;
|
||||||
|
HOST_WIDE_INT isa_evolution_bit;
|
||||||
|
} cpucfg_map[] = {
|
||||||
|
+ { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE },
|
||||||
|
{ 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
|
||||||
|
{ 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
|
||||||
|
{ 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
index bc6997e45..c41804a18 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
@@ -60,7 +60,8 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
|
||||||
|
.fpu_ (ISA_EXT_FPU64)
|
||||||
|
.simd_ (ISA_EXT_SIMD_LASX)
|
||||||
|
.evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
|
||||||
|
- | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS));
|
||||||
|
+ | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
|
||||||
|
+ | OPTION_MASK_ISA_FRECIPE));
|
||||||
|
|
||||||
|
static inline loongarch_cache la464_cache ()
|
||||||
|
{
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
index 7144bbe28..a8821acb0 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-str.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||||
|
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#define STR_EXPLICIT_RELOCS_NONE "none"
|
||||||
|
#define STR_EXPLICIT_RELOCS_ALWAYS "always"
|
||||||
|
|
||||||
|
+#define OPTSTR_FRECIPE "frecipe"
|
||||||
|
#define OPTSTR_DIV32 "div32"
|
||||||
|
#define OPTSTR_LAM_BH "lam-bh"
|
||||||
|
#define OPTSTR_LAMCAS "lamcas"
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 3c8ae9a42..ce1c0a8bd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -11503,6 +11503,7 @@ loongarch_asm_code_end (void)
|
||||||
|
loongarch_cpu_strings [la_target.cpu_tune]);
|
||||||
|
fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
|
||||||
|
loongarch_isa_base_strings [la_target.isa.base]);
|
||||||
|
+ DUMP_FEATURE (TARGET_FRECIPE);
|
||||||
|
DUMP_FEATURE (TARGET_DIV32);
|
||||||
|
DUMP_FEATURE (TARGET_LAM_BH);
|
||||||
|
DUMP_FEATURE (TARGET_LAMCAS);
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index afc3c591f..9080cec1c 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -59,6 +59,12 @@
|
||||||
|
;; Stack tie
|
||||||
|
UNSPEC_TIE
|
||||||
|
|
||||||
|
+ ;; RSQRT
|
||||||
|
+ UNSPEC_RSQRTE
|
||||||
|
+
|
||||||
|
+ ;; RECIP
|
||||||
|
+ UNSPEC_RECIPE
|
||||||
|
+
|
||||||
|
;; CRC
|
||||||
|
UNSPEC_CRC
|
||||||
|
UNSPEC_CRCC
|
||||||
|
@@ -220,6 +226,7 @@
|
||||||
|
;; fmadd floating point multiply-add
|
||||||
|
;; fdiv floating point divide
|
||||||
|
;; frdiv floating point reciprocal divide
|
||||||
|
+;; frecipe floating point approximate reciprocal
|
||||||
|
;; fabs floating point absolute value
|
||||||
|
;; flogb floating point exponent extract
|
||||||
|
;; fneg floating point negation
|
||||||
|
@@ -229,6 +236,7 @@
|
||||||
|
;; fscaleb floating point scale
|
||||||
|
;; fsqrt floating point square root
|
||||||
|
;; frsqrt floating point reciprocal square root
|
||||||
|
+;; frsqrte floating point approximate reciprocal square root
|
||||||
|
;; multi multiword sequence (or user asm statements)
|
||||||
|
;; atomic atomic memory update instruction
|
||||||
|
;; syncloop memory atomic operation implemented as a sync loop
|
||||||
|
@@ -238,8 +246,8 @@
|
||||||
|
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||||
|
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||||
|
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||||
|
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
|
||||||
|
- fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost,
|
||||||
|
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
|
||||||
|
+ fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost,
|
||||||
|
simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd,
|
||||||
|
simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp,
|
||||||
|
simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill,
|
||||||
|
@@ -908,6 +916,18 @@
|
||||||
|
[(set_attr "type" "frdiv")
|
||||||
|
(set_attr "mode" "<UNITMODE>")])
|
||||||
|
|
||||||
|
+;; Approximate Reciprocal Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn "loongarch_frecipe_<fmt>"
|
||||||
|
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||||
|
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_RECIPE))]
|
||||||
|
+ "TARGET_FRECIPE"
|
||||||
|
+ "frecipe.<fmt>\t%0,%1"
|
||||||
|
+ [(set_attr "type" "frecipe")
|
||||||
|
+ (set_attr "mode" "<UNITMODE>")
|
||||||
|
+ (set_attr "insn_count" "1")])
|
||||||
|
+
|
||||||
|
;; Integer division and modulus.
|
||||||
|
(define_expand "<optab><mode>3"
|
||||||
|
[(set (match_operand:GPR 0 "register_operand")
|
||||||
|
@@ -1133,6 +1153,17 @@
|
||||||
|
[(set_attr "type" "frsqrt")
|
||||||
|
(set_attr "mode" "<UNITMODE>")
|
||||||
|
(set_attr "insn_count" "1")])
|
||||||
|
+
|
||||||
|
+;; Approximate Reciprocal Square Root Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn "loongarch_frsqrte_<fmt>"
|
||||||
|
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||||
|
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_RSQRTE))]
|
||||||
|
+ "TARGET_FRECIPE"
|
||||||
|
+ "frsqrte.<fmt>\t%0,%1"
|
||||||
|
+ [(set_attr "type" "frsqrte")
|
||||||
|
+ (set_attr "mode" "<UNITMODE>")])
|
||||||
|
|
||||||
|
;;
|
||||||
|
;; ....................
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||||
|
index 7fe36feb9..e7bc8bed4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.opt
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.opt
|
||||||
|
@@ -260,6 +260,10 @@ default value is 4.
|
||||||
|
Variable
|
||||||
|
HOST_WIDE_INT isa_evolution = 0
|
||||||
|
|
||||||
|
+mfrecipe
|
||||||
|
+Target Mask(ISA_FRECIPE) Var(isa_evolution)
|
||||||
|
+Support frecipe.{s/d} and frsqrte.{s/d} instructions.
|
||||||
|
+
|
||||||
|
mdiv32
|
||||||
|
Target Mask(ISA_DIV32) Var(isa_evolution)
|
||||||
|
Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index ce6ec6d69..37bdc6910 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -42,8 +42,10 @@
|
||||||
|
UNSPEC_LSX_VFCVTL
|
||||||
|
UNSPEC_LSX_VFLOGB
|
||||||
|
UNSPEC_LSX_VFRECIP
|
||||||
|
+ UNSPEC_LSX_VFRECIPE
|
||||||
|
UNSPEC_LSX_VFRINT
|
||||||
|
UNSPEC_LSX_VFRSQRT
|
||||||
|
+ UNSPEC_LSX_VFRSQRTE
|
||||||
|
UNSPEC_LSX_VFCMP_SAF
|
||||||
|
UNSPEC_LSX_VFCMP_SEQ
|
||||||
|
UNSPEC_LSX_VFCMP_SLE
|
||||||
|
@@ -1546,6 +1548,17 @@
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; Approximate Reciprocal Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn "lsx_vfrecipe_<flsxfmt>"
|
||||||
|
+ [(set (match_operand:FLSX 0 "register_operand" "=f")
|
||||||
|
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_LSX_VFRECIPE))]
|
||||||
|
+ "ISA_HAS_LSX && TARGET_FRECIPE"
|
||||||
|
+ "vfrecipe.<flsxfmt>\t%w0,%w1"
|
||||||
|
+ [(set_attr "type" "simd_fdiv")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
(define_insn "lsx_vfrsqrt_<flsxfmt>"
|
||||||
|
[(set (match_operand:FLSX 0 "register_operand" "=f")
|
||||||
|
(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
@@ -1555,6 +1568,17 @@
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; Approximate Reciprocal Square Root Instructions.
|
||||||
|
+
|
||||||
|
+(define_insn "lsx_vfrsqrte_<flsxfmt>"
|
||||||
|
+ [(set (match_operand:FLSX 0 "register_operand" "=f")
|
||||||
|
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_LSX_VFRSQRTE))]
|
||||||
|
+ "ISA_HAS_LSX && TARGET_FRECIPE"
|
||||||
|
+ "vfrsqrte.<flsxfmt>\t%w0,%w1"
|
||||||
|
+ [(set_attr "type" "simd_fdiv")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
(define_insn "lsx_vftint_u_<ilsxfmt_u>_<flsxfmt>"
|
||||||
|
[(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h
|
||||||
|
index 29553c093..57a6fc40a 100644
|
||||||
|
--- a/gcc/config/loongarch/lsxintrin.h
|
||||||
|
+++ b/gcc/config/loongarch/lsxintrin.h
|
||||||
|
@@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1)
|
||||||
|
return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1);
|
||||||
|
}
|
||||||
|
|
||||||
|
+#if defined(__loongarch_frecipe)
|
||||||
|
+/* Assembly instruction format: vd, vj. */
|
||||||
|
+/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m128 __lsx_vfrecipe_s (__m128 _1)
|
||||||
|
+{
|
||||||
|
+ return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: vd, vj. */
|
||||||
|
+/* Data types in instruction templates: V2DF, V2DF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m128d __lsx_vfrecipe_d (__m128d _1)
|
||||||
|
+{
|
||||||
|
+ return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: vd, vj. */
|
||||||
|
+/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m128 __lsx_vfrsqrte_s (__m128 _1)
|
||||||
|
+{
|
||||||
|
+ return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Assembly instruction format: vd, vj. */
|
||||||
|
+/* Data types in instruction templates: V2DF, V2DF. */
|
||||||
|
+extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
+__m128d __lsx_vfrsqrte_d (__m128d _1)
|
||||||
|
+{
|
||||||
|
+ return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1);
|
||||||
|
+}
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
/* Assembly instruction format: vd, vj. */
|
||||||
|
/* Data types in instruction templates: V4SF, V4SF. */
|
||||||
|
extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||||
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||||
|
index 7edd3974d..bb042ae78 100644
|
||||||
|
--- a/gcc/doc/extend.texi
|
||||||
|
+++ b/gcc/doc/extend.texi
|
||||||
|
@@ -16187,6 +16187,14 @@ The intrinsics provided are listed below:
|
||||||
|
void __builtin_loongarch_break (imm0_32767)
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
|
+These intrinsic functions are available by using @option{-mfrecipe}.
|
||||||
|
+@smallexample
|
||||||
|
+ float __builtin_loongarch_frecipe_s (float);
|
||||||
|
+ double __builtin_loongarch_frecipe_d (double);
|
||||||
|
+ float __builtin_loongarch_frsqrte_s (float);
|
||||||
|
+ double __builtin_loongarch_frsqrte_d (double);
|
||||||
|
+@end smallexample
|
||||||
|
+
|
||||||
|
@emph{Note:}Since the control register is divided into 32-bit and 64-bit,
|
||||||
|
but the access instruction is not distinguished. So GCC renames the control
|
||||||
|
instructions when implementing intrinsics.
|
||||||
|
@@ -16259,6 +16267,15 @@ function you need to include @code{larchintrin.h}.
|
||||||
|
void __break (imm0_32767)
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
|
+These intrinsic functions are available by including @code{larchintrin.h} and
|
||||||
|
+using @option{-mfrecipe}.
|
||||||
|
+@smallexample
|
||||||
|
+ float __frecipe_s (float);
|
||||||
|
+ double __frecipe_d (double);
|
||||||
|
+ float __frsqrte_s (float);
|
||||||
|
+ double __frsqrte_d (double);
|
||||||
|
+@end smallexample
|
||||||
|
+
|
||||||
|
Returns the value that is currently set in the @samp{tp} register.
|
||||||
|
@smallexample
|
||||||
|
void * __builtin_thread_pointer (void)
|
||||||
|
@@ -17085,6 +17102,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255);
|
||||||
|
__m128i __lsx_vxor_v (__m128i, __m128i);
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
|
+These intrinsic functions are available by including @code{lsxintrin.h} and
|
||||||
|
+using @option{-mfrecipe} and @option{-mlsx}.
|
||||||
|
+@smallexample
|
||||||
|
+__m128d __lsx_vfrecipe_d (__m128d);
|
||||||
|
+__m128 __lsx_vfrecipe_s (__m128);
|
||||||
|
+__m128d __lsx_vfrsqrte_d (__m128d);
|
||||||
|
+__m128 __lsx_vfrsqrte_s (__m128);
|
||||||
|
+@end smallexample
|
||||||
|
+
|
||||||
|
@node LoongArch ASX Vector Intrinsics
|
||||||
|
@subsection LoongArch ASX Vector Intrinsics
|
||||||
|
|
||||||
|
@@ -17924,6 +17950,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255);
|
||||||
|
__m256i __lasx_xvxor_v (__m256i, __m256i);
|
||||||
|
@end smallexample
|
||||||
|
|
||||||
|
+These intrinsic functions are available by including @code{lasxintrin.h} and
|
||||||
|
+using @option{-mfrecipe} and @option{-mlasx}.
|
||||||
|
+@smallexample
|
||||||
|
+__m256d __lasx_xvfrecipe_d (__m256d);
|
||||||
|
+__m256 __lasx_xvfrecipe_s (__m256);
|
||||||
|
+__m256d __lasx_xvfrsqrte_d (__m256d);
|
||||||
|
+__m256 __lasx_xvfrsqrte_s (__m256);
|
||||||
|
+@end smallexample
|
||||||
|
+
|
||||||
|
@node MIPS DSP Built-in Functions
|
||||||
|
@subsection MIPS DSP Built-in Functions
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..b9329f346
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mfrecipe" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */
|
||||||
|
+
|
||||||
|
+float
|
||||||
|
+test_frecipe_s (float _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_loongarch_frecipe_s (_1);
|
||||||
|
+}
|
||||||
|
+double
|
||||||
|
+test_frecipe_d (double _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_loongarch_frecipe_d (_1);
|
||||||
|
+}
|
||||||
|
+float
|
||||||
|
+test_frsqrte_s (float _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_loongarch_frsqrte_s (_1);
|
||||||
|
+}
|
||||||
|
+double
|
||||||
|
+test_frsqrte_d (double _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_loongarch_frsqrte_d (_1);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..522535b45
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
|
||||||
|
@@ -0,0 +1,30 @@
|
||||||
|
+/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mlasx -mfrecipe" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */
|
||||||
|
+
|
||||||
|
+#include <lasxintrin.h>
|
||||||
|
+
|
||||||
|
+v8f32
|
||||||
|
+__lasx_xvfrecipe_s (v8f32 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lasx_xvfrecipe_s (_1);
|
||||||
|
+}
|
||||||
|
+v4f64
|
||||||
|
+__lasx_xvfrecipe_d (v4f64 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lasx_xvfrecipe_d (_1);
|
||||||
|
+}
|
||||||
|
+v8f32
|
||||||
|
+__lasx_xvfrsqrte_s (v8f32 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lasx_xvfrsqrte_s (_1);
|
||||||
|
+}
|
||||||
|
+v4f64
|
||||||
|
+__lasx_xvfrsqrte_d (v4f64 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lasx_xvfrsqrte_d (_1);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..4ad0cb0ff
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
|
||||||
|
@@ -0,0 +1,30 @@
|
||||||
|
+/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mlsx -mfrecipe" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */
|
||||||
|
+
|
||||||
|
+#include <lsxintrin.h>
|
||||||
|
+
|
||||||
|
+v4f32
|
||||||
|
+__lsx_vfrecipe_s (v4f32 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lsx_vfrecipe_s (_1);
|
||||||
|
+}
|
||||||
|
+v2f64
|
||||||
|
+__lsx_vfrecipe_d (v2f64 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lsx_vfrecipe_d (_1);
|
||||||
|
+}
|
||||||
|
+v4f32
|
||||||
|
+__lsx_vfrsqrte_s (v4f32 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lsx_vfrsqrte_s (_1);
|
||||||
|
+}
|
||||||
|
+v2f64
|
||||||
|
+__lsx_vfrsqrte_d (v2f64 _1)
|
||||||
|
+{
|
||||||
|
+ return __builtin_lsx_vfrsqrte_d (_1);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
257
0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
Normal file
257
0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
Normal file
@ -0,0 +1,257 @@
|
|||||||
|
From e8210e26ac638eb443f8991fee6d412b297cb279 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 6 Dec 2023 15:04:50 +0800
|
||||||
|
Subject: [PATCH 060/188] LoongArch: Use standard pattern name for
|
||||||
|
xvfrsqrt/vfrsqrt instructions.
|
||||||
|
|
||||||
|
Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt<mode>2 to align with standard
|
||||||
|
pattern name. Define function use_rsqrt_p to decide when to use rsqrt optab.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md (lasx_xvfrsqrt_<flasxfmt>): Renamed to ..
|
||||||
|
(rsqrt<mode>2): .. this.
|
||||||
|
* config/loongarch/loongarch-builtins.cc
|
||||||
|
(CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name.
|
||||||
|
(CODE_FOR_lsx_vfrsqrt_s): Ditto.
|
||||||
|
(CODE_FOR_lasx_xvfrsqrt_d): Ditto.
|
||||||
|
(CODE_FOR_lasx_xvfrsqrt_s): Ditto.
|
||||||
|
* config/loongarch/loongarch.cc (use_rsqrt_p): New function.
|
||||||
|
(loongarch_optab_supported_p): Ditto.
|
||||||
|
(TARGET_OPTAB_SUPPORTED_P): New hook.
|
||||||
|
* config/loongarch/loongarch.md (*rsqrt<mode>a): Remove.
|
||||||
|
(*rsqrt<mode>2): New insn pattern.
|
||||||
|
(*rsqrt<mode>b): Remove.
|
||||||
|
* config/loongarch/lsx.md (lsx_vfrsqrt_<flsxfmt>): Renamed to ..
|
||||||
|
(rsqrt<mode>2): .. this.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 6 ++---
|
||||||
|
gcc/config/loongarch/loongarch-builtins.cc | 4 +++
|
||||||
|
gcc/config/loongarch/loongarch.cc | 27 +++++++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch.md | 24 +++++------------
|
||||||
|
gcc/config/loongarch/lsx.md | 6 ++---
|
||||||
|
.../loongarch/vector/lasx/lasx-rsqrt.c | 26 ++++++++++++++++++
|
||||||
|
.../loongarch/vector/lsx/lsx-rsqrt.c | 26 ++++++++++++++++++
|
||||||
|
7 files changed, 96 insertions(+), 23 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index b1416f6c3..3a4a1fe51 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -1646,10 +1646,10 @@
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lasx_xvfrsqrt_<flasxfmt>"
|
||||||
|
+(define_insn "rsqrt<mode>2"
|
||||||
|
[(set (match_operand:FLASX 0 "register_operand" "=f")
|
||||||
|
- (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVFRSQRT))]
|
||||||
|
+ (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_LASX_XVFRSQRT))]
|
||||||
|
"ISA_HAS_LASX"
|
||||||
|
"xvfrsqrt.<flasxfmt>\t%u0,%u1"
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
index bc156bd36..4aae27a5e 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
@@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
|
||||||
|
#define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h
|
||||||
|
#define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w
|
||||||
|
#define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d
|
||||||
|
+#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2
|
||||||
|
+#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2
|
||||||
|
|
||||||
|
/* LoongArch ASX define CODE_FOR_lasx_mxxx */
|
||||||
|
#define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3
|
||||||
|
@@ -776,6 +778,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
|
||||||
|
#define CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu
|
||||||
|
#define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu
|
||||||
|
#define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du
|
||||||
|
+#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2
|
||||||
|
+#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2
|
||||||
|
|
||||||
|
static const struct loongarch_builtin_description loongarch_builtins[] = {
|
||||||
|
#define LARCH_MOVFCSR2GR 0
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index ce1c0a8bd..95aa9453b 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -11487,6 +11487,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
|
||||||
|
is_packed);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static bool
|
||||||
|
+use_rsqrt_p (void)
|
||||||
|
+{
|
||||||
|
+ return (flag_finite_math_only
|
||||||
|
+ && !flag_trapping_math
|
||||||
|
+ && flag_unsafe_math_optimizations);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Implement the TARGET_OPTAB_SUPPORTED_P hook. */
|
||||||
|
+
|
||||||
|
+static bool
|
||||||
|
+loongarch_optab_supported_p (int op, machine_mode, machine_mode,
|
||||||
|
+ optimization_type opt_type)
|
||||||
|
+{
|
||||||
|
+ switch (op)
|
||||||
|
+ {
|
||||||
|
+ case rsqrt_optab:
|
||||||
|
+ return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ return true;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* If -fverbose-asm, dump some info for debugging. */
|
||||||
|
static void
|
||||||
|
loongarch_asm_code_end (void)
|
||||||
|
@@ -11625,6 +11649,9 @@ loongarch_asm_code_end (void)
|
||||||
|
#undef TARGET_FUNCTION_ARG_BOUNDARY
|
||||||
|
#define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary
|
||||||
|
|
||||||
|
+#undef TARGET_OPTAB_SUPPORTED_P
|
||||||
|
+#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
|
||||||
|
+
|
||||||
|
#undef TARGET_VECTOR_MODE_SUPPORTED_P
|
||||||
|
#define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 9080cec1c..4dfe583e2 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -60,6 +60,7 @@
|
||||||
|
UNSPEC_TIE
|
||||||
|
|
||||||
|
;; RSQRT
|
||||||
|
+ UNSPEC_RSQRT
|
||||||
|
UNSPEC_RSQRTE
|
||||||
|
|
||||||
|
;; RECIP
|
||||||
|
@@ -1134,25 +1135,14 @@
|
||||||
|
(set_attr "mode" "<UNITMODE>")
|
||||||
|
(set_attr "insn_count" "1")])
|
||||||
|
|
||||||
|
-(define_insn "*rsqrt<mode>a"
|
||||||
|
+(define_insn "*rsqrt<mode>2"
|
||||||
|
[(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||||
|
- (div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
|
||||||
|
- (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))]
|
||||||
|
- "flag_unsafe_math_optimizations"
|
||||||
|
- "frsqrt.<fmt>\t%0,%2"
|
||||||
|
- [(set_attr "type" "frsqrt")
|
||||||
|
- (set_attr "mode" "<UNITMODE>")
|
||||||
|
- (set_attr "insn_count" "1")])
|
||||||
|
-
|
||||||
|
-(define_insn "*rsqrt<mode>b"
|
||||||
|
- [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||||
|
- (sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
|
||||||
|
- (match_operand:ANYF 2 "register_operand" "f"))))]
|
||||||
|
- "flag_unsafe_math_optimizations"
|
||||||
|
- "frsqrt.<fmt>\t%0,%2"
|
||||||
|
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_RSQRT))]
|
||||||
|
+ "TARGET_HARD_FLOAT"
|
||||||
|
+ "frsqrt.<fmt>\t%0,%1"
|
||||||
|
[(set_attr "type" "frsqrt")
|
||||||
|
- (set_attr "mode" "<UNITMODE>")
|
||||||
|
- (set_attr "insn_count" "1")])
|
||||||
|
+ (set_attr "mode" "<UNITMODE>")])
|
||||||
|
|
||||||
|
;; Approximate Reciprocal Square Root Instructions.
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 37bdc6910..cb4a448e7 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -1559,10 +1559,10 @@
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lsx_vfrsqrt_<flsxfmt>"
|
||||||
|
+(define_insn "rsqrt<mode>2"
|
||||||
|
[(set (match_operand:FLSX 0 "register_operand" "=f")
|
||||||
|
- (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VFRSQRT))]
|
||||||
|
+ (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
+ UNSPEC_LSX_VFRSQRT))]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
"vfrsqrt.<flsxfmt>\t%w0,%w1"
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..24316944d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -ffast-math" } */
|
||||||
|
+/* { dg-final { scan-assembler "xvfrsqrt.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "xvfrsqrt.d" } } */
|
||||||
|
+
|
||||||
|
+extern float sqrtf (float);
|
||||||
|
+
|
||||||
|
+float a[8], b[8];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo1(void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 8; i++)
|
||||||
|
+ a[i] = 1 / sqrtf (b[i]);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+extern double sqrt (double);
|
||||||
|
+
|
||||||
|
+double da[4], db[4];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo2(void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 4; i++)
|
||||||
|
+ da[i] = 1 / sqrt (db[i]);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..519cc4764
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -ffast-math" } */
|
||||||
|
+/* { dg-final { scan-assembler "vfrsqrt.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "vfrsqrt.d" } } */
|
||||||
|
+
|
||||||
|
+extern float sqrtf (float);
|
||||||
|
+
|
||||||
|
+float a[4], b[4];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo1(void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 4; i++)
|
||||||
|
+ a[i] = 1 / sqrtf (b[i]);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+extern double sqrt (double);
|
||||||
|
+
|
||||||
|
+double da[2], db[2];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo2(void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < 2; i++)
|
||||||
|
+ da[i] = 1 / sqrt (db[i]);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
135
0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
Normal file
135
0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
From 74924710ee8d662d883bf898d69aef1946d91ea5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 6 Dec 2023 15:04:51 +0800
|
||||||
|
Subject: [PATCH 061/188] LoongArch: Redefine pattern for xvfrecip/vfrecip
|
||||||
|
instructions.
|
||||||
|
|
||||||
|
Redefine the pattern for [x]vfrecip instructions to use rtx code instead of unspec, and enable
|
||||||
|
[x]vfrecip instructions to be generated during auto-vectorization.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md (lasx_xvfrecip_<flasxfmt>): Renamed to ..
|
||||||
|
(recip<mode>3): .. this.
|
||||||
|
* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine
|
||||||
|
to new pattern name.
|
||||||
|
(CODE_FOR_lsx_vfrecip_s): Ditto.
|
||||||
|
(CODE_FOR_lasx_xvfrecip_d): Ditto.
|
||||||
|
(CODE_FOR_lasx_xvfrecip_s): Ditto.
|
||||||
|
(loongarch_expand_builtin_direct): For the vector recip instructions, construct a
|
||||||
|
temporary parameter const1_vector.
|
||||||
|
* config/loongarch/lsx.md (lsx_vfrecip_<flsxfmt>): Renamed to ..
|
||||||
|
(recip<mode>3): .. this.
|
||||||
|
* config/loongarch/predicates.md (const_vector_1_operand): New predicate.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 8 ++++----
|
||||||
|
gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++
|
||||||
|
gcc/config/loongarch/lsx.md | 8 ++++----
|
||||||
|
gcc/config/loongarch/predicates.md | 4 ++++
|
||||||
|
4 files changed, 32 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 3a4a1fe51..ad49a3ffb 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -1626,12 +1626,12 @@
|
||||||
|
[(set_attr "type" "simd_fminmax")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lasx_xvfrecip_<flasxfmt>"
|
||||||
|
+(define_insn "recip<mode>3"
|
||||||
|
[(set (match_operand:FLASX 0 "register_operand" "=f")
|
||||||
|
- (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVFRECIP))]
|
||||||
|
+ (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "")
|
||||||
|
+ (match_operand:FLASX 2 "register_operand" "f")))]
|
||||||
|
"ISA_HAS_LASX"
|
||||||
|
- "xvfrecip.<flasxfmt>\t%u0,%u1"
|
||||||
|
+ "xvfrecip.<flasxfmt>\t%u0,%u2"
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
index 4aae27a5e..85849ed29 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-builtins.cc
|
||||||
|
@@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
|
||||||
|
#define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d
|
||||||
|
#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2
|
||||||
|
#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2
|
||||||
|
+#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3
|
||||||
|
+#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3
|
||||||
|
|
||||||
|
/* LoongArch ASX define CODE_FOR_lasx_mxxx */
|
||||||
|
#define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3
|
||||||
|
@@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
|
||||||
|
#define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du
|
||||||
|
#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2
|
||||||
|
#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2
|
||||||
|
+#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3
|
||||||
|
+#define CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3
|
||||||
|
|
||||||
|
static const struct loongarch_builtin_description loongarch_builtins[] = {
|
||||||
|
#define LARCH_MOVFCSR2GR 0
|
||||||
|
@@ -3019,6 +3023,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp,
|
||||||
|
if (has_target_p)
|
||||||
|
create_output_operand (&ops[opno++], target, TYPE_MODE (TREE_TYPE (exp)));
|
||||||
|
|
||||||
|
+ /* For the vector reciprocal instructions, we need to construct a temporary
|
||||||
|
+ parameter const1_vector. */
|
||||||
|
+ switch (icode)
|
||||||
|
+ {
|
||||||
|
+ case CODE_FOR_recipv8sf3:
|
||||||
|
+ case CODE_FOR_recipv4df3:
|
||||||
|
+ case CODE_FOR_recipv4sf3:
|
||||||
|
+ case CODE_FOR_recipv2df3:
|
||||||
|
+ loongarch_prepare_builtin_arg (&ops[2], exp, 0);
|
||||||
|
+ create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode);
|
||||||
|
+ return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
|
||||||
|
+
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
/* Map the arguments to the other operands. */
|
||||||
|
gcc_assert (opno + call_expr_nargs (exp)
|
||||||
|
== insn_data[icode].n_generator_args);
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index cb4a448e7..f2774f021 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -1539,12 +1539,12 @@
|
||||||
|
[(set_attr "type" "simd_fminmax")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lsx_vfrecip_<flsxfmt>"
|
||||||
|
+(define_insn "recip<mode>3"
|
||||||
|
[(set (match_operand:FLSX 0 "register_operand" "=f")
|
||||||
|
- (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VFRECIP))]
|
||||||
|
+ (div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "")
|
||||||
|
+ (match_operand:FLSX 2 "register_operand" "f")))]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
- "vfrecip.<flsxfmt>\t%w0,%w1"
|
||||||
|
+ "vfrecip.<flsxfmt>\t%w0,%w2"
|
||||||
|
[(set_attr "type" "simd_fdiv")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 30a0dee9f..572550dbc 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -227,6 +227,10 @@
|
||||||
|
(and (match_code "const_int,const_wide_int,const_double,const_vector")
|
||||||
|
(match_test "op == CONST1_RTX (GET_MODE (op))")))
|
||||||
|
|
||||||
|
+(define_predicate "const_vector_1_operand"
|
||||||
|
+ (and (match_code "const_vector")
|
||||||
|
+ (match_test "op == CONST1_RTX (GET_MODE (op))")))
|
||||||
|
+
|
||||||
|
(define_predicate "reg_or_1_operand"
|
||||||
|
(ior (match_operand 0 "const_1_operand")
|
||||||
|
(match_operand 0 "register_operand")))
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
1096
0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
Normal file
1096
0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,83 @@
|
|||||||
|
From bb211ae35474a9fa1a8189f0a4c525ce3d8c280e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 6 Dec 2023 15:04:53 +0800
|
||||||
|
Subject: [PATCH 063/188] LoongArch: Vectorized loop unrolling is disabled for
|
||||||
|
divf/sqrtf/rsqrtf when -mrecip is enabled.
|
||||||
|
|
||||||
|
Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number
|
||||||
|
of generated instructions is close to or exceeds the maximum issue instructions per cycle of the
|
||||||
|
LoongArch, so vectorized loop unrolling is not performed on them.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor):
|
||||||
|
If m_has_recip is true, return an unroll factor of 1.
|
||||||
|
(loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++--
|
||||||
|
1 file changed, 34 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 18326ce47..d64777179 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -3970,7 +3970,9 @@ protected:
|
||||||
|
/* Reduction factor for suggesting unroll factor. */
|
||||||
|
unsigned m_reduc_factor = 0;
|
||||||
|
/* True if the loop contains an average operation. */
|
||||||
|
- bool m_has_avg =false;
|
||||||
|
+ bool m_has_avg = false;
|
||||||
|
+ /* True if the loop uses approximation instruction sequence. */
|
||||||
|
+ bool m_has_recip = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Implement TARGET_VECTORIZE_CREATE_COSTS. */
|
||||||
|
@@ -4017,7 +4019,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi
|
||||||
|
{
|
||||||
|
class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||||
|
|
||||||
|
- if (m_has_avg)
|
||||||
|
+ if (m_has_avg || m_has_recip)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
/* Don't unroll if it's specified explicitly not to be unrolled. */
|
||||||
|
@@ -4077,6 +4079,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
+ combined_fn cfn;
|
||||||
|
+ if (kind == vector_stmt
|
||||||
|
+ && stmt_info
|
||||||
|
+ && stmt_info->stmt)
|
||||||
|
+ {
|
||||||
|
+ /* Detect the use of approximate instruction sequence. */
|
||||||
|
+ if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT)
|
||||||
|
+ && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
|
||||||
|
+ switch (cfn)
|
||||||
|
+ {
|
||||||
|
+ case CFN_BUILT_IN_SQRTF:
|
||||||
|
+ m_has_recip = true;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ else if (TARGET_RECIP_VEC_DIV
|
||||||
|
+ && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
|
||||||
|
+ {
|
||||||
|
+ machine_mode mode = TYPE_MODE (vectype);
|
||||||
|
+ switch (gimple_assign_rhs_code (stmt_info->stmt))
|
||||||
|
+ {
|
||||||
|
+ case RDIV_EXPR:
|
||||||
|
+ if (GET_MODE_INNER (mode) == SFmode)
|
||||||
|
+ m_has_recip = true;
|
||||||
|
+ default:
|
||||||
|
+ break;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
130
0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Normal file
130
0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
Normal file
@ -0,0 +1,130 @@
|
|||||||
|
From 6ca9670e02a7d3f939b1a75f7b5a9094cd1db909 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Fri, 25 Oct 2024 02:45:35 +0000
|
||||||
|
Subject: [PATCH 064/188] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests
|
||||||
|
fail on LA664 [PR112611]
|
||||||
|
|
||||||
|
For [x]vshuf instructions, if the index value in the selector exceeds 63, it triggers
|
||||||
|
undefined behavior on LA464, but not on LA664. To ensure compatibility of these two
|
||||||
|
tests on both LA464 and LA664, we have modified both tests to ensure that the index
|
||||||
|
value in the selector does not exceed 63.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112611
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Ensure index is less than 64.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto.
|
||||||
|
---
|
||||||
|
.../loongarch/vector/lasx/lasx-xvshuf_b.c | 14 +++++++-------
|
||||||
|
.../gcc.target/loongarch/vector/lsx/lsx-vshuf.c | 12 ++++++------
|
||||||
|
2 files changed, 13 insertions(+), 13 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
|
||||||
|
index b8ab38711..910d29339 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
|
||||||
|
@@ -99,9 +99,9 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op1[2]) = 0x7ff0000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[1]) = 0x7ff0000000000000;
|
||||||
|
*((unsigned long *)&__m256i_op1[0]) = 0x7ff0000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[3]) = 0x3ff0010000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[3]) = 0x3f11010000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_op2[1]) = 0x3ff0010000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_op2[1]) = 0x3f11010000000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
@@ -200,7 +200,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0xffffffff00000000;
|
||||||
|
*((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
__m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
@@ -351,7 +351,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0x0000000000000001;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x00000000012e2110;
|
||||||
|
*((unsigned long *)&__m256i_result[3]) = 0x0000000000000001;
|
||||||
|
- *((unsigned long *)&__m256i_result[2]) = 0x0000000200000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m256i_result[1]) = 0x00000000012e2110;
|
||||||
|
*((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
@@ -426,10 +426,10 @@ main ()
|
||||||
|
*((unsigned long *)&__m256i_op2[2]) = 0x8000000080000000;
|
||||||
|
*((unsigned long *)&__m256i_op2[1]) = 0xdfffffffdfffffff;
|
||||||
|
*((unsigned long *)&__m256i_op2[0]) = 0x8000000080000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[3]) = 0x8000000080000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[3]) = 0xdfffffff80000000;
|
||||||
|
*((unsigned long *)&__m256i_result[2]) = 0x7fc00000dfffffff;
|
||||||
|
- *((unsigned long *)&__m256i_result[1]) = 0x8000000080000000;
|
||||||
|
- *((unsigned long *)&__m256i_result[0]) = 0x8000000080000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[1]) = 0x7fc0000000000000;
|
||||||
|
+ *((unsigned long *)&__m256i_result[0]) = 0x8000000000000000;
|
||||||
|
__m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
|
||||||
|
index f3b800f88..93a3078fa 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
|
||||||
|
@@ -33,7 +33,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x3f2f1f0f00000000;
|
||||||
|
*((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00000000;
|
||||||
|
__m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
@@ -153,7 +153,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x00007fff00007fff;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x00007fff00000000;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||||
|
__m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
@@ -198,7 +198,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x00000000000000c0;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x00000001ffffff29;
|
||||||
|
*((unsigned long *)&__m128i_result[1]) = 0xffffff29ffffff29;
|
||||||
|
- *((unsigned long *)&__m128i_result[0]) = 0x0000000100000001;
|
||||||
|
+ *((unsigned long *)&__m128i_result[0]) = 0xffffff2900000001;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
|
||||||
|
@@ -219,7 +219,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x0000000020000020;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x0000000020000020;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x2000002000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x2000002020000020;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
@@ -241,7 +241,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000001000000010;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0x8000000100000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0x8000000000000103;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000010300000103;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0x8000000000000103;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0x0000010380000001;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
@@ -252,7 +252,7 @@ main ()
|
||||||
|
*((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||||
|
*((unsigned long *)&__m128i_op2[1]) = 0xffffffffffffffff;
|
||||||
|
*((unsigned long *)&__m128i_op2[0]) = 0xffffffffffffffff;
|
||||||
|
- *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||||
|
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff00000000;
|
||||||
|
*((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||||
|
__m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
|
||||||
|
ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
318
0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
Normal file
318
0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
Normal file
@ -0,0 +1,318 @@
|
|||||||
|
From 87396b4550eeb097cdbe73fb19c84059ba6bb85e Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Wed, 29 Nov 2023 11:18:00 +0800
|
||||||
|
Subject: [PATCH 065/188] LoongArch: Fix ICE and use simplify_gen_subreg
|
||||||
|
instead of gen_rtx_SUBREG directly.
|
||||||
|
|
||||||
|
loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported
|
||||||
|
in gcc; this causes an ICE:
|
||||||
|
|
||||||
|
ice.c:55:1: error: unrecognizable insn:
|
||||||
|
55 | }
|
||||||
|
| ^
|
||||||
|
(insn 63 62 64 8 (set (reg:V4DI 278)
|
||||||
|
(subreg:V4DI (subreg:V4DF (reg:V4DI 273 [ vect__53.26 ]) 0) 0)) -1
|
||||||
|
(nil))
|
||||||
|
during RTL pass: vregs
|
||||||
|
ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804
|
||||||
|
|
||||||
|
Last time, Ruoyao has fixed a similar ICE:
|
||||||
|
https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html
|
||||||
|
|
||||||
|
This patch fixes the ICE and uses simplify_gen_subreg instead of gen_rtx_SUBREG as much as possible
|
||||||
|
to avoid the same ICE happening again.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use
|
||||||
|
simplify_gen_subreg instead of gen_rtx_SUBREG.
|
||||||
|
(loongarch_expand_vec_perm_const_2): Ditto.
|
||||||
|
(loongarch_expand_vec_cond_expr): Ditto.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/pr112476-3.c: New test.
|
||||||
|
* gcc.target/loongarch/pr112476-4.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 79 +++++++++++--------
|
||||||
|
.../gcc.target/loongarch/pr112476-3.c | 58 ++++++++++++++
|
||||||
|
.../gcc.target/loongarch/pr112476-4.c | 4 +
|
||||||
|
3 files changed, 108 insertions(+), 33 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index d64777179..4a3a7a246 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -8824,13 +8824,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
|
||||||
|
if (d->vmode == E_V2DFmode)
|
||||||
|
{
|
||||||
|
sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
|
||||||
|
- tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0);
|
||||||
|
+ tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
|
||||||
|
emit_move_insn (tmp, sel);
|
||||||
|
}
|
||||||
|
else if (d->vmode == E_V4SFmode)
|
||||||
|
{
|
||||||
|
sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
|
||||||
|
- tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0);
|
||||||
|
+ tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
|
||||||
|
emit_move_insn (tmp, sel);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
@@ -9614,8 +9614,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
/* Adjust op1 for selecting correct value in high 128bit of target
|
||||||
|
register.
|
||||||
|
op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }. */
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
|
||||||
|
emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
|
||||||
|
conv_op0, GEN_INT (0x21)));
|
||||||
|
|
||||||
|
@@ -9644,8 +9644,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
emit_move_insn (op0_alt, d->op0);
|
||||||
|
|
||||||
|
/* Generate subreg for fitting into insn gen function. */
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
|
||||||
|
|
||||||
|
/* Adjust op value in temp register.
|
||||||
|
op0 = {0,1,2,3}, op1 = {4,5,0,1} */
|
||||||
|
@@ -9691,9 +9691,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
emit_move_insn (op1_alt, d->op1);
|
||||||
|
emit_move_insn (op0_alt, d->op0);
|
||||||
|
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
|
||||||
|
- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
|
||||||
|
emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
|
||||||
|
conv_op0, GEN_INT (0x02)));
|
||||||
|
@@ -9725,9 +9726,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
Selector sample: E_V4DImode, { 0, 1, 4 ,5 } */
|
||||||
|
if (!d->testing_p)
|
||||||
|
{
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
|
||||||
|
- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
|
||||||
|
+ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
|
||||||
|
/* We can achieve the expectation by using sinple xvpermi.q insn. */
|
||||||
|
emit_move_insn (conv_target, conv_op1);
|
||||||
|
@@ -9752,8 +9754,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
emit_move_insn (op1_alt, d->op1);
|
||||||
|
emit_move_insn (op0_alt, d->op0);
|
||||||
|
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
|
||||||
|
/* Adjust op value in temp regiter.
|
||||||
|
op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 } */
|
||||||
|
emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
|
||||||
|
@@ -9797,9 +9799,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
emit_move_insn (op1_alt, d->op1);
|
||||||
|
emit_move_insn (op0_alt, d->op0);
|
||||||
|
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
|
||||||
|
- rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
|
||||||
|
+ rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
|
||||||
|
emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
|
||||||
|
conv_op0, GEN_INT (0x13)));
|
||||||
|
@@ -9831,10 +9834,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 } */
|
||||||
|
if (!d->testing_p)
|
||||||
|
{
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
|
||||||
|
rtx temp_reg = gen_reg_rtx (d->vmode);
|
||||||
|
- rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0);
|
||||||
|
+ rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
|
||||||
|
emit_move_insn (temp_reg, d->op0);
|
||||||
|
|
||||||
|
@@ -9943,9 +9947,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
emit_move_insn (op0_alt, d->op0);
|
||||||
|
emit_move_insn (op1_alt, d->op1);
|
||||||
|
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
|
||||||
|
- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
|
||||||
|
- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
|
||||||
|
+ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
+ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
|
||||||
|
/* Duplicate op0's low 128bit in op0, then duplicate high 128bit
|
||||||
|
in op1. After this, xvshuf.* insn's selector argument can
|
||||||
|
@@ -9978,10 +9984,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
emit_move_insn (op0_alt, d->op0);
|
||||||
|
emit_move_insn (op1_alt, d->op1);
|
||||||
|
|
||||||
|
- rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
|
||||||
|
- rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
|
||||||
|
- rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
|
||||||
|
- rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
|
||||||
|
+ rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
+ rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
|
||||||
|
+ d->vmode, 0);
|
||||||
|
+ rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
|
||||||
|
+ rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
|
||||||
|
|
||||||
|
/* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
|
||||||
|
that selector's low 128bit can access all op0's elements, and
|
||||||
|
@@ -10101,12 +10109,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
{
|
||||||
|
case E_V4DFmode:
|
||||||
|
sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm));
|
||||||
|
- tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
|
||||||
|
+ tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
|
||||||
|
emit_move_insn (tmp, sel);
|
||||||
|
break;
|
||||||
|
case E_V8SFmode:
|
||||||
|
sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm));
|
||||||
|
- tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0);
|
||||||
|
+ tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
|
||||||
|
emit_move_insn (tmp, sel);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
@@ -10192,7 +10200,7 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
|
||||||
|
64bit in target vector register. */
|
||||||
|
else if (extract_ev_od)
|
||||||
|
{
|
||||||
|
- rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
|
||||||
|
+ rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
|
||||||
|
emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted,
|
||||||
|
GEN_INT (0xD8)));
|
||||||
|
}
|
||||||
|
@@ -11279,7 +11287,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
|
||||||
|
if (mode != vimode)
|
||||||
|
{
|
||||||
|
xop1 = gen_reg_rtx (vimode);
|
||||||
|
- emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
|
||||||
|
+ emit_move_insn (xop1,
|
||||||
|
+ simplify_gen_subreg (vimode, operands[1],
|
||||||
|
+ mode, 0));
|
||||||
|
}
|
||||||
|
emit_move_insn (src1, xop1);
|
||||||
|
}
|
||||||
|
@@ -11296,7 +11306,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
|
||||||
|
if (mode != vimode)
|
||||||
|
{
|
||||||
|
xop2 = gen_reg_rtx (vimode);
|
||||||
|
- emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
|
||||||
|
+ emit_move_insn (xop2,
|
||||||
|
+ simplify_gen_subreg (vimode, operands[2],
|
||||||
|
+ mode, 0));
|
||||||
|
}
|
||||||
|
emit_move_insn (src2, xop2);
|
||||||
|
}
|
||||||
|
@@ -11315,7 +11327,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
|
||||||
|
gen_rtx_AND (vimode, mask, src1));
|
||||||
|
/* The result is placed back to a register with the mask. */
|
||||||
|
emit_insn (gen_rtx_SET (mask, bsel));
|
||||||
|
- emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
|
||||||
|
+ emit_move_insn (operands[0],
|
||||||
|
+ simplify_gen_subreg (mode, mask, vimode, 0));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-3.c b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..d696d4182
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c
|
||||||
|
@@ -0,0 +1,58 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlsx" } */
|
||||||
|
+
|
||||||
|
+#include <stdint.h>
|
||||||
|
+
|
||||||
|
+typedef int8_t orc_int8;
|
||||||
|
+typedef int16_t orc_int16;
|
||||||
|
+typedef int32_t orc_int32;
|
||||||
|
+typedef int64_t orc_int64;
|
||||||
|
+
|
||||||
|
+typedef union
|
||||||
|
+{
|
||||||
|
+ orc_int32 i;
|
||||||
|
+ float f;
|
||||||
|
+ orc_int16 x2[2];
|
||||||
|
+ orc_int8 x4[4];
|
||||||
|
+} orc_union32;
|
||||||
|
+typedef union
|
||||||
|
+{
|
||||||
|
+ orc_int64 i;
|
||||||
|
+ double f;
|
||||||
|
+ orc_int32 x2[2];
|
||||||
|
+ float x2f[2];
|
||||||
|
+ orc_int16 x4[4];
|
||||||
|
+} orc_union64;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+audio_orc_s32_to_double (double * restrict d1,
|
||||||
|
+ const signed int * restrict s1, int n)
|
||||||
|
+{
|
||||||
|
+ int i;
|
||||||
|
+ orc_union64 *restrict ptr0;
|
||||||
|
+ const orc_union32 *restrict ptr4;
|
||||||
|
+ orc_union32 var33;
|
||||||
|
+ orc_union64 var34;
|
||||||
|
+ orc_union64 var35;
|
||||||
|
+ orc_union64 var36;
|
||||||
|
+
|
||||||
|
+ ptr0 = (orc_union64 *) d1;
|
||||||
|
+ ptr4 = (orc_union32 *) s1;
|
||||||
|
+
|
||||||
|
+ var34.i = 0x41e0000000000000UL;
|
||||||
|
+
|
||||||
|
+ for (i = 0; i < n; i++) {
|
||||||
|
+ var33 = ptr4[i];
|
||||||
|
+ var36.f = var33.i;
|
||||||
|
+ {
|
||||||
|
+ orc_union64 _src1;
|
||||||
|
+ orc_union64 _src2;
|
||||||
|
+ orc_union64 _dest1;
|
||||||
|
+ _src1.i = ((var36.i) & ((((var36.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
|
||||||
|
+ _src2.i = ((var34.i) & ((((var34.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
|
||||||
|
+ _dest1.f = _src1.f / _src2.f;
|
||||||
|
+ var35.i = ((_dest1.i) & ((((_dest1.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
|
||||||
|
+ }
|
||||||
|
+ ptr0[i] = var35;
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-4.c b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..955d98552
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c
|
||||||
|
@@ -0,0 +1,4 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+
|
||||||
|
+#include "pr112476-3.c"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
236
0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
Normal file
236
0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
Normal file
@ -0,0 +1,236 @@
|
|||||||
|
From 34088d0a8685defa97754b7ab5d90b9bc536cfaa Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yang Yujie <yangyujie@loongson.cn>
|
||||||
|
Date: Fri, 8 Dec 2023 18:01:18 +0800
|
||||||
|
Subject: [PATCH 066/188] LoongArch: Fix eh_return epilogue for normal returns.
|
||||||
|
|
||||||
|
On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved
|
||||||
|
and restored in the function prologue and epilogue if the given function calls
|
||||||
|
__builtin_eh_return. This causes the return value to be overwritten on normal
|
||||||
|
return paths and breaks a rare case of libgcc's _Unwind_RaiseException.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc: Do not restore the saved eh_return
|
||||||
|
data registers ($r4-$r7) for a normal return of a function that calls
|
||||||
|
__builtin_eh_return elsewhere.
|
||||||
|
* config/loongarch/loongarch-protos.h: Same.
|
||||||
|
* config/loongarch/loongarch.md: Same.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/eh_return-normal-return.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-protos.h | 2 +-
|
||||||
|
gcc/config/loongarch/loongarch.cc | 34 ++++++++++++-----
|
||||||
|
gcc/config/loongarch/loongarch.md | 23 ++++++++++-
|
||||||
|
.../loongarch/eh_return-normal-return.c | 38 +++++++++++++++++++
|
||||||
|
4 files changed, 84 insertions(+), 13 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
index 117669e9f..e5fcf3111 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
@@ -60,7 +60,7 @@ enum loongarch_symbol_type {
|
||||||
|
extern rtx loongarch_emit_move (rtx, rtx);
|
||||||
|
extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
|
||||||
|
extern void loongarch_expand_prologue (void);
|
||||||
|
-extern void loongarch_expand_epilogue (bool);
|
||||||
|
+extern void loongarch_expand_epilogue (int);
|
||||||
|
extern bool loongarch_can_use_return_insn (void);
|
||||||
|
|
||||||
|
extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *);
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 4a3a7a246..7caf04d8d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset,
|
||||||
|
|
||||||
|
static void
|
||||||
|
loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
|
||||||
|
- loongarch_save_restore_fn fn)
|
||||||
|
+ loongarch_save_restore_fn fn,
|
||||||
|
+ bool skip_eh_data_regs_p)
|
||||||
|
{
|
||||||
|
HOST_WIDE_INT offset;
|
||||||
|
|
||||||
|
@@ -1021,7 +1022,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
|
||||||
|
for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
|
||||||
|
if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
|
||||||
|
{
|
||||||
|
- if (!cfun->machine->reg_is_wrapped_separately[regno])
|
||||||
|
+ /* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
|
||||||
|
+ when returning normally from a function that calls
|
||||||
|
+ __builtin_eh_return. In this case, these registers are saved but
|
||||||
|
+ should not be restored, or the return value may be clobbered. */
|
||||||
|
+
|
||||||
|
+ if (!(cfun->machine->reg_is_wrapped_separately[regno]
|
||||||
|
+ || (skip_eh_data_regs_p
|
||||||
|
+ && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4)))
|
||||||
|
loongarch_save_restore_reg (word_mode, regno, offset, fn);
|
||||||
|
|
||||||
|
offset -= UNITS_PER_WORD;
|
||||||
|
@@ -1294,7 +1302,7 @@ loongarch_expand_prologue (void)
|
||||||
|
GEN_INT (-step1));
|
||||||
|
RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
|
||||||
|
size -= step1;
|
||||||
|
- loongarch_for_each_saved_reg (size, loongarch_save_reg);
|
||||||
|
+ loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set up the frame pointer, if we're using one. */
|
||||||
|
@@ -1379,11 +1387,13 @@ loongarch_can_use_return_insn (void)
|
||||||
|
return reload_completed && cfun->machine->frame.total_size == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
|
||||||
|
- says which. */
|
||||||
|
+/* Expand function epilogue using the following insn patterns:
|
||||||
|
+ "epilogue" (style == NORMAL_RETURN)
|
||||||
|
+ "sibcall_epilogue" (style == SIBCALL_RETURN)
|
||||||
|
+ "eh_return" (style == EXCEPTION_RETURN) */
|
||||||
|
|
||||||
|
void
|
||||||
|
-loongarch_expand_epilogue (bool sibcall_p)
|
||||||
|
+loongarch_expand_epilogue (int style)
|
||||||
|
{
|
||||||
|
/* Split the frame into two. STEP1 is the amount of stack we should
|
||||||
|
deallocate before restoring the registers. STEP2 is the amount we
|
||||||
|
@@ -1400,7 +1410,8 @@ loongarch_expand_epilogue (bool sibcall_p)
|
||||||
|
bool need_barrier_p
|
||||||
|
= (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
|
||||||
|
|
||||||
|
- if (!sibcall_p && loongarch_can_use_return_insn ())
|
||||||
|
+ /* Handle simple returns. */
|
||||||
|
+ if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
|
||||||
|
{
|
||||||
|
emit_jump_insn (gen_return ());
|
||||||
|
return;
|
||||||
|
@@ -1476,7 +1487,9 @@ loongarch_expand_epilogue (bool sibcall_p)
|
||||||
|
|
||||||
|
/* Restore the registers. */
|
||||||
|
loongarch_for_each_saved_reg (frame->total_size - step2,
|
||||||
|
- loongarch_restore_reg);
|
||||||
|
+ loongarch_restore_reg,
|
||||||
|
+ crtl->calls_eh_return
|
||||||
|
+ && style != EXCEPTION_RETURN);
|
||||||
|
|
||||||
|
if (need_barrier_p)
|
||||||
|
loongarch_emit_stack_tie ();
|
||||||
|
@@ -1497,11 +1510,12 @@ loongarch_expand_epilogue (bool sibcall_p)
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add in the __builtin_eh_return stack adjustment. */
|
||||||
|
- if (crtl->calls_eh_return)
|
||||||
|
+ if (crtl->calls_eh_return && style == EXCEPTION_RETURN)
|
||||||
|
emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
|
||||||
|
EH_RETURN_STACKADJ_RTX));
|
||||||
|
|
||||||
|
- if (!sibcall_p)
|
||||||
|
+ /* Emit return unless doing sibcall. */
|
||||||
|
+ if (style != SIBCALL_RETURN)
|
||||||
|
emit_jump_insn (gen_simple_return_internal (ra));
|
||||||
|
}
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index c6edd1dda..222f1ae83 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -125,6 +125,11 @@
|
||||||
|
(T1_REGNUM 13)
|
||||||
|
(S0_REGNUM 23)
|
||||||
|
|
||||||
|
+ ;; Return path styles
|
||||||
|
+ (NORMAL_RETURN 0)
|
||||||
|
+ (SIBCALL_RETURN 1)
|
||||||
|
+ (EXCEPTION_RETURN 2)
|
||||||
|
+
|
||||||
|
;; PIC long branch sequences are never longer than 100 bytes.
|
||||||
|
(MAX_PIC_BRANCH_LENGTH 100)
|
||||||
|
])
|
||||||
|
@@ -3276,7 +3281,7 @@
|
||||||
|
[(const_int 2)]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- loongarch_expand_epilogue (false);
|
||||||
|
+ loongarch_expand_epilogue (NORMAL_RETURN);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
@@ -3284,7 +3289,7 @@
|
||||||
|
[(const_int 2)]
|
||||||
|
""
|
||||||
|
{
|
||||||
|
- loongarch_expand_epilogue (true);
|
||||||
|
+ loongarch_expand_epilogue (SIBCALL_RETURN);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
@@ -3341,6 +3346,20 @@
|
||||||
|
emit_insn (gen_eh_set_ra_di (operands[0]));
|
||||||
|
else
|
||||||
|
emit_insn (gen_eh_set_ra_si (operands[0]));
|
||||||
|
+
|
||||||
|
+ emit_jump_insn (gen_eh_return_internal ());
|
||||||
|
+ emit_barrier ();
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_insn_and_split "eh_return_internal"
|
||||||
|
+ [(eh_return)]
|
||||||
|
+ ""
|
||||||
|
+ "#"
|
||||||
|
+ "epilogue_completed"
|
||||||
|
+ [(const_int 0)]
|
||||||
|
+{
|
||||||
|
+ loongarch_expand_epilogue (EXCEPTION_RETURN);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..f8f3965f8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
|
||||||
|
@@ -0,0 +1,38 @@
|
||||||
|
+/* { dg-do run } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+
|
||||||
|
+#include <stdlib.h>
|
||||||
|
+
|
||||||
|
+int foo () __attribute__((noinline));
|
||||||
|
+int main ();
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+foo () {
|
||||||
|
+
|
||||||
|
+ int t;
|
||||||
|
+
|
||||||
|
+ /* prevent optimization using asm */
|
||||||
|
+ asm ("" : "=r" (t) : "0" (-1));
|
||||||
|
+ asm ("" : "=r" (t) : "0" (t ? 1 : 0));
|
||||||
|
+
|
||||||
|
+ if (t == 0)
|
||||||
|
+ /* never reached */
|
||||||
|
+ __builtin_eh_return (0, __builtin_return_address (0));
|
||||||
|
+
|
||||||
|
+ else if (t == 1)
|
||||||
|
+ /* return here */
|
||||||
|
+ return 202312;
|
||||||
|
+
|
||||||
|
+ else
|
||||||
|
+ /* never reached: prevent vrp optimization in main */
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+main ()
|
||||||
|
+{
|
||||||
|
+ if (foo() == 202312)
|
||||||
|
+ return 0;
|
||||||
|
+ else
|
||||||
|
+ abort ();
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
180
0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
Normal file
180
0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
From fdb51014f00094737459d5c9008630454ec7f342 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Thu, 7 Dec 2023 15:45:30 +0800
|
||||||
|
Subject: [PATCH 067/188] LoongArch: Allow -mcmodel=extreme and model attribute
|
||||||
|
with -mexplicit-relocs=auto
|
||||||
|
|
||||||
|
There seems to be no real reason to require -mexplicit-relocs=always for
|
||||||
|
-mcmodel=extreme or model attribute. As the linker does not know how to
|
||||||
|
relax a 3-operand la.local or la.global pseudo instruction, just emit
|
||||||
|
explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also
|
||||||
|
SYMBOL_GOT_DISP.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
|
||||||
|
Return true for SYMBOL_PCREL64. Return true for SYMBOL_GOT_DISP
|
||||||
|
if TARGET_CMODEL_EXTREME.
|
||||||
|
(loongarch_split_symbol): Check for la_opt_explicit_relocs !=
|
||||||
|
EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS.
|
||||||
|
(loongarch_print_operand_reloc): Likewise.
|
||||||
|
(loongarch_option_override_internal): Likewise.
|
||||||
|
(loongarch_handle_model_attribute): Likewise.
|
||||||
|
* doc/invoke.texi (-mcmodel=extreme): Update the compatibility
|
||||||
|
between it and -mexplicit-relocs=.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/attr-model-3.c: New test.
|
||||||
|
* gcc.target/loongarch/attr-model-4.c: New test.
|
||||||
|
* gcc.target/loongarch/func-call-extreme-3.c: New test.
|
||||||
|
* gcc.target/loongarch/func-call-extreme-4.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 25 ++++++++++++-------
|
||||||
|
gcc/doc/invoke.texi | 4 +--
|
||||||
|
.../gcc.target/loongarch/attr-model-3.c | 6 +++++
|
||||||
|
.../gcc.target/loongarch/attr-model-4.c | 6 +++++
|
||||||
|
.../loongarch/func-call-extreme-3.c | 7 ++++++
|
||||||
|
.../loongarch/func-call-extreme-4.c | 7 ++++++
|
||||||
|
6 files changed, 44 insertions(+), 11 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 7caf04d8d..4362149ef 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -1969,9 +1969,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
|
||||||
|
case SYMBOL_TLS_LE:
|
||||||
|
case SYMBOL_TLSGD:
|
||||||
|
case SYMBOL_TLSLDM:
|
||||||
|
- /* The linker don't know how to relax TLS accesses. */
|
||||||
|
+ case SYMBOL_PCREL64:
|
||||||
|
+ /* The linker don't know how to relax TLS accesses or 64-bit
|
||||||
|
+ pc-relative accesses. */
|
||||||
|
return true;
|
||||||
|
case SYMBOL_GOT_DISP:
|
||||||
|
+ /* The linker don't know how to relax GOT accesses in extreme
|
||||||
|
+ code model. */
|
||||||
|
+ if (TARGET_CMODEL_EXTREME)
|
||||||
|
+ return true;
|
||||||
|
+
|
||||||
|
/* If we are performing LTO for a final link, and we have the
|
||||||
|
linker plugin so we know the resolution of the symbols, then
|
||||||
|
all GOT references are binding to external symbols or
|
||||||
|
@@ -3134,7 +3141,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||||
|
|
||||||
|
if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ())
|
||||||
|
{
|
||||||
|
- gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||||
|
+ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
|
||||||
|
|
||||||
|
temp1 = gen_reg_rtx (Pmode);
|
||||||
|
emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
|
||||||
|
@@ -5933,7 +5940,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||||||
|
loongarch_classify_symbolic_expression (op);
|
||||||
|
|
||||||
|
if (loongarch_symbol_extreme_p (symbol_type))
|
||||||
|
- gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||||
|
+ gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
|
||||||
|
|
||||||
|
switch (symbol_type)
|
||||||
|
{
|
||||||
|
@@ -7540,9 +7547,9 @@ loongarch_option_override_internal (struct gcc_options *opts,
|
||||||
|
switch (la_target.cmodel)
|
||||||
|
{
|
||||||
|
case CMODEL_EXTREME:
|
||||||
|
- if (!TARGET_EXPLICIT_RELOCS)
|
||||||
|
- error ("code model %qs needs %s",
|
||||||
|
- "extreme", "-mexplicit-relocs=always");
|
||||||
|
+ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
|
||||||
|
+ error ("code model %qs is not compatible with %s",
|
||||||
|
+ "extreme", "-mexplicit-relocs=none");
|
||||||
|
|
||||||
|
if (opts->x_flag_plt)
|
||||||
|
{
|
||||||
|
@@ -7908,11 +7915,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
|
||||||
|
*no_add_attrs = true;
|
||||||
|
return NULL_TREE;
|
||||||
|
}
|
||||||
|
- if (!TARGET_EXPLICIT_RELOCS)
|
||||||
|
+ if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
|
||||||
|
{
|
||||||
|
error_at (DECL_SOURCE_LOCATION (decl),
|
||||||
|
- "%qE attribute requires %s", name,
|
||||||
|
- "-mexplicit-relocs=always");
|
||||||
|
+ "%qE attribute is not compatible with %s", name,
|
||||||
|
+ "-mexplicit-relocs=none");
|
||||||
|
*no_add_attrs = true;
|
||||||
|
return NULL_TREE;
|
||||||
|
}
|
||||||
|
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||||
|
index 76a8f20d1..5c6515cb1 100644
|
||||||
|
--- a/gcc/doc/invoke.texi
|
||||||
|
+++ b/gcc/doc/invoke.texi
|
||||||
|
@@ -24602,8 +24602,8 @@ The text segment and data segment must be within 2GB addressing space.
|
||||||
|
|
||||||
|
@item extreme
|
||||||
|
This mode does not limit the size of the code segment and data segment.
|
||||||
|
-The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
|
||||||
|
-@option{-mno-explicit-relocs}.
|
||||||
|
+The @option{-mcmodel=extreme} option is incompatible with @option{-fplt}
|
||||||
|
+and/or @option{-mexplicit-relocs=none}.
|
||||||
|
@end table
|
||||||
|
The default code model is @code{normal}.
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..5622d5086
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
|
||||||
|
@@ -0,0 +1,6 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */
|
||||||
|
+
|
||||||
|
+#define ATTR_MODEL_TEST
|
||||||
|
+#include "attr-model-test.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..482724bb9
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c
|
||||||
|
@@ -0,0 +1,6 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */
|
||||||
|
+
|
||||||
|
+#define ATTR_MODEL_TEST
|
||||||
|
+#include "attr-model-test.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a4da44b4a
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */
|
||||||
|
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||||
|
+
|
||||||
|
+#include "func-call-extreme-1.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..16b00f4c5
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */
|
||||||
|
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||||
|
+
|
||||||
|
+#include "func-call-extreme-1.c"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
79
0068-LoongArch-Fix-warnings-building-libgcc.patch
Normal file
79
0068-LoongArch-Fix-warnings-building-libgcc.patch
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
From 5a910f294605d0163f8f4ac255a14425b154b5dd Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 9 Dec 2023 22:08:37 +0800
|
||||||
|
Subject: [PATCH 068/188] LoongArch: Fix warnings building libgcc
|
||||||
|
|
||||||
|
We are excluding loongarch-opts.h from target libraries, but now struct
|
||||||
|
loongarch_target and gcc_options are not declared in the target
|
||||||
|
libraries, causing:
|
||||||
|
|
||||||
|
In file included from ../.././gcc/options.h:8,
|
||||||
|
from ../.././gcc/tm.h:49,
|
||||||
|
from ../../../gcc/libgcc/fixed-bit.c:48:
|
||||||
|
../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41:
|
||||||
|
warning: 'struct gcc_options' declared inside parameter list will not
|
||||||
|
be visible outside of this definition or declaration
|
||||||
|
57 | struct gcc_options *opts,
|
||||||
|
| ^~~~~~~~~~~
|
||||||
|
|
||||||
|
So exclude the declarations referring to the C++ structs as well.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-opts.h (la_target): Move into #if
|
||||||
|
for loongarch-def.h.
|
||||||
|
(loongarch_init_target): Likewise.
|
||||||
|
(loongarch_config_target): Likewise.
|
||||||
|
(loongarch_update_gcc_opt_status): Likewise.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++----------
|
||||||
|
1 file changed, 10 insertions(+), 10 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 7010ddfec..639ed50bd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -21,22 +21,15 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#ifndef LOONGARCH_OPTS_H
|
||||||
|
#define LOONGARCH_OPTS_H
|
||||||
|
|
||||||
|
-/* This is a C++ header and it shouldn't be used by target libraries. */
|
||||||
|
+/* The loongarch-def.h file is a C++ header and it shouldn't be used by
|
||||||
|
+ target libraries. Exclude it and everything using the C++ structs
|
||||||
|
+ (struct loongarch_target and gcc_options) from target libraries. */
|
||||||
|
#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
|
||||||
|
#include "loongarch-def.h"
|
||||||
|
-#endif
|
||||||
|
|
||||||
|
/* Target configuration */
|
||||||
|
extern struct loongarch_target la_target;
|
||||||
|
|
||||||
|
-/* Flag status */
|
||||||
|
-struct loongarch_flags {
|
||||||
|
- int flt; const char* flt_str;
|
||||||
|
-#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x))
|
||||||
|
- int sx[2];
|
||||||
|
-};
|
||||||
|
-
|
||||||
|
-
|
||||||
|
/* Initialize loongarch_target from separate option variables. */
|
||||||
|
void
|
||||||
|
loongarch_init_target (struct loongarch_target *target,
|
||||||
|
@@ -56,7 +49,14 @@ void
|
||||||
|
loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||||
|
struct gcc_options *opts,
|
||||||
|
struct gcc_options *opts_set);
|
||||||
|
+#endif
|
||||||
|
|
||||||
|
+/* Flag status */
|
||||||
|
+struct loongarch_flags {
|
||||||
|
+ int flt; const char* flt_str;
|
||||||
|
+#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x))
|
||||||
|
+ int sx[2];
|
||||||
|
+};
|
||||||
|
|
||||||
|
/* Macros for common conditional expressions used in loongarch.{c,h,md} */
|
||||||
|
#define TARGET_CMODEL_NORMAL (la_target.cmodel == CMODEL_NORMAL)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,30 @@
|
|||||||
|
From 639e7518c8a4468cd50d774c5a3dbda5f2dbb4a7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Wed, 13 Dec 2023 02:39:35 +0800
|
||||||
|
Subject: [PATCH 069/188] LoongArch: testsuite: Remove XFAIL in
|
||||||
|
vect-ftint-no-inexact.c
|
||||||
|
|
||||||
|
After r14-6455 this no longer fails.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove.
|
||||||
|
---
|
||||||
|
gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +--
|
||||||
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
|
||||||
|
index 83d268099..61918beef 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
|
||||||
|
@@ -39,6 +39,5 @@
|
||||||
|
/* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */
|
||||||
|
/* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */
|
||||||
|
|
||||||
|
-/* trunc: XFAIL due to PR 107723 */
|
||||||
|
-/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */
|
||||||
|
+/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */
|
||||||
|
/* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,44 @@
|
|||||||
|
From 6a5e3932a39f1ffa6f87479748ee711e4fa47d30 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 9 Dec 2023 15:27:28 +0800
|
||||||
|
Subject: [PATCH 070/188] LoongArch: Include rtl.h for COSTS_N_INSNS instead of
|
||||||
|
hard coding our own
|
||||||
|
|
||||||
|
With loongarch-def.cc switched from C to C++, we can include rtl.h for
|
||||||
|
COSTS_N_INSNS, instead of hard coding our own.
|
||||||
|
|
||||||
|
This is a non-functional change for now, but it will make the code more
|
||||||
|
future-proof in case COSTS_N_INSNS in rtl.h would be changed.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-def.cc (rtl.h): Include.
|
||||||
|
(COSTS_N_INSNS): Remove the macro definition.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-def.cc | 3 +--
|
||||||
|
1 file changed, 1 insertion(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
index c41804a18..6217b1926 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
|
||||||
|
#include "system.h"
|
||||||
|
#include "coretypes.h"
|
||||||
|
#include "tm.h"
|
||||||
|
+#include "rtl.h"
|
||||||
|
|
||||||
|
#include "loongarch-def.h"
|
||||||
|
#include "loongarch-str.h"
|
||||||
|
@@ -89,8 +90,6 @@ array_tune<loongarch_align> loongarch_cpu_align =
|
||||||
|
.set (CPU_LA464, la464_align ())
|
||||||
|
.set (CPU_LA664, la464_align ());
|
||||||
|
|
||||||
|
-#define COSTS_N_INSNS(N) ((N) * 4)
|
||||||
|
-
|
||||||
|
/* Default RTX cost initializer. */
|
||||||
|
loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
||||||
|
: fp_add (COSTS_N_INSNS (1)),
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
165
0071-LoongArch-Fix-instruction-costs-PR112936.patch
Normal file
165
0071-LoongArch-Fix-instruction-costs-PR112936.patch
Normal file
@ -0,0 +1,165 @@
|
|||||||
|
From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 9 Dec 2023 17:41:32 +0800
|
||||||
|
Subject: [PATCH 071/188] LoongArch: Fix instruction costs [PR112936]
|
||||||
|
|
||||||
|
Replace the instruction costs in loongarch_rtx_cost_data constructor
|
||||||
|
based on micro-benchmark results on LA464 and LA664.
|
||||||
|
|
||||||
|
This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl
|
||||||
|
and slli.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112936
|
||||||
|
* config/loongarch/loongarch-def.cc
|
||||||
|
(loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update
|
||||||
|
instruction costs per micro-benchmark results.
|
||||||
|
(loongarch_rtx_cost_optimize_size): Set all instruction costs
|
||||||
|
to (COSTS_N_INSNS (1) + 1).
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove
|
||||||
|
special case for multiplication when optimizing for size.
|
||||||
|
Adjust division cost when TARGET_64BIT && !TARGET_DIV32.
|
||||||
|
Account the extra cost when TARGET_CHECK_ZERO_DIV and
|
||||||
|
optimizing for speed.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/112936
|
||||||
|
* gcc.target/loongarch/mul-const-reduction.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-def.cc | 39 ++++++++++---------
|
||||||
|
gcc/config/loongarch/loongarch.cc | 22 +++++------
|
||||||
|
.../loongarch/mul-const-reduction.c | 11 ++++++
|
||||||
|
3 files changed, 43 insertions(+), 29 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
index 6217b1926..4a8885e83 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align =
|
||||||
|
|
||||||
|
/* Default RTX cost initializer. */
|
||||||
|
loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
||||||
|
- : fp_add (COSTS_N_INSNS (1)),
|
||||||
|
- fp_mult_sf (COSTS_N_INSNS (2)),
|
||||||
|
- fp_mult_df (COSTS_N_INSNS (4)),
|
||||||
|
- fp_div_sf (COSTS_N_INSNS (6)),
|
||||||
|
+ : fp_add (COSTS_N_INSNS (5)),
|
||||||
|
+ fp_mult_sf (COSTS_N_INSNS (5)),
|
||||||
|
+ fp_mult_df (COSTS_N_INSNS (5)),
|
||||||
|
+ fp_div_sf (COSTS_N_INSNS (8)),
|
||||||
|
fp_div_df (COSTS_N_INSNS (8)),
|
||||||
|
- int_mult_si (COSTS_N_INSNS (1)),
|
||||||
|
- int_mult_di (COSTS_N_INSNS (1)),
|
||||||
|
- int_div_si (COSTS_N_INSNS (4)),
|
||||||
|
- int_div_di (COSTS_N_INSNS (6)),
|
||||||
|
+ int_mult_si (COSTS_N_INSNS (4)),
|
||||||
|
+ int_mult_di (COSTS_N_INSNS (4)),
|
||||||
|
+ int_div_si (COSTS_N_INSNS (5)),
|
||||||
|
+ int_div_di (COSTS_N_INSNS (5)),
|
||||||
|
branch_cost (6),
|
||||||
|
memory_latency (4) {}
|
||||||
|
|
||||||
|
@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
||||||
|
array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
|
||||||
|
array_tune<loongarch_rtx_cost_data> ();
|
||||||
|
|
||||||
|
-/* RTX costs to use when optimizing for size. */
|
||||||
|
+/* RTX costs to use when optimizing for size.
|
||||||
|
+ We use a value slightly larger than COSTS_N_INSNS (1) for all of them
|
||||||
|
+ because they are slower than simple instructions. */
|
||||||
|
+#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1)
|
||||||
|
const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
|
||||||
|
loongarch_rtx_cost_data ()
|
||||||
|
- .fp_add_ (4)
|
||||||
|
- .fp_mult_sf_ (4)
|
||||||
|
- .fp_mult_df_ (4)
|
||||||
|
- .fp_div_sf_ (4)
|
||||||
|
- .fp_div_df_ (4)
|
||||||
|
- .int_mult_si_ (4)
|
||||||
|
- .int_mult_di_ (4)
|
||||||
|
- .int_div_si_ (4)
|
||||||
|
- .int_div_di_ (4);
|
||||||
|
+ .fp_add_ (COST_COMPLEX_INSN)
|
||||||
|
+ .fp_mult_sf_ (COST_COMPLEX_INSN)
|
||||||
|
+ .fp_mult_df_ (COST_COMPLEX_INSN)
|
||||||
|
+ .fp_div_sf_ (COST_COMPLEX_INSN)
|
||||||
|
+ .fp_div_df_ (COST_COMPLEX_INSN)
|
||||||
|
+ .int_mult_si_ (COST_COMPLEX_INSN)
|
||||||
|
+ .int_mult_di_ (COST_COMPLEX_INSN)
|
||||||
|
+ .int_div_si_ (COST_COMPLEX_INSN)
|
||||||
|
+ .int_div_di_ (COST_COMPLEX_INSN);
|
||||||
|
|
||||||
|
array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
|
||||||
|
.set (CPU_NATIVE, 4)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 4362149ef..afbb55390 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
||||||
|
*total = (speed
|
||||||
|
? loongarch_cost->int_mult_si * 3 + 6
|
||||||
|
: COSTS_N_INSNS (7));
|
||||||
|
- else if (!speed)
|
||||||
|
- *total = COSTS_N_INSNS (1) + 1;
|
||||||
|
else if (mode == DImode)
|
||||||
|
*total = loongarch_cost->int_mult_di;
|
||||||
|
else
|
||||||
|
@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
||||||
|
|
||||||
|
case UDIV:
|
||||||
|
case UMOD:
|
||||||
|
- if (!speed)
|
||||||
|
- {
|
||||||
|
- *total = COSTS_N_INSNS (loongarch_idiv_insns (mode));
|
||||||
|
- }
|
||||||
|
- else if (mode == DImode)
|
||||||
|
+ if (mode == DImode)
|
||||||
|
*total = loongarch_cost->int_div_di;
|
||||||
|
else
|
||||||
|
- *total = loongarch_cost->int_div_si;
|
||||||
|
+ {
|
||||||
|
+ *total = loongarch_cost->int_div_si;
|
||||||
|
+ if (TARGET_64BIT && !TARGET_DIV32)
|
||||||
|
+ *total += COSTS_N_INSNS (2);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (TARGET_CHECK_ZERO_DIV)
|
||||||
|
+ *total += COSTS_N_INSNS (2);
|
||||||
|
+
|
||||||
|
return false;
|
||||||
|
|
||||||
|
case SIGN_EXTEND:
|
||||||
|
@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
||||||
|
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
|
||||||
|
== ZERO_EXTEND))))
|
||||||
|
{
|
||||||
|
- if (!speed)
|
||||||
|
- *total = COSTS_N_INSNS (1) + 1;
|
||||||
|
- else if (mode == DImode)
|
||||||
|
+ if (mode == DImode)
|
||||||
|
*total = loongarch_cost->int_mult_di;
|
||||||
|
else
|
||||||
|
*total = loongarch_cost->int_mult_si;
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..02d9a4876
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
|
||||||
|
@@ -0,0 +1,11 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mtune=la464" } */
|
||||||
|
+/* { dg-final { scan-assembler "alsl\.w" } } */
|
||||||
|
+/* { dg-final { scan-assembler "slli\.w" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "mul\.w" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+test (int a)
|
||||||
|
+{
|
||||||
|
+ return a * 68;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
53
0072-LoongArch-Add-alslsi3_extend.patch
Normal file
53
0072-LoongArch-Add-alslsi3_extend.patch
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
From 89dfb9ad8687f9b31be5925b2d106b6ec13cc628 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 9 Dec 2023 18:02:35 +0800
|
||||||
|
Subject: [PATCH 072/188] LoongArch: Add alslsi3_extend
|
||||||
|
|
||||||
|
Following the instruction cost fix, we are generating
|
||||||
|
|
||||||
|
alsl.w $a0, $a0, $a0, 4
|
||||||
|
|
||||||
|
instead of
|
||||||
|
|
||||||
|
li.w $t0, 17
|
||||||
|
mul.w $a0, $a0, $t0
|
||||||
|
|
||||||
|
for "x * 17", because alsl.w is 4 times faster than mul.w. But we didn't
|
||||||
|
have a sign-extending pattern for alsl.w, causing an extra slli.w
|
||||||
|
instruction generated to sign-extend $a0. Add the pattern to remove the
|
||||||
|
redundant extension.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (alslsi3_extend): New
|
||||||
|
define_insn.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 12 ++++++++++++
|
||||||
|
1 file changed, 12 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 222f1ae83..23368008e 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2874,6 +2874,18 @@
|
||||||
|
[(set_attr "type" "arith")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+(define_insn "alslsi3_extend"
|
||||||
|
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||||
|
+ (sign_extend:DI
|
||||||
|
+ (plus:SI
|
||||||
|
+ (ashift:SI (match_operand:SI 1 "register_operand" "r")
|
||||||
|
+ (match_operand 2 "const_immalsl_operand" ""))
|
||||||
|
+ (match_operand:SI 3 "register_operand" "r"))))]
|
||||||
|
+ ""
|
||||||
|
+ "alsl.w\t%0,%1,%3,%2"
|
||||||
|
+ [(set_attr "type" "arith")
|
||||||
|
+ (set_attr "mode" "SI")])
|
||||||
|
+
|
||||||
|
|
||||||
|
|
||||||
|
;; Reverse the order of bytes of operand 1 and store the result in operand 0.
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
224
0073-LoongArch-Add-support-for-D-frontend.patch
Normal file
224
0073-LoongArch-Add-support-for-D-frontend.patch
Normal file
@ -0,0 +1,224 @@
|
|||||||
|
From 6ef045728a11218f023fee4527cd6d2fdb2c2910 Mon Sep 17 00:00:00 2001
|
||||||
|
From: liushuyu <liushuyu011@gmail.com>
|
||||||
|
Date: Mon, 18 Dec 2023 09:52:07 +0800
|
||||||
|
Subject: [PATCH 073/188] LoongArch: Add support for D frontend.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.gcc: Add loongarch-d.o to d_target_objs for LoongArch
|
||||||
|
architecture.
|
||||||
|
* config/loongarch/t-loongarch: Add object target for loongarch-d.cc.
|
||||||
|
* config/loongarch/loongarch-d.cc
|
||||||
|
(loongarch_d_target_versions): add interface function to define builtin
|
||||||
|
D versions for LoongArch architecture.
|
||||||
|
(loongarch_d_handle_target_float_abi): add interface function to define
|
||||||
|
builtin D traits for LoongArch architecture.
|
||||||
|
(loongarch_d_register_target_info): add interface function to register
|
||||||
|
loongarch_d_handle_target_float_abi function.
|
||||||
|
* config/loongarch/loongarch-d.h
|
||||||
|
(loongarch_d_target_versions): add function prototype.
|
||||||
|
(loongarch_d_register_target_info): Likewise.
|
||||||
|
|
||||||
|
libphobos/ChangeLog:
|
||||||
|
|
||||||
|
* configure.tgt: Enable libphobos for LoongArch architecture.
|
||||||
|
* libdruntime/gcc/sections/elf.d: Add TLS_DTV_OFFSET constant for
|
||||||
|
LoongArch64.
|
||||||
|
* libdruntime/gcc/unwind/generic.d: Add __aligned__ constant for
|
||||||
|
LoongArch64.
|
||||||
|
---
|
||||||
|
gcc/config.gcc | 1 +
|
||||||
|
gcc/config/loongarch/loongarch-d.cc | 77 ++++++++++++++++++++++
|
||||||
|
gcc/config/loongarch/loongarch-d.h | 26 ++++++++
|
||||||
|
gcc/config/loongarch/t-loongarch | 4 ++
|
||||||
|
libphobos/configure.tgt | 3 +
|
||||||
|
libphobos/libdruntime/gcc/sections/elf.d | 2 +
|
||||||
|
libphobos/libdruntime/gcc/unwind/generic.d | 1 +
|
||||||
|
7 files changed, 114 insertions(+)
|
||||||
|
create mode 100644 gcc/config/loongarch/loongarch-d.cc
|
||||||
|
create mode 100644 gcc/config/loongarch/loongarch-d.h
|
||||||
|
|
||||||
|
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||||
|
index 11ab620d0..039187fa2 100644
|
||||||
|
--- a/gcc/config.gcc
|
||||||
|
+++ b/gcc/config.gcc
|
||||||
|
@@ -456,6 +456,7 @@ mips*-*-*)
|
||||||
|
;;
|
||||||
|
loongarch*-*-*)
|
||||||
|
cpu_type=loongarch
|
||||||
|
+ d_target_objs="loongarch-d.o"
|
||||||
|
extra_headers="larchintrin.h lsxintrin.h lasxintrin.h"
|
||||||
|
extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o"
|
||||||
|
extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o"
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-d.cc b/gcc/config/loongarch/loongarch-d.cc
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..9ac483c39
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-d.cc
|
||||||
|
@@ -0,0 +1,77 @@
|
||||||
|
+/* Subroutines for the D front end on the LoongArch architecture.
|
||||||
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify
|
||||||
|
+it under the terms of the GNU General Public License as published by
|
||||||
|
+the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+any later version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful,
|
||||||
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+GNU General Public License for more details.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License
|
||||||
|
+along with GCC; see the file COPYING3. If not see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#define IN_TARGET_CODE 1
|
||||||
|
+
|
||||||
|
+#include "config.h"
|
||||||
|
+#include "system.h"
|
||||||
|
+#include "coretypes.h"
|
||||||
|
+#include "tm_d.h"
|
||||||
|
+#include "d/d-target.h"
|
||||||
|
+#include "d/d-target-def.h"
|
||||||
|
+
|
||||||
|
+/* Implement TARGET_D_CPU_VERSIONS for LoongArch targets. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+loongarch_d_target_versions (void)
|
||||||
|
+{
|
||||||
|
+ if (TARGET_64BIT)
|
||||||
|
+ d_add_builtin_version ("LoongArch64");
|
||||||
|
+ else
|
||||||
|
+ d_add_builtin_version ("LoongArch32");
|
||||||
|
+
|
||||||
|
+ if (TARGET_HARD_FLOAT_ABI)
|
||||||
|
+ {
|
||||||
|
+ d_add_builtin_version ("LoongArch_HardFloat");
|
||||||
|
+ d_add_builtin_version ("D_HardFloat");
|
||||||
|
+ }
|
||||||
|
+ else if (TARGET_SOFT_FLOAT_ABI)
|
||||||
|
+ {
|
||||||
|
+ d_add_builtin_version ("LoongArch_SoftFloat");
|
||||||
|
+ d_add_builtin_version ("D_SoftFloat");
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Handle a call to `__traits(getTargetInfo, "floatAbi")'. */
|
||||||
|
+
|
||||||
|
+static tree
|
||||||
|
+loongarch_d_handle_target_float_abi (void)
|
||||||
|
+{
|
||||||
|
+ const char *abi;
|
||||||
|
+
|
||||||
|
+ if (TARGET_HARD_FLOAT_ABI)
|
||||||
|
+ abi = "hard";
|
||||||
|
+ else if (TARGET_SOFT_FLOAT_ABI)
|
||||||
|
+ abi = "soft";
|
||||||
|
+ else
|
||||||
|
+ abi = "";
|
||||||
|
+
|
||||||
|
+ return build_string_literal (strlen (abi) + 1, abi);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO. */
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+loongarch_d_register_target_info (void)
|
||||||
|
+{
|
||||||
|
+ const struct d_target_info_spec handlers[] = {
|
||||||
|
+ {"floatAbi", loongarch_d_handle_target_float_abi},
|
||||||
|
+ {NULL, NULL},
|
||||||
|
+ };
|
||||||
|
+
|
||||||
|
+ d_add_target_info_handlers (handlers);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-d.h b/gcc/config/loongarch/loongarch-d.h
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a2fb8d51d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-d.h
|
||||||
|
@@ -0,0 +1,26 @@
|
||||||
|
+/* Definitions for the D front end on the LoongArch architecture.
|
||||||
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify
|
||||||
|
+it under the terms of the GNU General Public License as published by
|
||||||
|
+the Free Software Foundation; either version 3, or (at your option)
|
||||||
|
+any later version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful,
|
||||||
|
+but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
+GNU General Public License for more details.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License
|
||||||
|
+along with GCC; see the file COPYING3. If not see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+/* Defined in loongarch-d.cc */
|
||||||
|
+extern void
|
||||||
|
+loongarch_d_target_versions (void);
|
||||||
|
+extern void
|
||||||
|
+loongarch_d_register_target_info (void);
|
||||||
|
+
|
||||||
|
+/* Target hooks for D language. */
|
||||||
|
+#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions
|
||||||
|
+#define TARGET_D_REGISTER_CPU_TARGET_INFO loongarch_d_register_target_info
|
||||||
|
diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
|
||||||
|
index a1a40431f..994f4d19c 100644
|
||||||
|
--- a/gcc/config/loongarch/t-loongarch
|
||||||
|
+++ b/gcc/config/loongarch/t-loongarch
|
||||||
|
@@ -67,6 +67,10 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \
|
||||||
|
loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H)
|
||||||
|
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
|
||||||
|
|
||||||
|
+loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.cc
|
||||||
|
+ $(COMPILE) $<
|
||||||
|
+ $(POSTCOMPILE)
|
||||||
|
+
|
||||||
|
$(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
|
||||||
|
s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
|
||||||
|
$(srcdir)/config/loongarch/genopts/loongarch.opt.in \
|
||||||
|
diff --git a/libphobos/configure.tgt b/libphobos/configure.tgt
|
||||||
|
index 0063dd232..dcb1551cd 100644
|
||||||
|
--- a/libphobos/configure.tgt
|
||||||
|
+++ b/libphobos/configure.tgt
|
||||||
|
@@ -36,6 +36,9 @@ case "${target}" in
|
||||||
|
hppa-*-linux*)
|
||||||
|
LIBPHOBOS_SUPPORTED=yes
|
||||||
|
;;
|
||||||
|
+ loongarch*-*-linux*)
|
||||||
|
+ LIBPHOBOS_SUPPORTED=yes
|
||||||
|
+ ;;
|
||||||
|
mips*-*-linux*)
|
||||||
|
LIBPHOBOS_SUPPORTED=yes
|
||||||
|
;;
|
||||||
|
diff --git a/libphobos/libdruntime/gcc/sections/elf.d b/libphobos/libdruntime/gcc/sections/elf.d
|
||||||
|
index 5819811f3..bc993ea49 100644
|
||||||
|
--- a/libphobos/libdruntime/gcc/sections/elf.d
|
||||||
|
+++ b/libphobos/libdruntime/gcc/sections/elf.d
|
||||||
|
@@ -1061,6 +1061,8 @@ else version (MIPS64)
|
||||||
|
enum TLS_DTV_OFFSET = 0x8000;
|
||||||
|
else version (IBMZ_Any)
|
||||||
|
enum TLS_DTV_OFFSET = 0x0;
|
||||||
|
+else version (LoongArch64)
|
||||||
|
+ enum TLS_DTV_OFFSET = 0x0;
|
||||||
|
else
|
||||||
|
static assert( false, "Platform not supported." );
|
||||||
|
|
||||||
|
diff --git a/libphobos/libdruntime/gcc/unwind/generic.d b/libphobos/libdruntime/gcc/unwind/generic.d
|
||||||
|
index 929b75dc7..8e5db80e1 100644
|
||||||
|
--- a/libphobos/libdruntime/gcc/unwind/generic.d
|
||||||
|
+++ b/libphobos/libdruntime/gcc/unwind/generic.d
|
||||||
|
@@ -141,6 +141,7 @@ else version (SPARC64) private enum __aligned__ = 16;
|
||||||
|
else version (SystemZ) private enum __aligned__ = 8;
|
||||||
|
else version (X86) private enum __aligned__ = 16;
|
||||||
|
else version (X86_64) private enum __aligned__ = 16;
|
||||||
|
+else version (LoongArch64) private enum __aligned__ = 16;
|
||||||
|
else static assert( false, "Platform not supported.");
|
||||||
|
|
||||||
|
align(__aligned__) struct _Unwind_Exception
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
156
0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
Normal file
156
0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
Normal file
@ -0,0 +1,156 @@
|
|||||||
|
From 29eade7dc3032c6054f2ec2e2caa4ce43da6212d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Yang Yujie <yangyujie@loongson.cn>
|
||||||
|
Date: Fri, 8 Dec 2023 18:09:41 +0800
|
||||||
|
Subject: [PATCH 074/188] libruntime: Add fiber context switch code for
|
||||||
|
LoongArch.
|
||||||
|
|
||||||
|
libphobos/ChangeLog:
|
||||||
|
|
||||||
|
* libdruntime/config/loongarch/switchcontext.S: New file.
|
||||||
|
---
|
||||||
|
.../config/loongarch/switchcontext.S | 133 ++++++++++++++++++
|
||||||
|
1 file changed, 133 insertions(+)
|
||||||
|
create mode 100644 libphobos/libdruntime/config/loongarch/switchcontext.S
|
||||||
|
|
||||||
|
diff --git a/libphobos/libdruntime/config/loongarch/switchcontext.S b/libphobos/libdruntime/config/loongarch/switchcontext.S
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..edfb9b67e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/libphobos/libdruntime/config/loongarch/switchcontext.S
|
||||||
|
@@ -0,0 +1,133 @@
|
||||||
|
+/* LoongArch support code for fibers and multithreading.
|
||||||
|
+ Copyright (C) 2023 Free Software Foundation, Inc.
|
||||||
|
+
|
||||||
|
+This file is part of GCC.
|
||||||
|
+
|
||||||
|
+GCC is free software; you can redistribute it and/or modify it under
|
||||||
|
+the terms of the GNU General Public License as published by the Free
|
||||||
|
+Software Foundation; either version 3, or (at your option) any later
|
||||||
|
+version.
|
||||||
|
+
|
||||||
|
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
+for more details.
|
||||||
|
+
|
||||||
|
+Under Section 7 of GPL version 3, you are granted additional
|
||||||
|
+permissions described in the GCC Runtime Library Exception, version
|
||||||
|
+3.1, as published by the Free Software Foundation.
|
||||||
|
+
|
||||||
|
+You should have received a copy of the GNU General Public License and
|
||||||
|
+a copy of the GCC Runtime Library Exception along with this program;
|
||||||
|
+see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||||
|
+<http://www.gnu.org/licenses/>. */
|
||||||
|
+
|
||||||
|
+#include "../common/threadasm.S"
|
||||||
|
+
|
||||||
|
+/**
|
||||||
|
+ * Performs a context switch.
|
||||||
|
+ *
|
||||||
|
+ * $a0 - void** - ptr to old stack pointer
|
||||||
|
+ * $a1 - void* - new stack pointer
|
||||||
|
+ *
|
||||||
|
+ */
|
||||||
|
+
|
||||||
|
+#if defined(__loongarch_lp64)
|
||||||
|
+# define GPR_L ld.d
|
||||||
|
+# define GPR_S st.d
|
||||||
|
+# define SZ_GPR 8
|
||||||
|
+# define ADDSP(si) addi.d $sp, $sp, si
|
||||||
|
+#elif defined(__loongarch64_ilp32)
|
||||||
|
+# define GPR_L ld.w
|
||||||
|
+# define GPR_S st.w
|
||||||
|
+# define SZ_GPR 4
|
||||||
|
+# define ADDSP(si) addi.w $sp, $sp, si
|
||||||
|
+#else
|
||||||
|
+# error Unsupported GPR size (must be 64-bit or 32-bit).
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#if defined(__loongarch_double_float)
|
||||||
|
+# define FPR_L fld.d
|
||||||
|
+# define FPR_S fst.d
|
||||||
|
+# define SZ_FPR 8
|
||||||
|
+#elif defined(__loongarch_single_float)
|
||||||
|
+# define FPR_L fld.s
|
||||||
|
+# define FPR_S fst.s
|
||||||
|
+# define SZ_FPR 4
|
||||||
|
+#else
|
||||||
|
+# define SZ_FPR 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+ .text
|
||||||
|
+ .align 2
|
||||||
|
+ .global fiber_switchContext
|
||||||
|
+ .type fiber_switchContext, @function
|
||||||
|
+fiber_switchContext:
|
||||||
|
+ .cfi_startproc
|
||||||
|
+ ADDSP(-11 * SZ_GPR)
|
||||||
|
+
|
||||||
|
+ // fp regs and return address are stored below the stack
|
||||||
|
+ // because we don't want the GC to scan them.
|
||||||
|
+
|
||||||
|
+ // return address (r1)
|
||||||
|
+ GPR_S $r1, $sp, -SZ_GPR
|
||||||
|
+
|
||||||
|
+#if SZ_FPR != 0
|
||||||
|
+ // callee-saved scratch FPRs (f24-f31)
|
||||||
|
+ FPR_S $f24, $sp, -SZ_GPR-1*SZ_FPR
|
||||||
|
+ FPR_S $f25, $sp, -SZ_GPR-2*SZ_FPR
|
||||||
|
+ FPR_S $f26, $sp, -SZ_GPR-3*SZ_FPR
|
||||||
|
+ FPR_S $f27, $sp, -SZ_GPR-4*SZ_FPR
|
||||||
|
+ FPR_S $f28, $sp, -SZ_GPR-5*SZ_FPR
|
||||||
|
+ FPR_S $f29, $sp, -SZ_GPR-6*SZ_FPR
|
||||||
|
+ FPR_S $f30, $sp, -SZ_GPR-7*SZ_FPR
|
||||||
|
+ FPR_S $f31, $sp, -SZ_GPR-8*SZ_FPR
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+ // callee-saved GPRs (r21, fp (r22), r23-r31)
|
||||||
|
+ GPR_S $r21, $sp, 0*SZ_GPR
|
||||||
|
+ GPR_S $fp, $sp, 1*SZ_GPR
|
||||||
|
+ GPR_S $s0, $sp, 2*SZ_GPR
|
||||||
|
+ GPR_S $s1, $sp, 3*SZ_GPR
|
||||||
|
+ GPR_S $s2, $sp, 4*SZ_GPR
|
||||||
|
+ GPR_S $s3, $sp, 5*SZ_GPR
|
||||||
|
+ GPR_S $s4, $sp, 6*SZ_GPR
|
||||||
|
+ GPR_S $s5, $sp, 7*SZ_GPR
|
||||||
|
+ GPR_S $s6, $sp, 8*SZ_GPR
|
||||||
|
+ GPR_S $s7, $sp, 9*SZ_GPR
|
||||||
|
+ GPR_S $s8, $sp, 10*SZ_GPR
|
||||||
|
+
|
||||||
|
+ // swap stack pointer
|
||||||
|
+ GPR_S $sp, $a0, 0
|
||||||
|
+ move $sp, $a1
|
||||||
|
+
|
||||||
|
+ GPR_L $r1, $sp, -SZ_GPR
|
||||||
|
+
|
||||||
|
+#if SZ_FPR != 0
|
||||||
|
+ FPR_L $f24, $sp, -SZ_GPR-1*SZ_FPR
|
||||||
|
+ FPR_L $f25, $sp, -SZ_GPR-2*SZ_FPR
|
||||||
|
+ FPR_L $f26, $sp, -SZ_GPR-3*SZ_FPR
|
||||||
|
+ FPR_L $f27, $sp, -SZ_GPR-4*SZ_FPR
|
||||||
|
+ FPR_L $f28, $sp, -SZ_GPR-5*SZ_FPR
|
||||||
|
+ FPR_L $f29, $sp, -SZ_GPR-6*SZ_FPR
|
||||||
|
+ FPR_L $f30, $sp, -SZ_GPR-7*SZ_FPR
|
||||||
|
+ FPR_L $f31, $sp, -SZ_GPR-8*SZ_FPR
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+ GPR_L $r21, $sp, 0*SZ_GPR
|
||||||
|
+ GPR_L $fp, $sp, 1*SZ_GPR
|
||||||
|
+ GPR_L $s0, $sp, 2*SZ_GPR
|
||||||
|
+ GPR_L $s1, $sp, 3*SZ_GPR
|
||||||
|
+ GPR_L $s2, $sp, 4*SZ_GPR
|
||||||
|
+ GPR_L $s3, $sp, 5*SZ_GPR
|
||||||
|
+ GPR_L $s4, $sp, 6*SZ_GPR
|
||||||
|
+ GPR_L $s5, $sp, 7*SZ_GPR
|
||||||
|
+ GPR_L $s6, $sp, 8*SZ_GPR
|
||||||
|
+ GPR_L $s7, $sp, 9*SZ_GPR
|
||||||
|
+ GPR_L $s8, $sp, 10*SZ_GPR
|
||||||
|
+
|
||||||
|
+ ADDSP(11 * SZ_GPR)
|
||||||
|
+
|
||||||
|
+ jr $r1 // return
|
||||||
|
+ .cfi_endproc
|
||||||
|
+ .size fiber_switchContext,.-fiber_switchContext
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
866
0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
Normal file
866
0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
Normal file
@ -0,0 +1,866 @@
|
|||||||
|
From dd33794e64d462bf39e72f39343a384c191307f4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 17 Dec 2023 01:09:20 +0800
|
||||||
|
Subject: [PATCH 075/188] LoongArch: Fix FP vector comparsons [PR113034]
|
||||||
|
|
||||||
|
We had the following mappings between <x>vfcmp submnemonics and RTX
|
||||||
|
codes:
|
||||||
|
|
||||||
|
(define_code_attr fcc
|
||||||
|
[(unordered "cun")
|
||||||
|
(ordered "cor")
|
||||||
|
(eq "ceq")
|
||||||
|
(ne "cne")
|
||||||
|
(uneq "cueq")
|
||||||
|
(unle "cule")
|
||||||
|
(unlt "cult")
|
||||||
|
(le "cle")
|
||||||
|
(lt "clt")])
|
||||||
|
|
||||||
|
This is inconsistent with scalar code:
|
||||||
|
|
||||||
|
(define_code_attr fcond [(unordered "cun")
|
||||||
|
(uneq "cueq")
|
||||||
|
(unlt "cult")
|
||||||
|
(unle "cule")
|
||||||
|
(eq "ceq")
|
||||||
|
(lt "slt")
|
||||||
|
(le "sle")
|
||||||
|
(ordered "cor")
|
||||||
|
(ltgt "sne")
|
||||||
|
(ne "cune")
|
||||||
|
(ge "sge")
|
||||||
|
(gt "sgt")
|
||||||
|
(unge "cuge")
|
||||||
|
(ungt "cugt")])
|
||||||
|
|
||||||
|
For every RTX code for which the LSX/LASX code is different from the
|
||||||
|
scalar code, the scalar code is correct and the LSX/LASX code is wrong.
|
||||||
|
Most seriously, the RTX code NE should be mapped to "cune", not "cne".
|
||||||
|
Rewrite <x>vfcmp define_insns in simd.md using the same mapping as
|
||||||
|
scalar fcmp.
|
||||||
|
|
||||||
|
Note that GAS does not support [x]vfcmp.{c/s}[u]{ge/gt} (pseudo)
|
||||||
|
instruction (although fcmp.{c/s}[u]{ge/gt} is supported), so we need to
|
||||||
|
switch the order of inputs and use [x]vfcmp.{c/s}[u]{le/lt} instead.
|
||||||
|
|
||||||
|
The <x>vfcmp.{sult/sule/clt/cle}.{s/d} instructions do not have a single
|
||||||
|
RTX code, but they can be modeled as an inverted RTX code following a
|
||||||
|
"not" operation. Doing so allows the compiler to optimize vectorized
|
||||||
|
__builtin_isless etc. to a single instruction. This optimization should
|
||||||
|
be added for scalar code too and I'll do it later.
|
||||||
|
|
||||||
|
Tests are added for mapping between C code, IEC 60559 operations, and
|
||||||
|
vfcmp instructions.
|
||||||
|
|
||||||
|
[1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640713.html
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/113034
|
||||||
|
* config/loongarch/lasx.md (UNSPEC_LASX_XVFCMP_*): Remove.
|
||||||
|
(lasx_xvfcmp_caf_<flasxfmt>): Remove.
|
||||||
|
(lasx_xvfcmp_cune_<FLASX:flasxfmt>): Remove.
|
||||||
|
(FSC256_UNS): Remove.
|
||||||
|
(fsc256): Remove.
|
||||||
|
(lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>): Remove.
|
||||||
|
(lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>): Remove.
|
||||||
|
* config/loongarch/lsx.md (UNSPEC_LSX_VFCMP_*): Remove.
|
||||||
|
(lsx_vfcmp_caf_<flsxfmt>): Remove.
|
||||||
|
(lsx_vfcmp_cune_<FLSX:flsxfmt>): Remove.
|
||||||
|
(vfcond): Remove.
|
||||||
|
(fcc): Remove.
|
||||||
|
(FSC_UNS): Remove.
|
||||||
|
(fsc): Remove.
|
||||||
|
(lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>): Remove.
|
||||||
|
(lsx_vfcmp_<fsc>_<FLSX:flsxfmt>): Remove.
|
||||||
|
* config/loongarch/simd.md
|
||||||
|
(fcond_simd): New define_code_iterator.
|
||||||
|
(<simd_isa>_<x>vfcmp_<fcond:fcond_simd>_<simdfmt>):
|
||||||
|
New define_insn.
|
||||||
|
(fcond_simd_rev): New define_code_iterator.
|
||||||
|
(fcond_rev_asm): New define_code_attr.
|
||||||
|
(<simd_isa>_<x>vfcmp_<fcond:fcond_simd_rev>_<simdfmt>):
|
||||||
|
New define_insn.
|
||||||
|
(fcond_inv): New define_code_iterator.
|
||||||
|
(fcond_inv_rev): New define_code_iterator.
|
||||||
|
(fcond_inv_rev_asm): New define_code_attr.
|
||||||
|
(<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>): New define_insn.
|
||||||
|
(<simd_isa>_<x>vfcmp_<fcond_inv:fcond_inv_rev>_<simdfmt>):
|
||||||
|
New define_insn.
|
||||||
|
(UNSPEC_SIMD_FCMP_CAF, UNSPEC_SIMD_FCMP_SAF,
|
||||||
|
UNSPEC_SIMD_FCMP_SEQ, UNSPEC_SIMD_FCMP_SUN,
|
||||||
|
UNSPEC_SIMD_FCMP_SUEQ, UNSPEC_SIMD_FCMP_CNE,
|
||||||
|
UNSPEC_SIMD_FCMP_SOR, UNSPEC_SIMD_FCMP_SUNE): New unspecs.
|
||||||
|
(SIMD_FCMP): New define_int_iterator.
|
||||||
|
(fcond_unspec): New define_int_attr.
|
||||||
|
(<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>): New define_insn.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp):
|
||||||
|
Remove unneeded special cases.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/113034
|
||||||
|
* gcc.target/loongarch/vfcmp-f.c: New test.
|
||||||
|
* gcc.target/loongarch/vfcmp-d.c: New test.
|
||||||
|
* gcc.target/loongarch/xvfcmp-f.c: New test.
|
||||||
|
* gcc.target/loongarch/xvfcmp-d.c: New test.
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Scan for cune
|
||||||
|
instead of cne.
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Likewise.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 76 --------
|
||||||
|
gcc/config/loongarch/loongarch.cc | 60 +-----
|
||||||
|
gcc/config/loongarch/lsx.md | 83 --------
|
||||||
|
gcc/config/loongarch/simd.md | 118 ++++++++++++
|
||||||
|
.../loongarch/vector/lasx/lasx-vcond-2.c | 4 +-
|
||||||
|
.../loongarch/vector/lsx/lsx-vcond-2.c | 4 +-
|
||||||
|
gcc/testsuite/gcc.target/loongarch/vfcmp-d.c | 28 +++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/vfcmp-f.c | 178 ++++++++++++++++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c | 29 +++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c | 27 +++
|
||||||
|
10 files changed, 385 insertions(+), 222 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index eeac8cd98..921ce0eeb 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -32,9 +32,7 @@
|
||||||
|
UNSPEC_LASX_XVBITREVI
|
||||||
|
UNSPEC_LASX_XVBITSET
|
||||||
|
UNSPEC_LASX_XVBITSETI
|
||||||
|
- UNSPEC_LASX_XVFCMP_CAF
|
||||||
|
UNSPEC_LASX_XVFCLASS
|
||||||
|
- UNSPEC_LASX_XVFCMP_CUNE
|
||||||
|
UNSPEC_LASX_XVFCVT
|
||||||
|
UNSPEC_LASX_XVFCVTH
|
||||||
|
UNSPEC_LASX_XVFCVTL
|
||||||
|
@@ -44,17 +42,6 @@
|
||||||
|
UNSPEC_LASX_XVFRINT
|
||||||
|
UNSPEC_LASX_XVFRSQRT
|
||||||
|
UNSPEC_LASX_XVFRSQRTE
|
||||||
|
- UNSPEC_LASX_XVFCMP_SAF
|
||||||
|
- UNSPEC_LASX_XVFCMP_SEQ
|
||||||
|
- UNSPEC_LASX_XVFCMP_SLE
|
||||||
|
- UNSPEC_LASX_XVFCMP_SLT
|
||||||
|
- UNSPEC_LASX_XVFCMP_SNE
|
||||||
|
- UNSPEC_LASX_XVFCMP_SOR
|
||||||
|
- UNSPEC_LASX_XVFCMP_SUEQ
|
||||||
|
- UNSPEC_LASX_XVFCMP_SULE
|
||||||
|
- UNSPEC_LASX_XVFCMP_SULT
|
||||||
|
- UNSPEC_LASX_XVFCMP_SUN
|
||||||
|
- UNSPEC_LASX_XVFCMP_SUNE
|
||||||
|
UNSPEC_LASX_XVFTINT_U
|
||||||
|
UNSPEC_LASX_XVCLO
|
||||||
|
UNSPEC_LASX_XVSAT_S
|
||||||
|
@@ -1481,69 +1468,6 @@
|
||||||
|
[(set_attr "type" "simd_fclass")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lasx_xvfcmp_caf_<flasxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
|
||||||
|
- (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLASX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVFCMP_CAF))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvfcmp.caf.<flasxfmt>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-(define_insn "lasx_xvfcmp_cune_<FLASX:flasxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
|
||||||
|
- (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLASX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LASX_XVFCMP_CUNE))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvfcmp.cune.<FLASX:flasxfmt>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-(define_int_iterator FSC256_UNS [UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN
|
||||||
|
- UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ
|
||||||
|
- UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ
|
||||||
|
- UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE
|
||||||
|
- UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE
|
||||||
|
- UNSPEC_LASX_XVFCMP_SLT])
|
||||||
|
-
|
||||||
|
-(define_int_attr fsc256
|
||||||
|
- [(UNSPEC_LASX_XVFCMP_SAF "saf")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SUN "sun")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SOR "sor")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SEQ "seq")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SNE "sne")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SUEQ "sueq")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SUNE "sune")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SULE "sule")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SULT "sult")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SLE "sle")
|
||||||
|
- (UNSPEC_LASX_XVFCMP_SLT "slt")])
|
||||||
|
-
|
||||||
|
-(define_insn "lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
|
||||||
|
- (vfcond:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLASX 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvfcmp.<vfcond:fcc>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-
|
||||||
|
-(define_insn "lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
|
||||||
|
- (unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLASX 2 "register_operand" "f")]
|
||||||
|
- FSC256_UNS))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
- "xvfcmp.<fsc256>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-
|
||||||
|
(define_mode_attr fint256
|
||||||
|
[(V8SF "v8si")
|
||||||
|
(V4DF "v4di")])
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index afbb55390..a22601d88 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -11156,7 +11156,6 @@ static void
|
||||||
|
loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
|
||||||
|
{
|
||||||
|
machine_mode cmp_mode = GET_MODE (op0);
|
||||||
|
- int unspec = -1;
|
||||||
|
bool negate = false;
|
||||||
|
|
||||||
|
switch (cmp_mode)
|
||||||
|
@@ -11198,66 +11197,9 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
|
||||||
|
|
||||||
|
case E_V4SFmode:
|
||||||
|
case E_V2DFmode:
|
||||||
|
- switch (cond)
|
||||||
|
- {
|
||||||
|
- case UNORDERED:
|
||||||
|
- case ORDERED:
|
||||||
|
- case EQ:
|
||||||
|
- case NE:
|
||||||
|
- case UNEQ:
|
||||||
|
- case UNLE:
|
||||||
|
- case UNLT:
|
||||||
|
- break;
|
||||||
|
- case LTGT: cond = NE; break;
|
||||||
|
- case UNGE: cond = UNLE; std::swap (op0, op1); break;
|
||||||
|
- case UNGT: cond = UNLT; std::swap (op0, op1); break;
|
||||||
|
- case LE: unspec = UNSPEC_LSX_VFCMP_SLE; break;
|
||||||
|
- case LT: unspec = UNSPEC_LSX_VFCMP_SLT; break;
|
||||||
|
- case GE: unspec = UNSPEC_LSX_VFCMP_SLE; std::swap (op0, op1); break;
|
||||||
|
- case GT: unspec = UNSPEC_LSX_VFCMP_SLT; std::swap (op0, op1); break;
|
||||||
|
- default:
|
||||||
|
- gcc_unreachable ();
|
||||||
|
- }
|
||||||
|
- if (unspec < 0)
|
||||||
|
- loongarch_emit_binary (cond, dest, op0, op1);
|
||||||
|
- else
|
||||||
|
- {
|
||||||
|
- rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
|
||||||
|
- gen_rtvec (2, op0, op1), unspec);
|
||||||
|
- emit_insn (gen_rtx_SET (dest, x));
|
||||||
|
- }
|
||||||
|
- break;
|
||||||
|
-
|
||||||
|
case E_V8SFmode:
|
||||||
|
case E_V4DFmode:
|
||||||
|
- switch (cond)
|
||||||
|
- {
|
||||||
|
- case UNORDERED:
|
||||||
|
- case ORDERED:
|
||||||
|
- case EQ:
|
||||||
|
- case NE:
|
||||||
|
- case UNEQ:
|
||||||
|
- case UNLE:
|
||||||
|
- case UNLT:
|
||||||
|
- break;
|
||||||
|
- case LTGT: cond = NE; break;
|
||||||
|
- case UNGE: cond = UNLE; std::swap (op0, op1); break;
|
||||||
|
- case UNGT: cond = UNLT; std::swap (op0, op1); break;
|
||||||
|
- case LE: unspec = UNSPEC_LASX_XVFCMP_SLE; break;
|
||||||
|
- case LT: unspec = UNSPEC_LASX_XVFCMP_SLT; break;
|
||||||
|
- case GE: unspec = UNSPEC_LASX_XVFCMP_SLE; std::swap (op0, op1); break;
|
||||||
|
- case GT: unspec = UNSPEC_LASX_XVFCMP_SLT; std::swap (op0, op1); break;
|
||||||
|
- default:
|
||||||
|
- gcc_unreachable ();
|
||||||
|
- }
|
||||||
|
- if (unspec < 0)
|
||||||
|
- loongarch_emit_binary (cond, dest, op0, op1);
|
||||||
|
- else
|
||||||
|
- {
|
||||||
|
- rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
|
||||||
|
- gen_rtvec (2, op0, op1), unspec);
|
||||||
|
- emit_insn (gen_rtx_SET (dest, x));
|
||||||
|
- }
|
||||||
|
+ loongarch_emit_binary (cond, dest, op0, op1);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index dbdb42301..57e0ee3d4 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -34,9 +34,7 @@
|
||||||
|
UNSPEC_LSX_VBITSETI
|
||||||
|
UNSPEC_LSX_BRANCH_V
|
||||||
|
UNSPEC_LSX_BRANCH
|
||||||
|
- UNSPEC_LSX_VFCMP_CAF
|
||||||
|
UNSPEC_LSX_VFCLASS
|
||||||
|
- UNSPEC_LSX_VFCMP_CUNE
|
||||||
|
UNSPEC_LSX_VFCVT
|
||||||
|
UNSPEC_LSX_VFCVTH
|
||||||
|
UNSPEC_LSX_VFCVTL
|
||||||
|
@@ -46,17 +44,6 @@
|
||||||
|
UNSPEC_LSX_VFRINT
|
||||||
|
UNSPEC_LSX_VFRSQRT
|
||||||
|
UNSPEC_LSX_VFRSQRTE
|
||||||
|
- UNSPEC_LSX_VFCMP_SAF
|
||||||
|
- UNSPEC_LSX_VFCMP_SEQ
|
||||||
|
- UNSPEC_LSX_VFCMP_SLE
|
||||||
|
- UNSPEC_LSX_VFCMP_SLT
|
||||||
|
- UNSPEC_LSX_VFCMP_SNE
|
||||||
|
- UNSPEC_LSX_VFCMP_SOR
|
||||||
|
- UNSPEC_LSX_VFCMP_SUEQ
|
||||||
|
- UNSPEC_LSX_VFCMP_SULE
|
||||||
|
- UNSPEC_LSX_VFCMP_SULT
|
||||||
|
- UNSPEC_LSX_VFCMP_SUN
|
||||||
|
- UNSPEC_LSX_VFCMP_SUNE
|
||||||
|
UNSPEC_LSX_VFTINT_U
|
||||||
|
UNSPEC_LSX_VSAT_S
|
||||||
|
UNSPEC_LSX_VSAT_U
|
||||||
|
@@ -1377,76 +1364,6 @@
|
||||||
|
[(set_attr "type" "simd_fclass")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "lsx_vfcmp_caf_<flsxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
- (unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLSX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VFCMP_CAF))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vfcmp.caf.<flsxfmt>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-(define_insn "lsx_vfcmp_cune_<FLSX:flsxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
- (unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLSX 2 "register_operand" "f")]
|
||||||
|
- UNSPEC_LSX_VFCMP_CUNE))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vfcmp.cune.<FLSX:flsxfmt>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-(define_code_iterator vfcond [unordered ordered eq ne le lt uneq unle unlt])
|
||||||
|
-
|
||||||
|
-(define_code_attr fcc
|
||||||
|
- [(unordered "cun")
|
||||||
|
- (ordered "cor")
|
||||||
|
- (eq "ceq")
|
||||||
|
- (ne "cne")
|
||||||
|
- (uneq "cueq")
|
||||||
|
- (unle "cule")
|
||||||
|
- (unlt "cult")
|
||||||
|
- (le "cle")
|
||||||
|
- (lt "clt")])
|
||||||
|
-
|
||||||
|
-(define_int_iterator FSC_UNS [UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SUN UNSPEC_LSX_VFCMP_SOR
|
||||||
|
- UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SNE UNSPEC_LSX_VFCMP_SUEQ
|
||||||
|
- UNSPEC_LSX_VFCMP_SUNE UNSPEC_LSX_VFCMP_SULE UNSPEC_LSX_VFCMP_SULT
|
||||||
|
- UNSPEC_LSX_VFCMP_SLE UNSPEC_LSX_VFCMP_SLT])
|
||||||
|
-
|
||||||
|
-(define_int_attr fsc
|
||||||
|
- [(UNSPEC_LSX_VFCMP_SAF "saf")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SUN "sun")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SOR "sor")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SEQ "seq")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SNE "sne")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SUEQ "sueq")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SUNE "sune")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SULE "sule")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SULT "sult")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SLE "sle")
|
||||||
|
- (UNSPEC_LSX_VFCMP_SLT "slt")])
|
||||||
|
-
|
||||||
|
-(define_insn "lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
- (vfcond:<VIMODE> (match_operand:FLSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLSX 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vfcmp.<vfcond:fcc>.<FLSX:flsxfmt>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
-(define_insn "lsx_vfcmp_<fsc>_<FLSX:flsxfmt>"
|
||||||
|
- [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
- (unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
|
||||||
|
- (match_operand:FLSX 2 "register_operand" "f")]
|
||||||
|
- FSC_UNS))]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
- "vfcmp.<fsc>.<FLSX:flsxfmt>\t%w0,%w1,%w2"
|
||||||
|
- [(set_attr "type" "simd_fcmp")
|
||||||
|
- (set_attr "mode" "<MODE>")])
|
||||||
|
-
|
||||||
|
(define_mode_attr fint
|
||||||
|
[(V4SF "v4si")
|
||||||
|
(V2DF "v2di")])
|
||||||
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
||||||
|
index 843b1a41f..13202f79b 100644
|
||||||
|
--- a/gcc/config/loongarch/simd.md
|
||||||
|
+++ b/gcc/config/loongarch/simd.md
|
||||||
|
@@ -279,6 +279,124 @@
|
||||||
|
[(set_attr "type" "simd_int_arith")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; <x>vfcmp.*.{s/d} with defined RTX code
|
||||||
|
+;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} menmonics in GAS, so we have
|
||||||
|
+;; to reverse the operands ourselves :(.
|
||||||
|
+(define_code_iterator fcond_simd [unordered uneq unlt unle eq lt le
|
||||||
|
+ ordered ltgt ne])
|
||||||
|
+(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
|
||||||
|
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
+ (fcond_simd:<VIMODE>
|
||||||
|
+ (match_operand:FVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:FVEC 2 "register_operand" "f")))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vfcmp.<fcond>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
|
||||||
|
+ [(set_attr "type" "simd_fcmp")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
+;; There are no fcmp.{sge/sgt/cuge/cugt}.{s/d} menmonics in GAS, so we have
|
||||||
|
+;; to reverse the operands ourselves.
|
||||||
|
+(define_code_iterator fcond_simd_rev [ge gt unge ungt])
|
||||||
|
+
|
||||||
|
+(define_code_attr fcond_rev_asm
|
||||||
|
+ [(ge "sle")
|
||||||
|
+ (gt "slt")
|
||||||
|
+ (unge "cule")
|
||||||
|
+ (ungt "cult")])
|
||||||
|
+
|
||||||
|
+(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
|
||||||
|
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
+ (fcond_simd_rev:<VIMODE>
|
||||||
|
+ (match_operand:FVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:FVEC 2 "register_operand" "f")))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vfcmp.<fcond_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1";
|
||||||
|
+ [(set_attr "type" "simd_fcmp")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
+;; <x>vfcmp.*.{s/d} without defined RTX code, but with defined RTX code for
|
||||||
|
+;; its inverse. Again, there are no fcmp.{sugt/suge/cgt/cge}.{s/d}
|
||||||
|
+;; menmonics in GAS, so we have to reverse the operands ourselves.
|
||||||
|
+(define_code_iterator fcond_inv [ge gt unge ungt])
|
||||||
|
+(define_code_iterator fcond_inv_rev [le lt unle unlt])
|
||||||
|
+(define_code_attr fcond_inv
|
||||||
|
+ [(ge "sult")
|
||||||
|
+ (gt "sule")
|
||||||
|
+ (unge "clt")
|
||||||
|
+ (ungt "cle")
|
||||||
|
+ (le "sugt")
|
||||||
|
+ (lt "suge")
|
||||||
|
+ (unle "cgt")
|
||||||
|
+ (unlt "cge")])
|
||||||
|
+(define_code_attr fcond_inv_rev_asm
|
||||||
|
+ [(le "sult")
|
||||||
|
+ (lt "sule")
|
||||||
|
+ (unle "clt")
|
||||||
|
+ (unlt "cle")])
|
||||||
|
+
|
||||||
|
+(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
|
||||||
|
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
+ (not:<VIMODE>
|
||||||
|
+ (fcond_inv:<VIMODE>
|
||||||
|
+ (match_operand:FVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:FVEC 2 "register_operand" "f"))))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vfcmp.<fcond_inv>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
|
||||||
|
+ [(set_attr "type" "simd_fcmp")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
+(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
|
||||||
|
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
+ (not:<VIMODE>
|
||||||
|
+ (fcond_inv_rev:<VIMODE>
|
||||||
|
+ (match_operand:FVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:FVEC 2 "register_operand" "f"))))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vfcmp.<fcond_inv_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1"
|
||||||
|
+ [(set_attr "type" "simd_fcmp")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
+;; <x>vfcmp.*.{s/d} instructions only as instrinsics
|
||||||
|
+(define_c_enum "unspec"
|
||||||
|
+ [UNSPEC_SIMD_FCMP_CAF
|
||||||
|
+ UNSPEC_SIMD_FCMP_SAF
|
||||||
|
+ UNSPEC_SIMD_FCMP_SEQ
|
||||||
|
+ UNSPEC_SIMD_FCMP_SUN
|
||||||
|
+ UNSPEC_SIMD_FCMP_SUEQ
|
||||||
|
+ UNSPEC_SIMD_FCMP_CNE
|
||||||
|
+ UNSPEC_SIMD_FCMP_SOR
|
||||||
|
+ UNSPEC_SIMD_FCMP_SUNE])
|
||||||
|
+
|
||||||
|
+(define_int_iterator SIMD_FCMP
|
||||||
|
+ [UNSPEC_SIMD_FCMP_CAF
|
||||||
|
+ UNSPEC_SIMD_FCMP_SAF
|
||||||
|
+ UNSPEC_SIMD_FCMP_SEQ
|
||||||
|
+ UNSPEC_SIMD_FCMP_SUN
|
||||||
|
+ UNSPEC_SIMD_FCMP_SUEQ
|
||||||
|
+ UNSPEC_SIMD_FCMP_CNE
|
||||||
|
+ UNSPEC_SIMD_FCMP_SOR
|
||||||
|
+ UNSPEC_SIMD_FCMP_SUNE])
|
||||||
|
+
|
||||||
|
+(define_int_attr fcond_unspec
|
||||||
|
+ [(UNSPEC_SIMD_FCMP_CAF "caf")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_SAF "saf")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_SEQ "seq")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_SUN "sun")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_SUEQ "sueq")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_CNE "cne")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_SOR "sor")
|
||||||
|
+ (UNSPEC_SIMD_FCMP_SUNE "sune")])
|
||||||
|
+
|
||||||
|
+(define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
|
||||||
|
+ [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
|
||||||
|
+ (unspec:<VIMODE> [(match_operand:FVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:FVEC 2 "register_operand" "f")]
|
||||||
|
+ SIMD_FCMP))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>vfcmp.<fcond_unspec>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
|
||||||
|
+ [(set_attr "type" "simd_fcmp")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
; The LoongArch SX Instructions.
|
||||||
|
(include "lsx.md")
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
index 55d5a084c..f2f523622 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
|
||||||
|
@@ -69,8 +69,8 @@ TEST_CMP (nugt)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.d} 3 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
index 2214afd0a..486bedba4 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
|
||||||
|
@@ -69,8 +69,8 @@ TEST_CMP (nugt)
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */
|
||||||
|
-/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.s} 3 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.d} 3 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */
|
||||||
|
/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..8b870ef38
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
|
||||||
|
+
|
||||||
|
+#define F double
|
||||||
|
+#define I long long
|
||||||
|
+
|
||||||
|
+#include "vfcmp-f.c"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..b9110b90c
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
|
||||||
|
@@ -0,0 +1,178 @@
|
||||||
|
+/* Test mapping IEC 60559 operations to SIMD instructions.
|
||||||
|
+ For details read C23 Annex F.3 and LoongArch Vol. 1 section 3.2.2.1. */
|
||||||
|
+
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
|
||||||
|
+
|
||||||
|
+#ifndef F
|
||||||
|
+#define F float
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef I
|
||||||
|
+#define I int
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef VL
|
||||||
|
+#define VL 16
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+typedef F VF __attribute__ ((vector_size (VL)));
|
||||||
|
+typedef I VI __attribute__ ((vector_size (VL)));
|
||||||
|
+
|
||||||
|
+register VF a asm ("f0");
|
||||||
|
+register VF b asm ("f1");
|
||||||
|
+register VI c asm ("f2");
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_equal (void)
|
||||||
|
+{
|
||||||
|
+ c = (a == b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_not_equal (void)
|
||||||
|
+{
|
||||||
|
+ c = (a != b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_greater (void)
|
||||||
|
+{
|
||||||
|
+ c = (a > b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_greater_equal (void)
|
||||||
|
+{
|
||||||
|
+ c = (a >= b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_less (void)
|
||||||
|
+{
|
||||||
|
+ c = (a < b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_less_equal (void)
|
||||||
|
+{
|
||||||
|
+ c = (a <= b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_not_greater (void)
|
||||||
|
+{
|
||||||
|
+ c = ~(a > b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_less_unordered (void)
|
||||||
|
+{
|
||||||
|
+ c = ~(a >= b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_not_less (void)
|
||||||
|
+{
|
||||||
|
+ c = ~(a < b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_signaling_greater_unordered (void)
|
||||||
|
+{
|
||||||
|
+ c = ~(a <= b);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_less (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isless (a[i], b[i]) ? -1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_less_equal (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_islessequal (a[i], b[i]) ? -1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_greater (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isgreater (a[i], b[i]) ? -1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_greater_equal (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isgreaterequal (a[i], b[i]) ? -1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_not_less (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isless (a[i], b[i]) ? 0 : -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_greater_unordered (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_islessequal (a[i], b[i]) ? 0 : -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_not_greater (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isgreater (a[i], b[i]) ? 0 : -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_less_unordered (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isgreaterequal (a[i], b[i]) ? 0 : -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_unordered (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isunordered (a[i], b[i]) ? -1 : 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+compare_quiet_ordered (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
|
||||||
|
+ c[i] = __builtin_isunordered (a[i], b[i]) ? 0 : -1;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* The "-<function_name>" matches the .size directive after the function
|
||||||
|
+ body, so we can ensure the instruction is in the correct function. */
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..d8017caaa
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
|
||||||
|
@@ -0,0 +1,29 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
|
||||||
|
+
|
||||||
|
+#define F double
|
||||||
|
+#define I long long
|
||||||
|
+#define VL 32
|
||||||
|
+
|
||||||
|
+#include "vfcmp-f.c"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..b54556475
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
|
||||||
|
@@ -0,0 +1,27 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2" } */
|
||||||
|
+
|
||||||
|
+#define VL 32
|
||||||
|
+
|
||||||
|
+#include "vfcmp-f.c"
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */
|
||||||
|
+/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
190
0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
Normal file
190
0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
Normal file
@ -0,0 +1,190 @@
|
|||||||
|
From be149d7f6527df6b16f3f9f8aec1e488466a71f1 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 19 Dec 2023 04:48:03 +0800
|
||||||
|
Subject: [PATCH 076/188] LoongArch: Use force_reg instead of gen_reg_rtx +
|
||||||
|
emit_move_insn in vec_init expander [PR113033]
|
||||||
|
|
||||||
|
Jakub says:
|
||||||
|
|
||||||
|
Then that seems like a bug in the loongarch vec_init pattern(s).
|
||||||
|
Those really don't have a predicate in any of the backends on the
|
||||||
|
input operand, so they need to force_reg it if it is something it
|
||||||
|
can't handle. I've looked e.g. at i386 vec_init and that is exactly
|
||||||
|
what it does, see the various tests + force_reg calls in
|
||||||
|
ix86_expand_vector_init*.
|
||||||
|
|
||||||
|
So replace gen_reg_rtx + emit_move_insn with force_reg to fix PR 113033.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/113033
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_expand_vector_init_same): Replace gen_reg_rtx +
|
||||||
|
emit_move_insn with force_reg.
|
||||||
|
(loongarch_expand_vector_init): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/113033
|
||||||
|
* gcc.target/loongarch/pr113033.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 38 ++++++-------------
|
||||||
|
gcc/testsuite/gcc.target/loongarch/pr113033.c | 23 +++++++++++
|
||||||
|
2 files changed, 35 insertions(+), 26 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113033.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index a22601d88..000d2d623 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -10745,7 +10745,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
- temp = gen_reg_rtx (imode);
|
||||||
|
+
|
||||||
|
if (imode == GET_MODE (same))
|
||||||
|
temp2 = same;
|
||||||
|
else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
|
||||||
|
@@ -10770,7 +10770,8 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
|
||||||
|
else
|
||||||
|
temp2 = lowpart_subreg (imode, same, GET_MODE (same));
|
||||||
|
}
|
||||||
|
- emit_move_insn (temp, temp2);
|
||||||
|
+
|
||||||
|
+ temp = force_reg (imode, temp2);
|
||||||
|
|
||||||
|
switch (vmode)
|
||||||
|
{
|
||||||
|
@@ -10992,35 +10993,29 @@ loongarch_expand_vector_init (rtx target, rtx vals)
|
||||||
|
to reduce the number of instructions. */
|
||||||
|
if (i == 1)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val_hi[0]);
|
||||||
|
- op1 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op1, val_hi[1]);
|
||||||
|
+ op0 = force_reg (imode, val_hi[0]);
|
||||||
|
+ op1 = force_reg (imode, val_hi[1]);
|
||||||
|
emit_insn (
|
||||||
|
loongarch_vec_repl2_256 (target_hi, op0, op1));
|
||||||
|
}
|
||||||
|
else if (i > 1)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val_hi[i]);
|
||||||
|
+ op0 = force_reg (imode, val_hi[i]);
|
||||||
|
emit_insn (
|
||||||
|
loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
+ op0 = force_reg (imode, val_hi[i]);
|
||||||
|
/* Assign the lowest element of val_hi to all elements
|
||||||
|
of target_hi. */
|
||||||
|
if (i == 0)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val_hi[0]);
|
||||||
|
emit_insn (loongarch_vec_repl1_256 (target_hi, op0));
|
||||||
|
}
|
||||||
|
else if (!rtx_equal_p (val_hi[i], val_hi[0]))
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val_hi[i]);
|
||||||
|
emit_insn (
|
||||||
|
loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
|
||||||
|
}
|
||||||
|
@@ -11028,18 +11023,15 @@ loongarch_expand_vector_init (rtx target, rtx vals)
|
||||||
|
}
|
||||||
|
if (!lo_same && !half_same)
|
||||||
|
{
|
||||||
|
+ op0 = force_reg (imode, val_lo[i]);
|
||||||
|
/* Assign the lowest element of val_lo to all elements
|
||||||
|
of target_lo. */
|
||||||
|
if (i == 0)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val_lo[0]);
|
||||||
|
emit_insn (loongarch_vec_repl1_128 (target_lo, op0));
|
||||||
|
}
|
||||||
|
else if (!rtx_equal_p (val_lo[i], val_lo[0]))
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val_lo[i]);
|
||||||
|
emit_insn (
|
||||||
|
loongarch_vec_set128 (target_lo, op0, GEN_INT (i)));
|
||||||
|
}
|
||||||
|
@@ -11071,16 +11063,13 @@ loongarch_expand_vector_init (rtx target, rtx vals)
|
||||||
|
reduce the number of instructions. */
|
||||||
|
if (i == 1)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val[0]);
|
||||||
|
- op1 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op1, val[1]);
|
||||||
|
+ op0 = force_reg (imode, val[0]);
|
||||||
|
+ op1 = force_reg (imode, val[1]);
|
||||||
|
emit_insn (loongarch_vec_repl2_128 (target, op0, op1));
|
||||||
|
}
|
||||||
|
else if (i > 1)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val[i]);
|
||||||
|
+ op0 = force_reg (imode, val[i]);
|
||||||
|
emit_insn (
|
||||||
|
loongarch_vec_set128 (target, op0, GEN_INT (i)));
|
||||||
|
}
|
||||||
|
@@ -11093,18 +11082,15 @@ loongarch_expand_vector_init (rtx target, rtx vals)
|
||||||
|
loongarch_vec_mirror (target, target, const0_rtx));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
+ op0 = force_reg (imode, val[i]);
|
||||||
|
/* Assign the lowest element of val to all elements of
|
||||||
|
target. */
|
||||||
|
if (i == 0)
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val[0]);
|
||||||
|
emit_insn (loongarch_vec_repl1_128 (target, op0));
|
||||||
|
}
|
||||||
|
else if (!rtx_equal_p (val[i], val[0]))
|
||||||
|
{
|
||||||
|
- op0 = gen_reg_rtx (imode);
|
||||||
|
- emit_move_insn (op0, val[i]);
|
||||||
|
emit_insn (
|
||||||
|
loongarch_vec_set128 (target, op0, GEN_INT (i)));
|
||||||
|
}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/pr113033.c b/gcc/testsuite/gcc.target/loongarch/pr113033.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..4ccd037d8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/pr113033.c
|
||||||
|
@@ -0,0 +1,23 @@
|
||||||
|
+/* PR target/113033: ICE with vector left rotate */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx" } */
|
||||||
|
+
|
||||||
|
+typedef unsigned __attribute__ ((vector_size (16))) v4si;
|
||||||
|
+typedef unsigned __attribute__ ((vector_size (32))) v8si;
|
||||||
|
+typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
|
||||||
|
+typedef unsigned long long __attribute__ ((vector_size (32))) v4di;
|
||||||
|
+
|
||||||
|
+#define TEST(tp) \
|
||||||
|
+extern tp data_##tp; \
|
||||||
|
+tp \
|
||||||
|
+test_##tp (int x) \
|
||||||
|
+{ \
|
||||||
|
+ const int bit = sizeof (data_##tp[0]) * __CHAR_BIT__; \
|
||||||
|
+ data_##tp = data_##tp << (x & (bit - 1)) \
|
||||||
|
+ | data_##tp >> (bit - x & (bit - 1)); \
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+TEST (v4si)
|
||||||
|
+TEST (v8si)
|
||||||
|
+TEST (v2di)
|
||||||
|
+TEST (v4di)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
83
0077-LoongArch-Clean-up-vec_init-expander.patch
Normal file
83
0077-LoongArch-Clean-up-vec_init-expander.patch
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
From 38438021c770f077b78092299f22712fdd734814 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 19 Dec 2023 05:02:42 +0800
|
||||||
|
Subject: [PATCH 077/188] LoongArch: Clean up vec_init expander
|
||||||
|
|
||||||
|
Non-functional change, clean up the code.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_expand_vector_init_same): Remove "temp2" and reuse
|
||||||
|
"temp" instead.
|
||||||
|
(loongarch_expand_vector_init): Use gcc_unreachable () instead
|
||||||
|
of gcc_assert (0), and fix the comment for it.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 18 +++++++++---------
|
||||||
|
1 file changed, 9 insertions(+), 9 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 000d2d623..3aeafeafd 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -10723,7 +10723,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
|
||||||
|
machine_mode vmode = GET_MODE (target);
|
||||||
|
machine_mode imode = GET_MODE_INNER (vmode);
|
||||||
|
rtx same = XVECEXP (vals, 0, 0);
|
||||||
|
- rtx temp, temp2;
|
||||||
|
+ rtx temp;
|
||||||
|
|
||||||
|
if (CONST_INT_P (same) && nvar == 0
|
||||||
|
&& loongarch_signed_immediate_p (INTVAL (same), 10, 0))
|
||||||
|
@@ -10747,17 +10747,17 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
|
||||||
|
}
|
||||||
|
|
||||||
|
if (imode == GET_MODE (same))
|
||||||
|
- temp2 = same;
|
||||||
|
+ temp = same;
|
||||||
|
else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
|
||||||
|
{
|
||||||
|
if (GET_CODE (same) == MEM)
|
||||||
|
{
|
||||||
|
rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
|
||||||
|
loongarch_emit_move (reg_tmp, same);
|
||||||
|
- temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
|
||||||
|
+ temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
- temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
|
||||||
|
+ temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
@@ -10765,13 +10765,13 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
|
||||||
|
{
|
||||||
|
rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
|
||||||
|
loongarch_emit_move (reg_tmp, same);
|
||||||
|
- temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
|
||||||
|
+ temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
- temp2 = lowpart_subreg (imode, same, GET_MODE (same));
|
||||||
|
+ temp = lowpart_subreg (imode, same, GET_MODE (same));
|
||||||
|
}
|
||||||
|
|
||||||
|
- temp = force_reg (imode, temp2);
|
||||||
|
+ temp = force_reg (imode, temp);
|
||||||
|
|
||||||
|
switch (vmode)
|
||||||
|
{
|
||||||
|
@@ -11117,8 +11117,8 @@ loongarch_expand_vector_init (rtx target, rtx vals)
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
- /* Loongson is the only cpu with vectors with more elements. */
|
||||||
|
- gcc_assert (0);
|
||||||
|
+ /* No LoongArch CPU supports vectors with more elements as at now. */
|
||||||
|
+ gcc_unreachable ();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,78 @@
|
|||||||
|
From e5c0e4b416b8628585e27b524ba524261cacf713 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiahao Xu <xujiahao@loongson.cn>
|
||||||
|
Date: Thu, 14 Dec 2023 20:49:04 +0800
|
||||||
|
Subject: [PATCH 078/188] LoongArch: Fix incorrect code generation for sad
|
||||||
|
pattern
|
||||||
|
|
||||||
|
When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c
|
||||||
|
and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad
|
||||||
|
code. This patch fixes them: for sad patterns, use zero expansion instead of sign
|
||||||
|
expansion for reduction.
|
||||||
|
|
||||||
|
Currently, we are fixing failed vectorized tests, and in the future, we will
|
||||||
|
enable more tests of "vect" for LoongArch.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md: Use zero expansion instruction.
|
||||||
|
* config/loongarch/lsx.md: Ditto.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 8 ++++----
|
||||||
|
gcc/config/loongarch/lsx.md | 8 ++++----
|
||||||
|
2 files changed, 8 insertions(+), 8 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 921ce0eeb..9ca3f9278 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -5021,8 +5021,8 @@
|
||||||
|
rtx t2 = gen_reg_rtx (V16HImode);
|
||||||
|
rtx t3 = gen_reg_rtx (V8SImode);
|
||||||
|
emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
|
||||||
|
- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
|
||||||
|
emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -5038,8 +5038,8 @@
|
||||||
|
rtx t2 = gen_reg_rtx (V16HImode);
|
||||||
|
rtx t3 = gen_reg_rtx (V8SImode);
|
||||||
|
emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
|
||||||
|
- emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
|
||||||
|
emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 57e0ee3d4..7f5fff40a 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -3385,8 +3385,8 @@
|
||||||
|
rtx t2 = gen_reg_rtx (V8HImode);
|
||||||
|
rtx t3 = gen_reg_rtx (V4SImode);
|
||||||
|
emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
|
||||||
|
- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
|
||||||
|
emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
@@ -3402,8 +3402,8 @@
|
||||||
|
rtx t2 = gen_reg_rtx (V8HImode);
|
||||||
|
rtx t3 = gen_reg_rtx (V4SImode);
|
||||||
|
emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
|
||||||
|
- emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
|
||||||
|
- emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
|
||||||
|
+ emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
|
||||||
|
emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,68 @@
|
|||||||
|
From bedb0338fadc373eeafc418a7bf6395d37eec78c Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Wed, 13 Dec 2023 09:31:07 +0800
|
||||||
|
Subject: [PATCH 079/188] LoongArch: Modify the check type of the vector
|
||||||
|
builtin function.
|
||||||
|
|
||||||
|
On LoongArch architecture, using the latest gcc14 in regression test,
|
||||||
|
it is found that the vector test cases in vector directory appear FAIL
|
||||||
|
entries with unmatched pointer types. In order to solve this kind of
|
||||||
|
problem, the type of the variable in the check result is modified with
|
||||||
|
the parameter type defined in the vector builtin function.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/simd_correctness_check.h: The variable
|
||||||
|
types in the check results are modified in conjunction with the
|
||||||
|
parameter types defined in the vector builtin function.
|
||||||
|
---
|
||||||
|
.../loongarch/vector/simd_correctness_check.h | 13 +++++++------
|
||||||
|
1 file changed, 7 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||||
|
index eb7fbd59c..551340bd5 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||||
|
@@ -8,11 +8,12 @@
|
||||||
|
int fail = 0; \
|
||||||
|
for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
|
||||||
|
{ \
|
||||||
|
- long *temp_ref = &ref[i], *temp_res = &res[i]; \
|
||||||
|
+ long long *temp_ref = (long long *)&ref[i], \
|
||||||
|
+ *temp_res = (long long *)&res[i]; \
|
||||||
|
if (abs (*temp_ref - *temp_res) > 0) \
|
||||||
|
{ \
|
||||||
|
printf (" error: %s at line %ld , expected " #ref \
|
||||||
|
- "[%ld]:0x%lx, got: 0x%lx\n", \
|
||||||
|
+ "[%ld]:0x%016lx, got: 0x%016lx\n", \
|
||||||
|
__FILE__, line, i, *temp_ref, *temp_res); \
|
||||||
|
fail = 1; \
|
||||||
|
} \
|
||||||
|
@@ -28,11 +29,11 @@
|
||||||
|
int fail = 0; \
|
||||||
|
for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
|
||||||
|
{ \
|
||||||
|
- int *temp_ref = &ref[i], *temp_res = &res[i]; \
|
||||||
|
+ int *temp_ref = (int *)&ref[i], *temp_res = (int *)&res[i]; \
|
||||||
|
if (abs (*temp_ref - *temp_res) > 0) \
|
||||||
|
{ \
|
||||||
|
printf (" error: %s at line %ld , expected " #ref \
|
||||||
|
- "[%ld]:0x%x, got: 0x%x\n", \
|
||||||
|
+ "[%ld]:0x%08x, got: 0x%08x\n", \
|
||||||
|
__FILE__, line, i, *temp_ref, *temp_res); \
|
||||||
|
fail = 1; \
|
||||||
|
} \
|
||||||
|
@@ -47,8 +48,8 @@
|
||||||
|
{ \
|
||||||
|
if (ref != res) \
|
||||||
|
{ \
|
||||||
|
- printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
|
||||||
|
- line, ref, res); \
|
||||||
|
+ printf (" error: %s at line %ld , expected 0x:%016x", \
|
||||||
|
+ "got 0x:%016x\n", __FILE__, line, ref, res); \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
while (0)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
250
0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
Normal file
250
0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
Normal file
@ -0,0 +1,250 @@
|
|||||||
|
From 2e0092b20b845e0e301b1dab177b338e35981f10 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Jiajie Chen <c@jia.je>
|
||||||
|
Date: Wed, 13 Dec 2023 23:26:01 +0800
|
||||||
|
Subject: [PATCH 080/188] LoongArch: extend.texi: Fix typos in LSX intrinsics
|
||||||
|
|
||||||
|
Several typos have been found and fixed: missing semicolons, using
|
||||||
|
variable name instead of type, duplicate functions and wrong types.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* doc/extend.texi (__lsx_vabsd_di): Remove extra `i' in name.
|
||||||
|
(__lsx_vfrintrm_d, __lsx_vfrintrm_s, __lsx_vfrintrne_d,
|
||||||
|
__lsx_vfrintrne_s, __lsx_vfrintrp_d, __lsx_vfrintrp_s, __lsx_vfrintrz_d,
|
||||||
|
__lsx_vfrintrz_s): fix return types.
|
||||||
|
(__lsx_vld, __lsx_vldi, __lsx_vldrepl_b, __lsx_vldrepl_d,
|
||||||
|
__lsx_vldrepl_h, __lsx_vldrepl_w, __lsx_vmaxi_b, __lsx_vmaxi_d,
|
||||||
|
__lsx_vmaxi_h, __lsx_vmaxi_w, __lsx_vmini_b, __lsx_vmini_d,
|
||||||
|
__lsx_vmini_h, __lsx_vmini_w, __lsx_vsrani_d_q, __lsx_vsrarni_d_q,
|
||||||
|
__lsx_vsrlni_d_q, __lsx_vsrlrni_d_q, __lsx_vssrani_d_q,
|
||||||
|
__lsx_vssrarni_d_q, __lsx_vssrarni_du_q, __lsx_vssrlni_d_q,
|
||||||
|
__lsx_vssrlrni_du_q, __lsx_vst, __lsx_vstx, __lsx_vssrani_du_q,
|
||||||
|
__lsx_vssrlni_du_q, __lsx_vssrlrni_d_q): add missing semicolon.
|
||||||
|
(__lsx_vpickve2gr_bu, __lsx_vpickve2gr_hu): fix typo in return
|
||||||
|
type.
|
||||||
|
(__lsx_vstelm_b, __lsx_vstelm_d, __lsx_vstelm_h,
|
||||||
|
__lsx_vstelm_w): use imm type for the last argument.
|
||||||
|
(__lsx_vsigncov_b, __lsx_vsigncov_h, __lsx_vsigncov_w,
|
||||||
|
__lsx_vsigncov_d): remove duplicate definitions.
|
||||||
|
---
|
||||||
|
gcc/doc/extend.texi | 90 ++++++++++++++++++++++-----------------------
|
||||||
|
1 file changed, 43 insertions(+), 47 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||||
|
index bb042ae78..ac8da4e80 100644
|
||||||
|
--- a/gcc/doc/extend.texi
|
||||||
|
+++ b/gcc/doc/extend.texi
|
||||||
|
@@ -16392,7 +16392,7 @@ int __lsx_bz_v (__m128i);
|
||||||
|
int __lsx_bz_w (__m128i);
|
||||||
|
__m128i __lsx_vabsd_b (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vabsd_bu (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vabsd_di (__m128i, __m128i);
|
||||||
|
+__m128i __lsx_vabsd_d (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vabsd_du (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vabsd_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vabsd_hu (__m128i, __m128i);
|
||||||
|
@@ -16598,14 +16598,14 @@ __m128 __lsx_vfnmsub_s (__m128, __m128, __m128);
|
||||||
|
__m128d __lsx_vfrecip_d (__m128d);
|
||||||
|
__m128 __lsx_vfrecip_s (__m128);
|
||||||
|
__m128d __lsx_vfrint_d (__m128d);
|
||||||
|
-__m128i __lsx_vfrintrm_d (__m128d);
|
||||||
|
-__m128i __lsx_vfrintrm_s (__m128);
|
||||||
|
-__m128i __lsx_vfrintrne_d (__m128d);
|
||||||
|
-__m128i __lsx_vfrintrne_s (__m128);
|
||||||
|
-__m128i __lsx_vfrintrp_d (__m128d);
|
||||||
|
-__m128i __lsx_vfrintrp_s (__m128);
|
||||||
|
-__m128i __lsx_vfrintrz_d (__m128d);
|
||||||
|
-__m128i __lsx_vfrintrz_s (__m128);
|
||||||
|
+__m128d __lsx_vfrintrm_d (__m128d);
|
||||||
|
+__m128 __lsx_vfrintrm_s (__m128);
|
||||||
|
+__m128d __lsx_vfrintrne_d (__m128d);
|
||||||
|
+__m128 __lsx_vfrintrne_s (__m128);
|
||||||
|
+__m128d __lsx_vfrintrp_d (__m128d);
|
||||||
|
+__m128 __lsx_vfrintrp_s (__m128);
|
||||||
|
+__m128d __lsx_vfrintrz_d (__m128d);
|
||||||
|
+__m128 __lsx_vfrintrz_s (__m128);
|
||||||
|
__m128 __lsx_vfrint_s (__m128);
|
||||||
|
__m128d __lsx_vfrsqrt_d (__m128d);
|
||||||
|
__m128 __lsx_vfrsqrt_s (__m128);
|
||||||
|
@@ -16674,12 +16674,12 @@ __m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15);
|
||||||
|
__m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1);
|
||||||
|
__m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7);
|
||||||
|
__m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3);
|
||||||
|
-__m128i __lsx_vld (void *, imm_n2048_2047)
|
||||||
|
-__m128i __lsx_vldi (imm_n1024_1023)
|
||||||
|
-__m128i __lsx_vldrepl_b (void *, imm_n2048_2047)
|
||||||
|
-__m128i __lsx_vldrepl_d (void *, imm_n256_255)
|
||||||
|
-__m128i __lsx_vldrepl_h (void *, imm_n1024_1023)
|
||||||
|
-__m128i __lsx_vldrepl_w (void *, imm_n512_511)
|
||||||
|
+__m128i __lsx_vld (void *, imm_n2048_2047);
|
||||||
|
+__m128i __lsx_vldi (imm_n1024_1023);
|
||||||
|
+__m128i __lsx_vldrepl_b (void *, imm_n2048_2047);
|
||||||
|
+__m128i __lsx_vldrepl_d (void *, imm_n256_255);
|
||||||
|
+__m128i __lsx_vldrepl_h (void *, imm_n1024_1023);
|
||||||
|
+__m128i __lsx_vldrepl_w (void *, imm_n512_511);
|
||||||
|
__m128i __lsx_vldx (void *, long int);
|
||||||
|
__m128i __lsx_vmadd_b (__m128i, __m128i, __m128i);
|
||||||
|
__m128i __lsx_vmadd_d (__m128i, __m128i, __m128i);
|
||||||
|
@@ -16715,13 +16715,13 @@ __m128i __lsx_vmax_d (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmax_du (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmax_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmax_hu (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vmaxi_b (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmaxi_b (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmaxi_bu (__m128i, imm0_31);
|
||||||
|
-__m128i __lsx_vmaxi_d (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmaxi_d (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmaxi_du (__m128i, imm0_31);
|
||||||
|
-__m128i __lsx_vmaxi_h (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmaxi_h (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmaxi_hu (__m128i, imm0_31);
|
||||||
|
-__m128i __lsx_vmaxi_w (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmaxi_w (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmaxi_wu (__m128i, imm0_31);
|
||||||
|
__m128i __lsx_vmax_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmax_wu (__m128i, __m128i);
|
||||||
|
@@ -16731,13 +16731,13 @@ __m128i __lsx_vmin_d (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmin_du (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmin_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmin_hu (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vmini_b (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmini_b (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmini_bu (__m128i, imm0_31);
|
||||||
|
-__m128i __lsx_vmini_d (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmini_d (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmini_du (__m128i, imm0_31);
|
||||||
|
-__m128i __lsx_vmini_h (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmini_h (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmini_hu (__m128i, imm0_31);
|
||||||
|
-__m128i __lsx_vmini_w (__m128i, imm_n16_15)
|
||||||
|
+__m128i __lsx_vmini_w (__m128i, imm_n16_15);
|
||||||
|
__m128i __lsx_vmini_wu (__m128i, imm0_31);
|
||||||
|
__m128i __lsx_vmin_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vmin_wu (__m128i, __m128i);
|
||||||
|
@@ -16826,11 +16826,11 @@ __m128i __lsx_vpickod_d (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vpickod_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vpickod_w (__m128i, __m128i);
|
||||||
|
int __lsx_vpickve2gr_b (__m128i, imm0_15);
|
||||||
|
-unsinged int __lsx_vpickve2gr_bu (__m128i, imm0_15);
|
||||||
|
+unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15);
|
||||||
|
long int __lsx_vpickve2gr_d (__m128i, imm0_1);
|
||||||
|
unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1);
|
||||||
|
int __lsx_vpickve2gr_h (__m128i, imm0_7);
|
||||||
|
-unsinged int __lsx_vpickve2gr_hu (__m128i, imm0_7);
|
||||||
|
+unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7);
|
||||||
|
int __lsx_vpickve2gr_w (__m128i, imm0_3);
|
||||||
|
unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3);
|
||||||
|
__m128i __lsx_vreplgr2vr_b (int);
|
||||||
|
@@ -16893,10 +16893,6 @@ __m128i __lsx_vsigncov_b (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsigncov_d (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsigncov_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsigncov_w (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vsigncov_b (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vsigncov_d (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vsigncov_h (__m128i, __m128i);
|
||||||
|
-__m128i __lsx_vsigncov_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsle_b (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsle_bu (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsle_d (__m128i, __m128i);
|
||||||
|
@@ -16953,7 +16949,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsran_b_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsran_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
__m128i __lsx_vsran_w_d (__m128i, __m128i);
|
||||||
|
@@ -16967,7 +16963,7 @@ __m128i __lsx_vsrari_w (__m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrarn_b_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrarn_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
__m128i __lsx_vsrarn_w_d (__m128i, __m128i);
|
||||||
|
@@ -16983,7 +16979,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrln_b_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrln_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
__m128i __lsx_vsrln_w_d (__m128i, __m128i);
|
||||||
|
@@ -16997,7 +16993,7 @@ __m128i __lsx_vsrlri_w (__m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrlrn_b_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrlrn_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
__m128i __lsx_vsrlrn_w_d (__m128i, __m128i);
|
||||||
|
@@ -17009,8 +17005,8 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssran_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
__m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
-__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
+__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
@@ -17023,8 +17019,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrarn_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
__m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
-__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
+__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
@@ -17037,8 +17033,8 @@ __m128i __lsx_vssrln_hu_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrln_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
__m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
-__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
+__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrlni_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
@@ -17051,8 +17047,8 @@ __m128i __lsx_vssrlrn_hu_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrlrn_h_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15);
|
||||||
|
__m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15);
|
||||||
|
-__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127)
|
||||||
|
-__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127)
|
||||||
|
+__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127);
|
||||||
|
+__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127);
|
||||||
|
__m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31);
|
||||||
|
__m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63);
|
||||||
|
@@ -17067,12 +17063,12 @@ __m128i __lsx_vssub_h (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssub_hu (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssub_w (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vssub_wu (__m128i, __m128i);
|
||||||
|
-void __lsx_vst (__m128i, void *, imm_n2048_2047)
|
||||||
|
-void __lsx_vstelm_b (__m128i, void *, imm_n128_127, idx);
|
||||||
|
-void __lsx_vstelm_d (__m128i, void *, imm_n128_127, idx);
|
||||||
|
-void __lsx_vstelm_h (__m128i, void *, imm_n128_127, idx);
|
||||||
|
-void __lsx_vstelm_w (__m128i, void *, imm_n128_127, idx);
|
||||||
|
-void __lsx_vstx (__m128i, void *, long int)
|
||||||
|
+void __lsx_vst (__m128i, void *, imm_n2048_2047);
|
||||||
|
+void __lsx_vstelm_b (__m128i, void *, imm_n128_127, imm0_15);
|
||||||
|
+void __lsx_vstelm_d (__m128i, void *, imm_n128_127, imm0_1);
|
||||||
|
+void __lsx_vstelm_h (__m128i, void *, imm_n128_127, imm0_7);
|
||||||
|
+void __lsx_vstelm_w (__m128i, void *, imm_n128_127, imm0_3);
|
||||||
|
+void __lsx_vstx (__m128i, void *, long int);
|
||||||
|
__m128i __lsx_vsub_b (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsub_d (__m128i, __m128i);
|
||||||
|
__m128i __lsx_vsub_h (__m128i, __m128i);
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,60 @@
|
|||||||
|
From d9965ed8d9f4244ac1948c6fb92c7c0f7d80b3a4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Tue, 19 Dec 2023 16:43:17 +0800
|
||||||
|
Subject: [PATCH 081/188] LoongArch: Fix builtin function prototypes for LASX
|
||||||
|
in doc.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* doc/extend.texi: According to the documents submitted earlier,
|
||||||
|
Two problems with function return types and using the actual types
|
||||||
|
of parameters instead of variable names were found and fixed.
|
||||||
|
---
|
||||||
|
gcc/doc/extend.texi | 24 ++++++++++++------------
|
||||||
|
1 file changed, 12 insertions(+), 12 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||||
|
index ac8da4e80..c793c9c5d 100644
|
||||||
|
--- a/gcc/doc/extend.texi
|
||||||
|
+++ b/gcc/doc/extend.texi
|
||||||
|
@@ -17438,14 +17438,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256);
|
||||||
|
__m256d __lasx_xvfrecip_d (__m256d);
|
||||||
|
__m256 __lasx_xvfrecip_s (__m256);
|
||||||
|
__m256d __lasx_xvfrint_d (__m256d);
|
||||||
|
-__m256i __lasx_xvfrintrm_d (__m256d);
|
||||||
|
-__m256i __lasx_xvfrintrm_s (__m256);
|
||||||
|
-__m256i __lasx_xvfrintrne_d (__m256d);
|
||||||
|
-__m256i __lasx_xvfrintrne_s (__m256);
|
||||||
|
-__m256i __lasx_xvfrintrp_d (__m256d);
|
||||||
|
-__m256i __lasx_xvfrintrp_s (__m256);
|
||||||
|
-__m256i __lasx_xvfrintrz_d (__m256d);
|
||||||
|
-__m256i __lasx_xvfrintrz_s (__m256);
|
||||||
|
+__m256d __lasx_xvfrintrm_d (__m256d);
|
||||||
|
+__m256 __lasx_xvfrintrm_s (__m256);
|
||||||
|
+__m256d __lasx_xvfrintrne_d (__m256d);
|
||||||
|
+__m256 __lasx_xvfrintrne_s (__m256);
|
||||||
|
+__m256d __lasx_xvfrintrp_d (__m256d);
|
||||||
|
+__m256 __lasx_xvfrintrp_s (__m256);
|
||||||
|
+__m256d __lasx_xvfrintrz_d (__m256d);
|
||||||
|
+__m256 __lasx_xvfrintrz_s (__m256);
|
||||||
|
__m256 __lasx_xvfrint_s (__m256);
|
||||||
|
__m256d __lasx_xvfrsqrt_d (__m256d);
|
||||||
|
__m256 __lasx_xvfrsqrt_s (__m256);
|
||||||
|
@@ -17912,10 +17912,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i);
|
||||||
|
__m256i __lasx_xvssub_w (__m256i, __m256i);
|
||||||
|
__m256i __lasx_xvssub_wu (__m256i, __m256i);
|
||||||
|
void __lasx_xvst (__m256i, void *, imm_n2048_2047);
|
||||||
|
-void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx);
|
||||||
|
-void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx);
|
||||||
|
-void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx);
|
||||||
|
-void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx);
|
||||||
|
+void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_31);
|
||||||
|
+void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_3);
|
||||||
|
+void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_15);
|
||||||
|
+void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_7);
|
||||||
|
void __lasx_xvstx (__m256i, void *, long int);
|
||||||
|
__m256i __lasx_xvsub_b (__m256i, __m256i);
|
||||||
|
__m256i __lasx_xvsub_d (__m256i, __m256i);
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,92 @@
|
|||||||
|
From 48f0d47eb6dc2c799c845a25cfabd586bd176378 Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Tue, 5 Dec 2023 14:44:35 +0800
|
||||||
|
Subject: [PATCH 082/188] LoongArch: Add asm modifiers to the LSX and LASX
|
||||||
|
directives in the doc.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* doc/extend.texi: Add modifiers to the vector of asm in the doc.
|
||||||
|
* doc/md.texi: Refine the description of the modifier 'f' in the doc.
|
||||||
|
---
|
||||||
|
gcc/doc/extend.texi | 46 +++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
gcc/doc/md.texi | 2 +-
|
||||||
|
2 files changed, 47 insertions(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||||
|
index c793c9c5d..bcb9329c2 100644
|
||||||
|
--- a/gcc/doc/extend.texi
|
||||||
|
+++ b/gcc/doc/extend.texi
|
||||||
|
@@ -11424,10 +11424,56 @@ The list below describes the supported modifiers and their effects for LoongArch
|
||||||
|
@item @code{d} @tab Same as @code{c}.
|
||||||
|
@item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register.
|
||||||
|
@item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}.
|
||||||
|
+@item @code{u} @tab Print a LASX register.
|
||||||
|
+@item @code{w} @tab Print a LSX register.
|
||||||
|
@item @code{X} @tab Print a constant integer operand in hexadecimal.
|
||||||
|
@item @code{z} @tab Print the operand in its unmodified form, followed by a comma.
|
||||||
|
@end multitable
|
||||||
|
|
||||||
|
+References to input and output operands in the assembler template of extended
|
||||||
|
+asm statements can use modifiers to affect the way the operands are formatted
|
||||||
|
+in the code output to the assembler. For example, the following code uses the
|
||||||
|
+'w' modifier for LoongArch:
|
||||||
|
+
|
||||||
|
+@example
|
||||||
|
+test-asm.c:
|
||||||
|
+
|
||||||
|
+#include <lsxintrin.h>
|
||||||
|
+
|
||||||
|
+__m128i foo (void)
|
||||||
|
+@{
|
||||||
|
+__m128i a,b,c;
|
||||||
|
+__asm__ ("vadd.d %w0,%w1,%w2\n\t"
|
||||||
|
+ :"=f" (c)
|
||||||
|
+ :"f" (a),"f" (b));
|
||||||
|
+
|
||||||
|
+return c;
|
||||||
|
+@}
|
||||||
|
+
|
||||||
|
+@end example
|
||||||
|
+
|
||||||
|
+@noindent
|
||||||
|
+The compile command for the test case is as follows:
|
||||||
|
+
|
||||||
|
+@example
|
||||||
|
+gcc test-asm.c -mlsx -S -o test-asm.s
|
||||||
|
+@end example
|
||||||
|
+
|
||||||
|
+@noindent
|
||||||
|
+The assembly statement produces the following assembly code:
|
||||||
|
+
|
||||||
|
+@example
|
||||||
|
+vadd.d $vr0,$vr0,$vr1
|
||||||
|
+@end example
|
||||||
|
+
|
||||||
|
+This is a 128-bit vector addition instruction, @code{c} (referred to in the
|
||||||
|
+template string as %0) is the output, and @code{a} (%1) and @code{b} (%2) are
|
||||||
|
+the inputs. @code{__m128i} is a vector data type defined in the file
|
||||||
|
+@code{lsxintrin.h} (@xref{LoongArch SX Vector Intrinsics}). The symbol '=f'
|
||||||
|
+represents a constraint using a floating-point register as an output type, and
|
||||||
|
+the 'f' in the input operand represents a constraint using a floating-point
|
||||||
|
+register operand, which can refer to the definition of a constraint
|
||||||
|
+(@xref{Constraints}) in gcc.
|
||||||
|
|
||||||
|
@lowersections
|
||||||
|
@include md.texi
|
||||||
|
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
|
||||||
|
index b58da0787..a2e839073 100644
|
||||||
|
--- a/gcc/doc/md.texi
|
||||||
|
+++ b/gcc/doc/md.texi
|
||||||
|
@@ -2750,7 +2750,7 @@ $r1h
|
||||||
|
@item LoongArch---@file{config/loongarch/constraints.md}
|
||||||
|
@table @code
|
||||||
|
@item f
|
||||||
|
-A floating-point register (if available).
|
||||||
|
+A floating-point or vector register (if available).
|
||||||
|
@item k
|
||||||
|
A memory operand whose address is formed by a base register and
|
||||||
|
(optionally scaled) index register.
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
392
0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
Normal file
392
0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
Normal file
@ -0,0 +1,392 @@
|
|||||||
|
From b199de440fc877efdd1dde90b5c1c5111e060c1b Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 15 Dec 2023 01:49:40 +0800
|
||||||
|
Subject: [PATCH 083/188] LoongArch: Implement FCCmode reload and
|
||||||
|
cstore<ANYF:mode>4
|
||||||
|
|
||||||
|
We used a branch to load floating-point comparison results into GPR.
|
||||||
|
This is very slow when the branch is not predictable.
|
||||||
|
|
||||||
|
Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM.
|
||||||
|
Then implement cstore<ANYF:mode>4.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch-tune.h
|
||||||
|
(loongarch_rtx_cost_data::movcf2gr): New field.
|
||||||
|
(loongarch_rtx_cost_data::movcf2gr_): New method.
|
||||||
|
(loongarch_rtx_cost_data::use_movcf2gr): New method.
|
||||||
|
* config/loongarch/loongarch-def.cc
|
||||||
|
(loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr
|
||||||
|
to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based
|
||||||
|
on timing on LA464.
|
||||||
|
(loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to
|
||||||
|
COSTS_N_INSNS (1) for LA664.
|
||||||
|
(loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to
|
||||||
|
COSTS_N_INSNS (1) + 1.
|
||||||
|
* config/loongarch/predicates.md (loongarch_fcmp_operator): New
|
||||||
|
predicate.
|
||||||
|
* config/loongarch/loongarch.md (movfcc): Change to
|
||||||
|
define_expand.
|
||||||
|
(movfcc_internal): New define_insn.
|
||||||
|
(fcc_to_<X:mode>): New define_insn.
|
||||||
|
(cstore<ANYF:mode>4): New define_expand.
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs
|
||||||
|
and FPRs.
|
||||||
|
(loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR.
|
||||||
|
(loongarch_emit_float_compare): Call gen_reg_rtx instead of
|
||||||
|
loongarch_allocate_fcc.
|
||||||
|
(loongarch_allocate_fcc): Remove.
|
||||||
|
(loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS.
|
||||||
|
(loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS.
|
||||||
|
(loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS,
|
||||||
|
FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/movcf2gr.c: New test.
|
||||||
|
* gcc.target/loongarch/movcf2gr-via-fr.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-def.cc | 13 +++-
|
||||||
|
gcc/config/loongarch/loongarch-tune.h | 15 +++-
|
||||||
|
gcc/config/loongarch/loongarch.cc | 70 ++++++++++++-------
|
||||||
|
gcc/config/loongarch/loongarch.md | 69 ++++++++++++++++--
|
||||||
|
gcc/config/loongarch/predicates.md | 4 ++
|
||||||
|
.../gcc.target/loongarch/movcf2gr-via-fr.c | 10 +++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/movcf2gr.c | 9 +++
|
||||||
|
7 files changed, 157 insertions(+), 33 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
index 4a8885e83..843be78e4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
||||||
|
@@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
||||||
|
int_mult_di (COSTS_N_INSNS (4)),
|
||||||
|
int_div_si (COSTS_N_INSNS (5)),
|
||||||
|
int_div_di (COSTS_N_INSNS (5)),
|
||||||
|
+ movcf2gr (COSTS_N_INSNS (7)),
|
||||||
|
+ movgr2cf (COSTS_N_INSNS (15)),
|
||||||
|
branch_cost (6),
|
||||||
|
memory_latency (4) {}
|
||||||
|
|
||||||
|
/* The following properties cannot be looked up directly using "cpucfg".
|
||||||
|
So it is necessary to provide a default value for "unknown native"
|
||||||
|
tune targets (i.e. -mtune=native while PRID does not correspond to
|
||||||
|
- any known "-mtune" type). Currently all numbers are default. */
|
||||||
|
+ any known "-mtune" type). */
|
||||||
|
array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
|
||||||
|
- array_tune<loongarch_rtx_cost_data> ();
|
||||||
|
+ array_tune<loongarch_rtx_cost_data> ()
|
||||||
|
+ .set (CPU_LA664,
|
||||||
|
+ loongarch_rtx_cost_data ()
|
||||||
|
+ .movcf2gr_ (COSTS_N_INSNS (1))
|
||||||
|
+ .movgr2cf_ (COSTS_N_INSNS (1)));
|
||||||
|
|
||||||
|
/* RTX costs to use when optimizing for size.
|
||||||
|
We use a value slightly larger than COSTS_N_INSNS (1) for all of them
|
||||||
|
@@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
|
||||||
|
.int_mult_si_ (COST_COMPLEX_INSN)
|
||||||
|
.int_mult_di_ (COST_COMPLEX_INSN)
|
||||||
|
.int_div_si_ (COST_COMPLEX_INSN)
|
||||||
|
- .int_div_di_ (COST_COMPLEX_INSN);
|
||||||
|
+ .int_div_di_ (COST_COMPLEX_INSN)
|
||||||
|
+ .movcf2gr_ (COST_COMPLEX_INSN);
|
||||||
|
|
||||||
|
array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
|
||||||
|
.set (CPU_NATIVE, 4)
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
|
||||||
|
index 616b94e87..26f163f0a 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-tune.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-tune.h
|
||||||
|
@@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data
|
||||||
|
unsigned short int_mult_di;
|
||||||
|
unsigned short int_div_si;
|
||||||
|
unsigned short int_div_di;
|
||||||
|
+ unsigned short movcf2gr;
|
||||||
|
+ unsigned short movgr2cf;
|
||||||
|
unsigned short branch_cost;
|
||||||
|
unsigned short memory_latency;
|
||||||
|
|
||||||
|
@@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
+ loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr)
|
||||||
|
+ {
|
||||||
|
+ movcf2gr = _movcf2gr;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf)
|
||||||
|
+ {
|
||||||
|
+ movgr2cf = _movgr2cf;
|
||||||
|
+ return *this;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost)
|
||||||
|
{
|
||||||
|
branch_cost = _branch_cost;
|
||||||
|
@@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data
|
||||||
|
memory_latency = _memory_latency;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
-
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Costs to use when optimizing for size. */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 3aeafeafd..56f631b1a 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -5119,29 +5119,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1)
|
||||||
|
OPTAB_DIRECT);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Allocate a floating-point condition-code register of mode MODE. */
|
||||||
|
-
|
||||||
|
-static rtx
|
||||||
|
-loongarch_allocate_fcc (machine_mode mode)
|
||||||
|
-{
|
||||||
|
- unsigned int regno, count;
|
||||||
|
-
|
||||||
|
- gcc_assert (TARGET_HARD_FLOAT);
|
||||||
|
-
|
||||||
|
- if (mode == FCCmode)
|
||||||
|
- count = 1;
|
||||||
|
- else
|
||||||
|
- gcc_unreachable ();
|
||||||
|
-
|
||||||
|
- cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1);
|
||||||
|
- if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST)
|
||||||
|
- cfun->machine->next_fcc = 0;
|
||||||
|
-
|
||||||
|
- regno = FCC_REG_FIRST + cfun->machine->next_fcc;
|
||||||
|
- cfun->machine->next_fcc += count;
|
||||||
|
- return gen_rtx_REG (mode, regno);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/* Sign- or zero-extend OP0 and OP1 for integer comparisons. */
|
||||||
|
|
||||||
|
static void
|
||||||
|
@@ -5256,7 +5233,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
|
||||||
|
operands for FCMP.cond.fmt, instead a reversed condition code is
|
||||||
|
required and a test for false. */
|
||||||
|
*code = NE;
|
||||||
|
- *op0 = loongarch_allocate_fcc (FCCmode);
|
||||||
|
+ *op0 = gen_reg_rtx (FCCmode);
|
||||||
|
|
||||||
|
*op1 = const0_rtx;
|
||||||
|
loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
|
||||||
|
@@ -6626,7 +6603,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
|
||||||
|
enum mode_class mclass;
|
||||||
|
|
||||||
|
if (mode == FCCmode)
|
||||||
|
- return FCC_REG_P (regno);
|
||||||
|
+ return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno);
|
||||||
|
|
||||||
|
size = GET_MODE_SIZE (mode);
|
||||||
|
mclass = GET_MODE_CLASS (mode);
|
||||||
|
@@ -6841,6 +6818,9 @@ loongarch_move_to_gpr_cost (reg_class_t from)
|
||||||
|
/* MOVFR2GR, etc. */
|
||||||
|
return 4;
|
||||||
|
|
||||||
|
+ case FCC_REGS:
|
||||||
|
+ return loongarch_cost->movcf2gr;
|
||||||
|
+
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -6863,6 +6843,9 @@ loongarch_move_from_gpr_cost (reg_class_t to)
|
||||||
|
/* MOVGR2FR, etc. */
|
||||||
|
return 4;
|
||||||
|
|
||||||
|
+ case FCC_REGS:
|
||||||
|
+ return loongarch_cost->movgr2cf;
|
||||||
|
+
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
@@ -6897,6 +6880,10 @@ loongarch_register_move_cost (machine_mode mode, reg_class_t from,
|
||||||
|
if (to == dregs)
|
||||||
|
return loongarch_move_to_gpr_cost (from);
|
||||||
|
|
||||||
|
+ /* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */
|
||||||
|
+ if (from == FCC_REGS || to == FCC_REGS)
|
||||||
|
+ return COSTS_N_INSNS (from == to ? 2 : 1);
|
||||||
|
+
|
||||||
|
/* Handles cases that require a GPR temporary. */
|
||||||
|
cost1 = loongarch_move_to_gpr_cost (from);
|
||||||
|
if (cost1 != 0)
|
||||||
|
@@ -6933,6 +6920,39 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||||||
|
|
||||||
|
regno = true_regnum (x);
|
||||||
|
|
||||||
|
+ if (mode == FCCmode)
|
||||||
|
+ {
|
||||||
|
+ if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno))
|
||||||
|
+ {
|
||||||
|
+ if (FCC_REG_P (regno))
|
||||||
|
+ return FP_REGS;
|
||||||
|
+
|
||||||
|
+ auto fn = in_p ? loongarch_move_from_gpr_cost
|
||||||
|
+ : loongarch_move_to_gpr_cost;
|
||||||
|
+
|
||||||
|
+ if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
|
||||||
|
+ return FP_REGS;
|
||||||
|
+
|
||||||
|
+ return GP_REG_P (regno) ? NO_REGS : GR_REGS;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno))
|
||||||
|
+ {
|
||||||
|
+ auto fn = in_p ? loongarch_move_to_gpr_cost
|
||||||
|
+ : loongarch_move_from_gpr_cost;
|
||||||
|
+
|
||||||
|
+ if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
|
||||||
|
+ return FP_REGS;
|
||||||
|
+
|
||||||
|
+ return NO_REGS;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x))
|
||||||
|
+ return GR_REGS;
|
||||||
|
+
|
||||||
|
+ return NO_REGS;
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
if (reg_class_subset_p (rclass, FP_REGS))
|
||||||
|
{
|
||||||
|
if (regno < 0
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 23368008e..6cf71d9e4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2283,11 +2283,72 @@
|
||||||
|
|
||||||
|
;; Clear one FCC register
|
||||||
|
|
||||||
|
-(define_insn "movfcc"
|
||||||
|
- [(set (match_operand:FCC 0 "register_operand" "=z")
|
||||||
|
- (const_int 0))]
|
||||||
|
+(define_expand "movfcc"
|
||||||
|
+ [(set (match_operand:FCC 0 "")
|
||||||
|
+ (match_operand:FCC 1 ""))]
|
||||||
|
+ "TARGET_HARD_FLOAT"
|
||||||
|
+{
|
||||||
|
+ if (memory_operand (operands[0], FCCmode)
|
||||||
|
+ && memory_operand (operands[1], FCCmode))
|
||||||
|
+ operands[1] = force_reg (FCCmode, operands[1]);
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
+(define_insn "movfcc_internal"
|
||||||
|
+ [(set (match_operand:FCC 0 "nonimmediate_operand"
|
||||||
|
+ "=z,z,*f,*f,*r,*r,*m,*f,*r,z,*r")
|
||||||
|
+ (match_operand:FCC 1 "reg_or_0_operand"
|
||||||
|
+ "J,*f,z,*f,J*r,*m,J*r,J*r,*f,*r,z"))]
|
||||||
|
+ "TARGET_HARD_FLOAT"
|
||||||
|
+ "@
|
||||||
|
+ fcmp.caf.s\t%0,$f0,$f0
|
||||||
|
+ movfr2cf\t%0,%1
|
||||||
|
+ movcf2fr\t%0,%1
|
||||||
|
+ fmov.s\t%0,%1
|
||||||
|
+ or\t%0,%z1,$r0
|
||||||
|
+ ld.b\t%0,%1
|
||||||
|
+ st.b\t%z1,%0
|
||||||
|
+ movgr2fr.w\t%0,%1
|
||||||
|
+ movfr2gr.s\t%0,%1
|
||||||
|
+ movgr2cf\t%0,%1
|
||||||
|
+ movcf2gr\t%0,%1"
|
||||||
|
+ [(set_attr "type" "move")
|
||||||
|
+ (set_attr "mode" "FCC")])
|
||||||
|
+
|
||||||
|
+(define_insn "fcc_to_<X:mode>"
|
||||||
|
+ [(set (match_operand:X 0 "register_operand" "=r")
|
||||||
|
+ (if_then_else:X (ne (match_operand:FCC 1 "register_operand" "0")
|
||||||
|
+ (const_int 0))
|
||||||
|
+ (const_int 1)
|
||||||
|
+ (const_int 0)))]
|
||||||
|
+ "TARGET_HARD_FLOAT"
|
||||||
|
""
|
||||||
|
- "fcmp.caf.s\t%0,$f0,$f0")
|
||||||
|
+ [(set_attr "length" "0")
|
||||||
|
+ (set_attr "type" "ghost")])
|
||||||
|
+
|
||||||
|
+(define_expand "cstore<ANYF:mode>4"
|
||||||
|
+ [(set (match_operand:SI 0 "register_operand")
|
||||||
|
+ (match_operator:SI 1 "loongarch_fcmp_operator"
|
||||||
|
+ [(match_operand:ANYF 2 "register_operand")
|
||||||
|
+ (match_operand:ANYF 3 "register_operand")]))]
|
||||||
|
+ ""
|
||||||
|
+ {
|
||||||
|
+ rtx fcc = gen_reg_rtx (FCCmode);
|
||||||
|
+ rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), FCCmode,
|
||||||
|
+ operands[2], operands[3]);
|
||||||
|
+
|
||||||
|
+ emit_insn (gen_rtx_SET (fcc, cmp));
|
||||||
|
+ if (TARGET_64BIT)
|
||||||
|
+ {
|
||||||
|
+ rtx gpr = gen_reg_rtx (DImode);
|
||||||
|
+ emit_insn (gen_fcc_to_di (gpr, fcc));
|
||||||
|
+ emit_insn (gen_rtx_SET (operands[0],
|
||||||
|
+ lowpart_subreg (SImode, gpr, DImode)));
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_fcc_to_si (operands[0], fcc));
|
||||||
|
+
|
||||||
|
+ DONE;
|
||||||
|
+ })
|
||||||
|
|
||||||
|
;; Conditional move instructions.
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 88e54c915..58f9a7826 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -590,6 +590,10 @@
|
||||||
|
(define_predicate "loongarch_cstore_operator"
|
||||||
|
(match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu"))
|
||||||
|
|
||||||
|
+(define_predicate "loongarch_fcmp_operator"
|
||||||
|
+ (match_code
|
||||||
|
+ "unordered,uneq,unlt,unle,eq,lt,le,ordered,ltgt,ne,ge,gt,unge,ungt"))
|
||||||
|
+
|
||||||
|
(define_predicate "small_data_pattern"
|
||||||
|
(and (match_code "set,parallel,unspec,unspec_volatile,prefetch")
|
||||||
|
(match_test "loongarch_small_data_pattern_p (op)")))
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..23334a3a3
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c
|
||||||
|
@@ -0,0 +1,10 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mtune=la464 -mabi=lp64d" } */
|
||||||
|
+/* { dg-final { scan-assembler "movcf2fr\t\\\$f\[0-9\]+,\\\$fcc" } } */
|
||||||
|
+/* { dg-final { scan-assembler "movfr2gr\\.s\t\\\$r4" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+t (float a, float b)
|
||||||
|
+{
|
||||||
|
+ return a > b;
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..d27c393b5
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c
|
||||||
|
@@ -0,0 +1,9 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mtune=la664 -mabi=lp64d" } */
|
||||||
|
+/* { dg-final { scan-assembler "movcf2gr\t\\\$r4,\\\$fcc" } } */
|
||||||
|
+
|
||||||
|
+int
|
||||||
|
+t (float a, float b)
|
||||||
|
+{
|
||||||
|
+ return a > b;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,69 @@
|
|||||||
|
From 8da6a317bc3ad64da8590649b83a841391f20438 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 17 Dec 2023 04:26:23 +0800
|
||||||
|
Subject: [PATCH 084/188] LoongArch: Add sign_extend pattern for 32-bit rotate
|
||||||
|
shift
|
||||||
|
|
||||||
|
Remove a redundant sign extension.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (rotrsi3_extend): New
|
||||||
|
define_insn.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/rotrw.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 10 ++++++++++
|
||||||
|
gcc/testsuite/gcc.target/loongarch/rotrw.c | 17 +++++++++++++++++
|
||||||
|
2 files changed, 27 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotrw.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 6cf71d9e4..44e8d336a 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2893,6 +2893,16 @@
|
||||||
|
[(set_attr "type" "shift,shift")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+(define_insn "rotrsi3_extend"
|
||||||
|
+ [(set (match_operand:DI 0 "register_operand" "=r,r")
|
||||||
|
+ (sign_extend:DI
|
||||||
|
+ (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
|
||||||
|
+ (match_operand:SI 2 "arith_operand" "r,I"))))]
|
||||||
|
+ "TARGET_64BIT"
|
||||||
|
+ "rotr%i2.w\t%0,%1,%2"
|
||||||
|
+ [(set_attr "type" "shift,shift")
|
||||||
|
+ (set_attr "mode" "SI")])
|
||||||
|
+
|
||||||
|
;; The following templates were added to generate "bstrpick.d + alsl.d"
|
||||||
|
;; instruction pairs.
|
||||||
|
;; It is required that the values of const_immalsl_operand and
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotrw.c b/gcc/testsuite/gcc.target/loongarch/rotrw.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..6ed45e8b8
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotrw.c
|
||||||
|
@@ -0,0 +1,17 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+/* { dg-final { scan-assembler "rotr\\.w\t\\\$r4,\\\$r4,\\\$r5" } } */
|
||||||
|
+/* { dg-final { scan-assembler "rotri\\.w\t\\\$r4,\\\$r4,5" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "slli\\.w" } } */
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+rotr (unsigned a, unsigned b)
|
||||||
|
+{
|
||||||
|
+ return a >> b | a << 32 - b;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+rotri (unsigned a)
|
||||||
|
+{
|
||||||
|
+ return a >> 5 | a << 27;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,37 @@
|
|||||||
|
From e56d6d9526e1565fffeb320e15796385eb1732b8 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Li Wei <liwei@loongson.cn>
|
||||||
|
Date: Mon, 25 Dec 2023 11:20:23 +0800
|
||||||
|
Subject: [PATCH 085/188] LoongArch: Fixed bug in *bstrins_<mode>_for_ior_mask
|
||||||
|
template.
|
||||||
|
|
||||||
|
We found that using the latest compiled gcc will cause a miscompare error
|
||||||
|
when running spec2006 400.perlbench test with -flto turned on. After testing,
|
||||||
|
it was found that only the LoongArch architecture will report errors.
|
||||||
|
The first error commit was located through the git bisect command as
|
||||||
|
r14-3773-g5b857e87201335. Through debugging, it was found that the problem
|
||||||
|
was that the split condition of the *bstrins_<mode>_for_ior_mask template was
|
||||||
|
empty, which should actually be consistent with the insn condition.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md: Adjust.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 44e8d336a..3d5b75825 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -1489,7 +1489,7 @@
|
||||||
|
"loongarch_pre_reload_split () && \
|
||||||
|
loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
|
||||||
|
"#"
|
||||||
|
- ""
|
||||||
|
+ "&& true"
|
||||||
|
[(set (match_dup 0) (match_dup 1))
|
||||||
|
(set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4))
|
||||||
|
(match_dup 3))]
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
132
0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
Normal file
132
0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
From b1947829a5949a37db09bc23681e44c8479bd404 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chenghui Pan <panchenghui@loongson.cn>
|
||||||
|
Date: Fri, 22 Dec 2023 16:22:03 +0800
|
||||||
|
Subject: [PATCH 086/188] LoongArch: Fix insn output of vec_concat templates
|
||||||
|
for LASX.
|
||||||
|
|
||||||
|
When investigating the failure of gcc.dg/vect/slp-reduc-sad.c, the following
|
||||||
|
instruction block is generated by vec_concatv32qi (which is
|
||||||
|
generated by vec_initv32qiv16qi) at entrance of foo() function:
|
||||||
|
|
||||||
|
vldx $vr3,$r5,$r6
|
||||||
|
vld $vr2,$r5,0
|
||||||
|
xvpermi.q $xr2,$xr3,0x20
|
||||||
|
|
||||||
|
causes the reversion of vec_initv32qiv16qi operation's high and
|
||||||
|
low 128-bit part.
|
||||||
|
|
||||||
|
According to other target's similar impl and LSX impl for following
|
||||||
|
RTL representation, current definition in lasx.md of "vec_concat<mode>"
|
||||||
|
are wrong:
|
||||||
|
|
||||||
|
(set (op0) (vec_concat (op1) (op2)))
|
||||||
|
|
||||||
|
For correct behavior, the last argument of xvpermi.q should be 0x02
|
||||||
|
instead of 0x20. This patch fixes this issue and cleans up the vec_concat
|
||||||
|
template impl.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md (vec_concatv4di): Delete.
|
||||||
|
(vec_concatv8si): Delete.
|
||||||
|
(vec_concatv16hi): Delete.
|
||||||
|
(vec_concatv32qi): Delete.
|
||||||
|
(vec_concatv4df): Delete.
|
||||||
|
(vec_concatv8sf): Delete.
|
||||||
|
(vec_concat<mode>): New template with insn output fixed.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 74 ++++--------------------------------
|
||||||
|
1 file changed, 7 insertions(+), 67 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 9ca3f9278..46150f2fb 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -577,77 +577,17 @@
|
||||||
|
[(set_attr "type" "simd_insert")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
-(define_insn "vec_concatv4di"
|
||||||
|
- [(set (match_operand:V4DI 0 "register_operand" "=f")
|
||||||
|
- (vec_concat:V4DI
|
||||||
|
- (match_operand:V2DI 1 "register_operand" "0")
|
||||||
|
- (match_operand:V2DI 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- return "xvpermi.q\t%u0,%u2,0x20";
|
||||||
|
-}
|
||||||
|
- [(set_attr "type" "simd_splat")
|
||||||
|
- (set_attr "mode" "V4DI")])
|
||||||
|
-
|
||||||
|
-(define_insn "vec_concatv8si"
|
||||||
|
- [(set (match_operand:V8SI 0 "register_operand" "=f")
|
||||||
|
- (vec_concat:V8SI
|
||||||
|
- (match_operand:V4SI 1 "register_operand" "0")
|
||||||
|
- (match_operand:V4SI 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- return "xvpermi.q\t%u0,%u2,0x20";
|
||||||
|
-}
|
||||||
|
- [(set_attr "type" "simd_splat")
|
||||||
|
- (set_attr "mode" "V4DI")])
|
||||||
|
-
|
||||||
|
-(define_insn "vec_concatv16hi"
|
||||||
|
- [(set (match_operand:V16HI 0 "register_operand" "=f")
|
||||||
|
- (vec_concat:V16HI
|
||||||
|
- (match_operand:V8HI 1 "register_operand" "0")
|
||||||
|
- (match_operand:V8HI 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- return "xvpermi.q\t%u0,%u2,0x20";
|
||||||
|
-}
|
||||||
|
- [(set_attr "type" "simd_splat")
|
||||||
|
- (set_attr "mode" "V4DI")])
|
||||||
|
-
|
||||||
|
-(define_insn "vec_concatv32qi"
|
||||||
|
- [(set (match_operand:V32QI 0 "register_operand" "=f")
|
||||||
|
- (vec_concat:V32QI
|
||||||
|
- (match_operand:V16QI 1 "register_operand" "0")
|
||||||
|
- (match_operand:V16QI 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- return "xvpermi.q\t%u0,%u2,0x20";
|
||||||
|
-}
|
||||||
|
- [(set_attr "type" "simd_splat")
|
||||||
|
- (set_attr "mode" "V4DI")])
|
||||||
|
-
|
||||||
|
-(define_insn "vec_concatv4df"
|
||||||
|
- [(set (match_operand:V4DF 0 "register_operand" "=f")
|
||||||
|
- (vec_concat:V4DF
|
||||||
|
- (match_operand:V2DF 1 "register_operand" "0")
|
||||||
|
- (match_operand:V2DF 2 "register_operand" "f")))]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- return "xvpermi.q\t%u0,%u2,0x20";
|
||||||
|
-}
|
||||||
|
- [(set_attr "type" "simd_splat")
|
||||||
|
- (set_attr "mode" "V4DF")])
|
||||||
|
-
|
||||||
|
-(define_insn "vec_concatv8sf"
|
||||||
|
- [(set (match_operand:V8SF 0 "register_operand" "=f")
|
||||||
|
- (vec_concat:V8SF
|
||||||
|
- (match_operand:V4SF 1 "register_operand" "0")
|
||||||
|
- (match_operand:V4SF 2 "register_operand" "f")))]
|
||||||
|
+(define_insn "vec_concat<mode>"
|
||||||
|
+ [(set (match_operand:LASX 0 "register_operand" "=f")
|
||||||
|
+ (vec_concat:LASX
|
||||||
|
+ (match_operand:<VHMODE256_ALL> 1 "register_operand" "0")
|
||||||
|
+ (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")))]
|
||||||
|
"ISA_HAS_LASX"
|
||||||
|
{
|
||||||
|
- return "xvpermi.q\t%u0,%u2,0x20";
|
||||||
|
+ return "xvpermi.q\t%u0,%u2,0x02";
|
||||||
|
}
|
||||||
|
[(set_attr "type" "simd_splat")
|
||||||
|
- (set_attr "mode" "V4DI")])
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
;; xshuf.w
|
||||||
|
(define_insn "lasx_xvperm_<lasxfmt_f_wd>"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
232
0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
Normal file
232
0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
Normal file
@ -0,0 +1,232 @@
|
|||||||
|
From 1096571509762846e2222f575bc981385b4e9fb7 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Chenghui Pan <panchenghui@loongson.cn>
|
||||||
|
Date: Fri, 22 Dec 2023 16:18:44 +0800
|
||||||
|
Subject: [PATCH 087/188] LoongArch: Fix ICE when passing two same vector
|
||||||
|
argument consecutively
|
||||||
|
|
||||||
|
The following code will cause an ICE on the LoongArch target:
|
||||||
|
|
||||||
|
#include <lsxintrin.h>
|
||||||
|
|
||||||
|
extern void bar (__m128i, __m128i);
|
||||||
|
|
||||||
|
__m128i a;
|
||||||
|
|
||||||
|
void
|
||||||
|
foo ()
|
||||||
|
{
|
||||||
|
bar (a, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
It is caused by missing constraint definition in mov<mode>_lsx. This
|
||||||
|
patch fixes the template and removes the unnecessary processing from
|
||||||
|
loongarch_split_move () function.
|
||||||
|
|
||||||
|
This patch also cleans up the redundant definitions from
|
||||||
|
loongarch_split_move () and loongarch_split_move_p ().
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md: Use loongarch_split_move and
|
||||||
|
loongarch_split_move_p directly.
|
||||||
|
* config/loongarch/loongarch-protos.h
|
||||||
|
(loongarch_split_move): Remove unnecessary argument.
|
||||||
|
(loongarch_split_move_insn_p): Delete.
|
||||||
|
(loongarch_split_move_insn): Delete.
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_split_move_insn_p): Delete.
|
||||||
|
(loongarch_load_store_insns): Use loongarch_split_move_p
|
||||||
|
directly.
|
||||||
|
(loongarch_split_move): Remove the unnecessary processing.
|
||||||
|
(loongarch_split_move_insn): Delete.
|
||||||
|
* config/loongarch/lsx.md: Use loongarch_split_move and
|
||||||
|
loongarch_split_move_p directly.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lsx/lsx-mov-1.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 4 +-
|
||||||
|
gcc/config/loongarch/loongarch-protos.h | 4 +-
|
||||||
|
gcc/config/loongarch/loongarch.cc | 49 +------------------
|
||||||
|
gcc/config/loongarch/lsx.md | 10 ++--
|
||||||
|
.../loongarch/vector/lsx/lsx-mov-1.c | 14 ++++++
|
||||||
|
5 files changed, 24 insertions(+), 57 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index 46150f2fb..dbbf5a136 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -839,10 +839,10 @@
|
||||||
|
[(set (match_operand:LASX 0 "nonimmediate_operand")
|
||||||
|
(match_operand:LASX 1 "move_operand"))]
|
||||||
|
"reload_completed && ISA_HAS_LASX
|
||||||
|
- && loongarch_split_move_insn_p (operands[0], operands[1])"
|
||||||
|
+ && loongarch_split_move_p (operands[0], operands[1])"
|
||||||
|
[(const_int 0)]
|
||||||
|
{
|
||||||
|
- loongarch_split_move_insn (operands[0], operands[1], curr_insn);
|
||||||
|
+ loongarch_split_move (operands[0], operands[1]);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
index e5fcf3111..2067e50c3 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
@@ -82,11 +82,9 @@ extern rtx loongarch_legitimize_call_address (rtx);
|
||||||
|
|
||||||
|
extern rtx loongarch_subword (rtx, bool);
|
||||||
|
extern bool loongarch_split_move_p (rtx, rtx);
|
||||||
|
-extern void loongarch_split_move (rtx, rtx, rtx);
|
||||||
|
+extern void loongarch_split_move (rtx, rtx);
|
||||||
|
extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
|
||||||
|
extern void loongarch_split_plus_constant (rtx *, machine_mode);
|
||||||
|
-extern bool loongarch_split_move_insn_p (rtx, rtx);
|
||||||
|
-extern void loongarch_split_move_insn (rtx, rtx, rtx);
|
||||||
|
extern void loongarch_split_128bit_move (rtx, rtx);
|
||||||
|
extern bool loongarch_split_128bit_move_p (rtx, rtx);
|
||||||
|
extern void loongarch_split_256bit_move (rtx, rtx);
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 56f631b1a..5c278386a 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -2558,7 +2558,6 @@ loongarch_split_const_insns (rtx x)
|
||||||
|
return low + high;
|
||||||
|
}
|
||||||
|
|
||||||
|
-bool loongarch_split_move_insn_p (rtx dest, rtx src);
|
||||||
|
/* Return one word of 128-bit value OP, taking into account the fixed
|
||||||
|
endianness of certain registers. BYTE selects from the byte address. */
|
||||||
|
|
||||||
|
@@ -2598,7 +2597,7 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn)
|
||||||
|
{
|
||||||
|
set = single_set (insn);
|
||||||
|
if (set
|
||||||
|
- && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set)))
|
||||||
|
+ && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set)))
|
||||||
|
might_split_p = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -4216,7 +4215,7 @@ loongarch_split_move_p (rtx dest, rtx src)
|
||||||
|
SPLIT_TYPE describes the split condition. */
|
||||||
|
|
||||||
|
void
|
||||||
|
-loongarch_split_move (rtx dest, rtx src, rtx insn_)
|
||||||
|
+loongarch_split_move (rtx dest, rtx src)
|
||||||
|
{
|
||||||
|
rtx low_dest;
|
||||||
|
|
||||||
|
@@ -4254,33 +4253,6 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
|
||||||
|
loongarch_subword (src, true));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
-
|
||||||
|
- /* This is a hack. See if the next insn uses DEST and if so, see if we
|
||||||
|
- can forward SRC for DEST. This is most useful if the next insn is a
|
||||||
|
- simple store. */
|
||||||
|
- rtx_insn *insn = (rtx_insn *) insn_;
|
||||||
|
- struct loongarch_address_info addr = {};
|
||||||
|
- if (insn)
|
||||||
|
- {
|
||||||
|
- rtx_insn *next = next_nonnote_nondebug_insn_bb (insn);
|
||||||
|
- if (next)
|
||||||
|
- {
|
||||||
|
- rtx set = single_set (next);
|
||||||
|
- if (set && SET_SRC (set) == dest)
|
||||||
|
- {
|
||||||
|
- if (MEM_P (src))
|
||||||
|
- {
|
||||||
|
- rtx tmp = XEXP (src, 0);
|
||||||
|
- loongarch_classify_address (&addr, tmp, GET_MODE (tmp),
|
||||||
|
- true);
|
||||||
|
- if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg))
|
||||||
|
- validate_change (next, &SET_SRC (set), src, false);
|
||||||
|
- }
|
||||||
|
- else
|
||||||
|
- validate_change (next, &SET_SRC (set), src, false);
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
- }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Check if adding an integer constant value for a specific mode can be
|
||||||
|
@@ -4327,23 +4299,6 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
|
||||||
|
op[2] = gen_int_mode (v, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* Return true if a move from SRC to DEST in INSN should be split. */
|
||||||
|
-
|
||||||
|
-bool
|
||||||
|
-loongarch_split_move_insn_p (rtx dest, rtx src)
|
||||||
|
-{
|
||||||
|
- return loongarch_split_move_p (dest, src);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
-/* Split a move from SRC to DEST in INSN, given that
|
||||||
|
- loongarch_split_move_insn_p holds. */
|
||||||
|
-
|
||||||
|
-void
|
||||||
|
-loongarch_split_move_insn (rtx dest, rtx src, rtx insn)
|
||||||
|
-{
|
||||||
|
- loongarch_split_move (dest, src, insn);
|
||||||
|
-}
|
||||||
|
-
|
||||||
|
/* Implement TARGET_CONSTANT_ALIGNMENT. */
|
||||||
|
|
||||||
|
static HOST_WIDE_INT
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 7f5fff40a..3e3248ef4 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -794,21 +794,21 @@
|
||||||
|
})
|
||||||
|
|
||||||
|
(define_insn "mov<mode>_lsx"
|
||||||
|
- [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f")
|
||||||
|
- (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r"))]
|
||||||
|
+ [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r")
|
||||||
|
+ (match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r"))]
|
||||||
|
"ISA_HAS_LSX"
|
||||||
|
{ return loongarch_output_move (operands[0], operands[1]); }
|
||||||
|
- [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert")
|
||||||
|
+ [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
(define_split
|
||||||
|
[(set (match_operand:LSX 0 "nonimmediate_operand")
|
||||||
|
(match_operand:LSX 1 "move_operand"))]
|
||||||
|
"reload_completed && ISA_HAS_LSX
|
||||||
|
- && loongarch_split_move_insn_p (operands[0], operands[1])"
|
||||||
|
+ && loongarch_split_move_p (operands[0], operands[1])"
|
||||||
|
[(const_int 0)]
|
||||||
|
{
|
||||||
|
- loongarch_split_move_insn (operands[0], operands[1], curr_insn);
|
||||||
|
+ loongarch_split_move (operands[0], operands[1]);
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..7f9d792eb
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c
|
||||||
|
@@ -0,0 +1,14 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-mlsx -O2" } */
|
||||||
|
+
|
||||||
|
+#include <lsxintrin.h>
|
||||||
|
+
|
||||||
|
+extern void bar (__m128i, __m128i);
|
||||||
|
+
|
||||||
|
+__m128i a;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+foo ()
|
||||||
|
+{
|
||||||
|
+ bar (a, a);
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
253
0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
Normal file
253
0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
Normal file
@ -0,0 +1,253 @@
|
|||||||
|
From a2cc86c9b5e44c3dcdb8c52d6ae5f535442ec1d4 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sun, 17 Dec 2023 05:38:20 +0800
|
||||||
|
Subject: [PATCH 088/188] LoongArch: Expand left rotate to right rotate with
|
||||||
|
negated amount
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (rotl<mode>3):
|
||||||
|
New define_expand.
|
||||||
|
* config/loongarch/simd.md (vrotl<mode>3): Likewise.
|
||||||
|
(rotl<mode>3): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/rotl-with-rotr.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-vrotr-b.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-vrotr-h.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-vrotr-w.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-vrotr-d.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-xvrotr-b.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-xvrotr-h.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-xvrotr-w.c: New test.
|
||||||
|
* gcc.target/loongarch/rotl-with-xvrotr-d.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 12 ++++++++
|
||||||
|
gcc/config/loongarch/simd.md | 29 +++++++++++++++++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-rotr.c | 9 ++++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-vrotr-b.c | 7 +++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-vrotr-d.c | 7 +++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-vrotr-h.c | 7 +++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-vrotr-w.c | 28 ++++++++++++++++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-xvrotr-b.c | 7 +++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-xvrotr-d.c | 7 +++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-xvrotr-h.c | 7 +++++
|
||||||
|
.../gcc.target/loongarch/rotl-with-xvrotr-w.c | 7 +++++
|
||||||
|
11 files changed, 127 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 3d5b75825..ed4d4b906 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -2903,6 +2903,18 @@
|
||||||
|
[(set_attr "type" "shift,shift")
|
||||||
|
(set_attr "mode" "SI")])
|
||||||
|
|
||||||
|
+;; Expand left rotate to right rotate.
|
||||||
|
+(define_expand "rotl<mode>3"
|
||||||
|
+ [(set (match_dup 3)
|
||||||
|
+ (neg:SI (match_operand:SI 2 "register_operand")))
|
||||||
|
+ (set (match_operand:GPR 0 "register_operand")
|
||||||
|
+ (rotatert:GPR (match_operand:GPR 1 "register_operand")
|
||||||
|
+ (match_dup 3)))]
|
||||||
|
+ ""
|
||||||
|
+ {
|
||||||
|
+ operands[3] = gen_reg_rtx (SImode);
|
||||||
|
+ });
|
||||||
|
+
|
||||||
|
;; The following templates were added to generate "bstrpick.d + alsl.d"
|
||||||
|
;; instruction pairs.
|
||||||
|
;; It is required that the values of const_immalsl_operand and
|
||||||
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
||||||
|
index 13202f79b..93fb39abc 100644
|
||||||
|
--- a/gcc/config/loongarch/simd.md
|
||||||
|
+++ b/gcc/config/loongarch/simd.md
|
||||||
|
@@ -268,6 +268,35 @@
|
||||||
|
[(set_attr "type" "simd_int_arith")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+;; Expand left rotate to right rotate.
|
||||||
|
+(define_expand "vrotl<mode>3"
|
||||||
|
+ [(set (match_dup 3)
|
||||||
|
+ (neg:IVEC (match_operand:IVEC 2 "register_operand")))
|
||||||
|
+ (set (match_operand:IVEC 0 "register_operand")
|
||||||
|
+ (rotatert:IVEC (match_operand:IVEC 1 "register_operand")
|
||||||
|
+ (match_dup 3)))]
|
||||||
|
+ ""
|
||||||
|
+ {
|
||||||
|
+ operands[3] = gen_reg_rtx (<MODE>mode);
|
||||||
|
+ });
|
||||||
|
+
|
||||||
|
+;; Expand left rotate with a scalar amount to right rotate: negate the
|
||||||
|
+;; scalar before broadcasting it because scalar negation is cheaper than
|
||||||
|
+;; vector negation.
|
||||||
|
+(define_expand "rotl<mode>3"
|
||||||
|
+ [(set (match_dup 3)
|
||||||
|
+ (neg:SI (match_operand:SI 2 "register_operand")))
|
||||||
|
+ (set (match_dup 4)
|
||||||
|
+ (vec_duplicate:IVEC (subreg:<IVEC:UNITMODE> (match_dup 3) 0)))
|
||||||
|
+ (set (match_operand:IVEC 0 "register_operand")
|
||||||
|
+ (rotatert:IVEC (match_operand:IVEC 1 "register_operand")
|
||||||
|
+ (match_dup 4)))]
|
||||||
|
+ ""
|
||||||
|
+ {
|
||||||
|
+ operands[3] = gen_reg_rtx (SImode);
|
||||||
|
+ operands[4] = gen_reg_rtx (<MODE>mode);
|
||||||
|
+ });
|
||||||
|
+
|
||||||
|
;; <x>vrotri.{b/h/w/d}
|
||||||
|
|
||||||
|
(define_insn "rotr<mode>3"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..84cc53cec
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
|
||||||
|
@@ -0,0 +1,9 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2" } */
|
||||||
|
+/* { dg-final { scan-assembler "rotr\\.w" } } */
|
||||||
|
+
|
||||||
|
+unsigned
|
||||||
|
+t (unsigned a, unsigned b)
|
||||||
|
+{
|
||||||
|
+ return a << b | a >> (32 - b);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..14298bf9e
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vrotr\\.b" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vneg\\.b" 1 } } */
|
||||||
|
+
|
||||||
|
+#define TYPE char
|
||||||
|
+#include "rotl-with-vrotr-w.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..0e971b323
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vrotr\\.d" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vneg\\.d" 1 } } */
|
||||||
|
+
|
||||||
|
+#define TYPE long long
|
||||||
|
+#include "rotl-with-vrotr-w.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..93216ebc2
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vrotr\\.h" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vneg\\.h" 1 } } */
|
||||||
|
+
|
||||||
|
+#define TYPE short
|
||||||
|
+#include "rotl-with-vrotr-w.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..d05b86f47
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
|
||||||
|
@@ -0,0 +1,28 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vrotr\\.w" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "vneg\\.w" 1 } } */
|
||||||
|
+
|
||||||
|
+#ifndef VLEN
|
||||||
|
+#define VLEN 16
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+#ifndef TYPE
|
||||||
|
+#define TYPE int
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+typedef unsigned TYPE V __attribute__ ((vector_size (VLEN)));
|
||||||
|
+V a, b, c;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test (int x)
|
||||||
|
+{
|
||||||
|
+ b = a << x | a >> ((int)sizeof (TYPE) * __CHAR_BIT__ - x);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test2 (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 0; i < VLEN / sizeof (TYPE); i++)
|
||||||
|
+ c[i] = a[i] << b[i] | a[i] >> ((int)sizeof (TYPE) * __CHAR_BIT__ - b[i]);
|
||||||
|
+}
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..2674b1b61
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvrotr\\.b" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvneg\\.b" 1 } } */
|
||||||
|
+
|
||||||
|
+#define VLEN 32
|
||||||
|
+#include "rotl-with-vrotr-b.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..e94403315
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvrotr\\.d" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvneg\\.d" 1 } } */
|
||||||
|
+
|
||||||
|
+#define VLEN 32
|
||||||
|
+#include "rotl-with-vrotr-d.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..3d998941f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvrotr\\.h" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvneg\\.h" 1 } } */
|
||||||
|
+
|
||||||
|
+#define VLEN 32
|
||||||
|
+#include "rotl-with-vrotr-h.c"
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..ca6aa7bae
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
|
||||||
|
@@ -0,0 +1,7 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvrotr\\.w" 2 } } */
|
||||||
|
+/* { dg-final { scan-assembler-times "xvneg\\.w" 1 } } */
|
||||||
|
+
|
||||||
|
+#define VLEN 32
|
||||||
|
+#include "rotl-with-vrotr-w.c"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
104
0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
Normal file
104
0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
From 1e389ec3bad94888fadd153f191fe8862448f258 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Wed, 27 Dec 2023 04:28:56 +0800
|
||||||
|
Subject: [PATCH 089/188] LoongArch: Fix infinite secondary reloading of
|
||||||
|
FCCmode [PR113148]
|
||||||
|
|
||||||
|
The GCC internal doc says:
|
||||||
|
|
||||||
|
X might be a pseudo-register or a 'subreg' of a pseudo-register,
|
||||||
|
which could either be in a hard register or in memory. Use
|
||||||
|
'true_regnum' to find out; it will return -1 if the pseudo is in
|
||||||
|
memory and the hard register number if it is in a register.
|
||||||
|
|
||||||
|
So "MEM_P (x)" is not enough for checking if we are reloading from/to
|
||||||
|
the memory. This bug has caused reload pass to stall and finally ICE
|
||||||
|
complaining with "maximum number of generated reload insns per insn
|
||||||
|
achieved", since r14-6814.
|
||||||
|
|
||||||
|
Check if "true_regnum (x)" is -1 besides "MEM_P (x)" to fix the issue.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
PR target/113148
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_secondary_reload):
|
||||||
|
Check if regno == -1 besides MEM_P (x) for reloading FCCmode
|
||||||
|
from/to FPR to/from memory.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
PR target/113148
|
||||||
|
* gcc.target/loongarch/pr113148.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.cc | 3 +-
|
||||||
|
gcc/testsuite/gcc.target/loongarch/pr113148.c | 44 +++++++++++++++++++
|
||||||
|
2 files changed, 46 insertions(+), 1 deletion(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113148.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 5c278386a..2e305f940 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -6902,7 +6902,8 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||||||
|
return NO_REGS;
|
||||||
|
}
|
||||||
|
|
||||||
|
- if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x))
|
||||||
|
+ if (reg_class_subset_p (rclass, FP_REGS)
|
||||||
|
+ && (regno == -1 || MEM_P (x)))
|
||||||
|
return GR_REGS;
|
||||||
|
|
||||||
|
return NO_REGS;
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/pr113148.c b/gcc/testsuite/gcc.target/loongarch/pr113148.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..cf48e5520
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/pr113148.c
|
||||||
|
@@ -0,0 +1,44 @@
|
||||||
|
+/* PR 113148: ICE caused by infinite reloading */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=la464 -mfpu=64 -mabi=lp64d" } */
|
||||||
|
+
|
||||||
|
+struct bound
|
||||||
|
+{
|
||||||
|
+ double max;
|
||||||
|
+} drawQuadrant_bound;
|
||||||
|
+double w4, innerXfromXY_y, computeBound_right_0;
|
||||||
|
+struct arc_def
|
||||||
|
+{
|
||||||
|
+ double w, h;
|
||||||
|
+ double a0, a1;
|
||||||
|
+};
|
||||||
|
+static void drawQuadrant (struct arc_def *);
|
||||||
|
+static void
|
||||||
|
+computeBound (struct arc_def *def, struct bound *bound)
|
||||||
|
+{
|
||||||
|
+ double ellipsex_1, ellipsex_0;
|
||||||
|
+ bound->max = def->a1 ?: __builtin_sin (w4) * def->h;
|
||||||
|
+ if (def->a0 == 5 && def->w == def->h)
|
||||||
|
+ ;
|
||||||
|
+ else
|
||||||
|
+ ellipsex_0 = def->a0 == 0.0 ?: __builtin_cos (w4);
|
||||||
|
+ if (def->a1 == 5 && def->w == def->h)
|
||||||
|
+ ellipsex_1 = bound->max;
|
||||||
|
+ __builtin_sqrt (ellipsex_1 * innerXfromXY_y * innerXfromXY_y * w4);
|
||||||
|
+ computeBound_right_0 = ellipsex_0;
|
||||||
|
+}
|
||||||
|
+void
|
||||||
|
+drawArc ()
|
||||||
|
+{
|
||||||
|
+ struct arc_def foo;
|
||||||
|
+ for (;;)
|
||||||
|
+ drawQuadrant (&foo);
|
||||||
|
+}
|
||||||
|
+void
|
||||||
|
+drawQuadrant (struct arc_def *def)
|
||||||
|
+{
|
||||||
|
+ int y, miny;
|
||||||
|
+ computeBound (def, &drawQuadrant_bound);
|
||||||
|
+ while (y >= miny)
|
||||||
|
+ ;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
305
0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
Normal file
305
0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
Normal file
@ -0,0 +1,305 @@
|
|||||||
|
From 294893b352898328d804f2d07981f6bf1e54f8b6 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Tue, 12 Dec 2023 04:54:21 +0800
|
||||||
|
Subject: [PATCH 090/188] LoongArch: Replace -mexplicit-relocs=auto simple-used
|
||||||
|
address peephole2 with combine
|
||||||
|
|
||||||
|
The problem with peephole2 is it uses a naive sliding-window algorithm
|
||||||
|
and misses many cases. For example:
|
||||||
|
|
||||||
|
float a[10000];
|
||||||
|
float t() { return a[0] + a[8000]; }
|
||||||
|
|
||||||
|
is compiled to:
|
||||||
|
|
||||||
|
la.local $r13,a
|
||||||
|
la.local $r12,a+32768
|
||||||
|
fld.s $f1,$r13,0
|
||||||
|
fld.s $f0,$r12,-768
|
||||||
|
fadd.s $f0,$f1,$f0
|
||||||
|
|
||||||
|
by trunk. But as we've explained in r14-4851, the following would be
|
||||||
|
better with -mexplicit-relocs=auto:
|
||||||
|
|
||||||
|
pcalau12i $r13,%pc_hi20(a)
|
||||||
|
pcalau12i $r12,%pc_hi20(a+32000)
|
||||||
|
fld.s $f1,$r13,%pc_lo12(a)
|
||||||
|
fld.s $f0,$r12,%pc_lo12(a+32000)
|
||||||
|
fadd.s $f0,$f1,$f0
|
||||||
|
|
||||||
|
However the sliding-window algorithm just won't detect the pcalau12i/fld
|
||||||
|
pair to be optimized. Use a define_insn_and_rewrite in combine pass
|
||||||
|
will work around the issue.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/predicates.md
|
||||||
|
(symbolic_pcrel_offset_operand): New define_predicate.
|
||||||
|
(mem_simple_ldst_operand): Likewise.
|
||||||
|
* config/loongarch/loongarch-protos.h
|
||||||
|
(loongarch_rewrite_mem_for_simple_ldst): Declare.
|
||||||
|
* config/loongarch/loongarch.cc
|
||||||
|
(loongarch_rewrite_mem_for_simple_ldst): Implement.
|
||||||
|
* config/loongarch/loongarch.md (simple_load<mode>): New
|
||||||
|
define_insn_and_rewrite.
|
||||||
|
(simple_load_<su>ext<SUBDI:mode><GPR:mode>): Likewise.
|
||||||
|
(simple_store<mode>): Likewise.
|
||||||
|
(define_peephole2): Remove la.local/[f]ld peepholes.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c:
|
||||||
|
New test.
|
||||||
|
* gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c:
|
||||||
|
New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch-protos.h | 1 +
|
||||||
|
gcc/config/loongarch/loongarch.cc | 16 +++
|
||||||
|
gcc/config/loongarch/loongarch.md | 114 +++++-------------
|
||||||
|
gcc/config/loongarch/predicates.md | 13 ++
|
||||||
|
...explicit-relocs-auto-single-load-store-2.c | 11 ++
|
||||||
|
...explicit-relocs-auto-single-load-store-3.c | 18 +++
|
||||||
|
6 files changed, 86 insertions(+), 87 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
index 2067e50c3..5060efbb6 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||||||
|
@@ -163,6 +163,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
|
||||||
|
extern bool loongarch_check_zero_div_p (void);
|
||||||
|
extern bool loongarch_pre_reload_split (void);
|
||||||
|
extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
|
||||||
|
+extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx);
|
||||||
|
|
||||||
|
union loongarch_gen_fn_ptrs
|
||||||
|
{
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 2e305f940..c6318bee9 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -5713,6 +5713,22 @@ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
+/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
|
||||||
|
+ -mcmodel={normal/medium}. */
|
||||||
|
+rtx
|
||||||
|
+loongarch_rewrite_mem_for_simple_ldst (rtx mem)
|
||||||
|
+{
|
||||||
|
+ rtx addr = XEXP (mem, 0);
|
||||||
|
+ rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
|
||||||
|
+ UNSPEC_PCALAU12I_GR);
|
||||||
|
+ rtx new_mem;
|
||||||
|
+
|
||||||
|
+ addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr);
|
||||||
|
+ new_mem = gen_rtx_MEM (GET_MODE (mem), addr);
|
||||||
|
+ MEM_COPY_ATTRIBUTES (new_mem, mem);
|
||||||
|
+ return new_mem;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
/* Print the text for PRINT_OPERAND punctation character CH to FILE.
|
||||||
|
The punctuation characters are:
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index ed4d4b906..3c61a0cf4 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -4135,101 +4135,41 @@
|
||||||
|
;;
|
||||||
|
;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
|
||||||
|
;; 3 instructions).
|
||||||
|
-(define_peephole2
|
||||||
|
- [(set (match_operand:P 0 "register_operand")
|
||||||
|
- (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
|
||||||
|
- (mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
|
||||||
|
- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
- && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
- || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
- [(set (match_dup 2)
|
||||||
|
- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
- {
|
||||||
|
- emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
- })
|
||||||
|
-
|
||||||
|
-(define_peephole2
|
||||||
|
- [(set (match_operand:P 0 "register_operand")
|
||||||
|
- (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
|
||||||
|
- (mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
|
||||||
|
- (match_operand 3 "const_int_operand"))))]
|
||||||
|
- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
- && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
- || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
- [(set (match_dup 2)
|
||||||
|
- (mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
|
||||||
|
- {
|
||||||
|
- operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
- emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
- })
|
||||||
|
-
|
||||||
|
-(define_peephole2
|
||||||
|
- [(set (match_operand:P 0 "register_operand")
|
||||||
|
- (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (match_operand:GPR 2 "register_operand")
|
||||||
|
- (any_extend:GPR (mem:SUBDI (match_dup 0))))]
|
||||||
|
- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
- && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
- || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
- [(set (match_dup 2)
|
||||||
|
- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
|
||||||
|
- (match_dup 1)))))]
|
||||||
|
+(define_insn_and_rewrite "simple_load<mode>"
|
||||||
|
+ [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
|
||||||
|
+ (match_operand:LD_AT_LEAST_32_BIT 1 "mem_simple_ldst_operand" ""))]
|
||||||
|
+ "loongarch_pre_reload_split ()
|
||||||
|
+ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
|
||||||
|
+ "#"
|
||||||
|
+ "&& true"
|
||||||
|
{
|
||||||
|
- emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ operands[1] = loongarch_rewrite_mem_for_simple_ldst (operands[1]);
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_peephole2
|
||||||
|
- [(set (match_operand:P 0 "register_operand")
|
||||||
|
- (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (match_operand:GPR 2 "register_operand")
|
||||||
|
+(define_insn_and_rewrite "simple_load_<su>ext<SUBDI:mode><GPR:mode>"
|
||||||
|
+ [(set (match_operand:GPR 0 "register_operand" "=r")
|
||||||
|
(any_extend:GPR
|
||||||
|
- (mem:SUBDI (plus (match_dup 0)
|
||||||
|
- (match_operand 3 "const_int_operand")))))]
|
||||||
|
- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
- && (peep2_reg_dead_p (2, operands[0]) \
|
||||||
|
- || REGNO (operands[0]) == REGNO (operands[2]))"
|
||||||
|
- [(set (match_dup 2)
|
||||||
|
- (any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
|
||||||
|
- (match_dup 1)))))]
|
||||||
|
- {
|
||||||
|
- operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
- emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
- })
|
||||||
|
-
|
||||||
|
-(define_peephole2
|
||||||
|
- [(set (match_operand:P 0 "register_operand")
|
||||||
|
- (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (mem:ST_ANY (match_dup 0))
|
||||||
|
- (match_operand:ST_ANY 2 "register_operand"))]
|
||||||
|
- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
- && (peep2_reg_dead_p (2, operands[0])) \
|
||||||
|
- && REGNO (operands[0]) != REGNO (operands[2])"
|
||||||
|
- [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
+ (match_operand:SUBDI 1 "mem_simple_ldst_operand" "")))]
|
||||||
|
+ "loongarch_pre_reload_split ()
|
||||||
|
+ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
|
||||||
|
+ "#"
|
||||||
|
+ "&& true"
|
||||||
|
{
|
||||||
|
- emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ operands[1] = loongarch_rewrite_mem_for_simple_ldst (operands[1]);
|
||||||
|
})
|
||||||
|
|
||||||
|
-(define_peephole2
|
||||||
|
- [(set (match_operand:P 0 "register_operand")
|
||||||
|
- (match_operand:P 1 "symbolic_pcrel_operand"))
|
||||||
|
- (set (mem:ST_ANY (plus (match_dup 0)
|
||||||
|
- (match_operand 3 "const_int_operand")))
|
||||||
|
- (match_operand:ST_ANY 2 "register_operand"))]
|
||||||
|
- "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
|
||||||
|
- && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
|
||||||
|
- && (peep2_reg_dead_p (2, operands[0])) \
|
||||||
|
- && REGNO (operands[0]) != REGNO (operands[2])"
|
||||||
|
- [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
|
||||||
|
+(define_insn_and_rewrite "simple_store<mode>"
|
||||||
|
+ [(set (match_operand:ST_ANY 0 "mem_simple_ldst_operand" "")
|
||||||
|
+ (match_operand:ST_ANY 1 "reg_or_0_operand" "r,f"))]
|
||||||
|
+ "loongarch_pre_reload_split ()
|
||||||
|
+ && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO
|
||||||
|
+ && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
|
||||||
|
+ "#"
|
||||||
|
+ "&& true"
|
||||||
|
{
|
||||||
|
- operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
|
||||||
|
- emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
|
||||||
|
+ operands[0] = loongarch_rewrite_mem_for_simple_ldst (operands[0]);
|
||||||
|
})
|
||||||
|
|
||||||
|
;; Synchronization instructions.
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 58f9a7826..3698b9103 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -579,6 +579,19 @@
|
||||||
|
return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL;
|
||||||
|
})
|
||||||
|
|
||||||
|
+(define_predicate "symbolic_pcrel_offset_operand"
|
||||||
|
+ (and (match_code "plus")
|
||||||
|
+ (match_operand 0 "symbolic_pcrel_operand")
|
||||||
|
+ (match_operand 1 "const_int_operand")))
|
||||||
|
+
|
||||||
|
+(define_predicate "mem_simple_ldst_operand"
|
||||||
|
+ (match_code "mem")
|
||||||
|
+{
|
||||||
|
+ op = XEXP (op, 0);
|
||||||
|
+ return (symbolic_pcrel_operand (op, Pmode)
|
||||||
|
+ || symbolic_pcrel_offset_operand (op, Pmode));
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
(define_predicate "equality_operator"
|
||||||
|
(match_code "eq,ne"))
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..42cb966d1
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
|
||||||
|
@@ -0,0 +1,11 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */
|
||||||
|
+
|
||||||
|
+float a[8001];
|
||||||
|
+float
|
||||||
|
+t (void)
|
||||||
|
+{
|
||||||
|
+ return a[0] + a[8000];
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/* { dg-final { scan-assembler-not "la.local" } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..32aa5383d
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
|
||||||
|
@@ -0,0 +1,18 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mexplicit-relocs=auto -fdump-rtl-final" } */
|
||||||
|
+/* { dg-final { scan-rtl-dump-times "mem/v/c" 2 "final" } } */
|
||||||
|
+/* { dg-final { scan-assembler-not "la\\.local" } } */
|
||||||
|
+
|
||||||
|
+volatile unsigned long counter;
|
||||||
|
+
|
||||||
|
+unsigned long
|
||||||
|
+read (void)
|
||||||
|
+{
|
||||||
|
+ return counter;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+clear (void)
|
||||||
|
+{
|
||||||
|
+ counter = 0;
|
||||||
|
+}
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,33 @@
|
|||||||
|
From 4d569c5fde85ca426eecf57119048ec25f048758 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Fri, 29 Dec 2023 20:04:34 +0800
|
||||||
|
Subject: [PATCH 091/188] LoongArch: Fix the format of
|
||||||
|
bstrins_<mode>_for_ior_mask condition (NFC)
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/loongarch.md (bstrins_<mode>_for_ior_mask):
|
||||||
|
For the condition, remove unneeded trailing "\" and move "&&" to
|
||||||
|
follow GNU coding style. NFC.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/loongarch.md | 4 ++--
|
||||||
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 3c61a0cf4..996df66e8 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -1486,8 +1486,8 @@
|
||||||
|
(match_operand:GPR 2 "const_int_operand"))
|
||||||
|
(and:GPR (match_operand:GPR 3 "register_operand")
|
||||||
|
(match_operand:GPR 4 "const_int_operand"))))]
|
||||||
|
- "loongarch_pre_reload_split () && \
|
||||||
|
- loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
|
||||||
|
+ "loongarch_pre_reload_split ()
|
||||||
|
+ && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
|
||||||
|
"#"
|
||||||
|
"&& true"
|
||||||
|
[(set (match_dup 0) (match_dup 1))
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
280
0092-LoongArch-Added-TLS-Le-Relax-support.patch
Normal file
280
0092-LoongArch-Added-TLS-Le-Relax-support.patch
Normal file
@ -0,0 +1,280 @@
|
|||||||
|
From 58d41ffad306a359ecd2902ec19d582506f14b10 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Tue, 12 Dec 2023 16:32:31 +0800
|
||||||
|
Subject: [PATCH 092/188] LoongArch: Added TLS Le Relax support.
|
||||||
|
|
||||||
|
Check whether the assembler supports tls le relax. If it supports it, the assembly
|
||||||
|
instruction sequence of tls le relax will be generated by default.
|
||||||
|
|
||||||
|
The original way to obtain the tls le symbol address:
|
||||||
|
lu12i.w $rd, %le_hi20(sym)
|
||||||
|
ori $rd, $rd, %le_lo12(sym)
|
||||||
|
add.{w/d} $rd, $rd, $tp
|
||||||
|
|
||||||
|
If the assembler supports tls le relax, the following sequence is generated:
|
||||||
|
|
||||||
|
lu12i.w $rd, %le_hi20_r(sym)
|
||||||
|
add.{w/d} $rd,$rd,$tp,%le_add_r(sym)
|
||||||
|
addi.{w/d} $rd,$rd,%le_lo12_r(sym)
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config.in: Regenerate.
|
||||||
|
* config/loongarch/loongarch-opts.h (HAVE_AS_TLS_LE_RELAXATION): Define.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_legitimize_tls_address):
|
||||||
|
Added TLS Le Relax support.
|
||||||
|
(loongarch_print_operand_reloc): Add the output string of TLS Le Relax.
|
||||||
|
* config/loongarch/loongarch.md (@add_tls_le_relax<mode>): New template.
|
||||||
|
* configure: Regenerate.
|
||||||
|
* configure.ac: Check if binutils supports TLS le relax.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* lib/target-supports.exp: Add a function to check whether binutils supports
|
||||||
|
TLS Le Relax.
|
||||||
|
* gcc.target/loongarch/tls-le-relax.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config.in | 6 +++
|
||||||
|
gcc/config/loongarch/loongarch-opts.h | 4 ++
|
||||||
|
gcc/config/loongarch/loongarch.cc | 46 +++++++++++++++++--
|
||||||
|
gcc/config/loongarch/loongarch.md | 12 +++++
|
||||||
|
gcc/configure | 31 +++++++++++++
|
||||||
|
gcc/configure.ac | 5 ++
|
||||||
|
.../gcc.target/loongarch/tls-le-relax.c | 12 +++++
|
||||||
|
gcc/testsuite/lib/target-supports.exp | 12 +++++
|
||||||
|
8 files changed, 125 insertions(+), 3 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-le-relax.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config.in b/gcc/config.in
|
||||||
|
index 033cfb98b..7220b2b2b 100644
|
||||||
|
--- a/gcc/config.in
|
||||||
|
+++ b/gcc/config.in
|
||||||
|
@@ -771,6 +771,12 @@
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
+/* Define if your assembler supports tls le relocation. */
|
||||||
|
+#ifndef USED_FOR_TARGET
|
||||||
|
+#undef HAVE_AS_TLS_LE_RELAXATION
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
+
|
||||||
|
/* Define if your assembler supports vl/vst/vlm/vstm with an optional
|
||||||
|
alignment hint argument. */
|
||||||
|
#ifndef USED_FOR_TARGET
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
index 639ed50bd..8491bee0d 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||||
|
@@ -114,4 +114,8 @@ struct loongarch_flags {
|
||||||
|
#define HAVE_AS_TLS 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
+#ifndef HAVE_AS_TLS_LE_RELAXATION
|
||||||
|
+#define HAVE_AS_TLS_LE_RELAXATION 0
|
||||||
|
+#endif
|
||||||
|
+
|
||||||
|
#endif /* LOONGARCH_OPTS_H */
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index c6318bee9..d1b1950dc 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -2993,7 +2993,29 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||||
|
|
||||||
|
case TLS_MODEL_LOCAL_EXEC:
|
||||||
|
{
|
||||||
|
- /* la.tls.le; tp-relative add. */
|
||||||
|
+ /* la.tls.le; tp-relative add.
|
||||||
|
+
|
||||||
|
+ normal:
|
||||||
|
+ lu12i.w $rd, %le_hi20(sym)
|
||||||
|
+ ori $rd, $rd, %le_lo12(sym)
|
||||||
|
+ add.{w/d} $rd, $rd, $tp
|
||||||
|
+ (st.{w/d}/ld.{w/d} $rs, $rd, 0)
|
||||||
|
+
|
||||||
|
+ tls le relax:
|
||||||
|
+ lu12i.w $rd, %le_hi20_r(sym)
|
||||||
|
+ add.{w/d} $rd,$rd,$tp
|
||||||
|
+ addi.{w/d} $rd,$rd,%le_lo12_r(sym)
|
||||||
|
+ (st.{w/d}/ld.{w/d} $rs, $rd, 0)
|
||||||
|
+
|
||||||
|
+ extreme (When the code model is set to extreme, the TLS le Relax
|
||||||
|
+ instruction sequence is not generated):
|
||||||
|
+ lu12i.w $rd, %le_hi20(sym)
|
||||||
|
+ ori $rd, $rd, %le_lo12(sym)
|
||||||
|
+ lu32i.d $rd, %le64_lo20(sym)
|
||||||
|
+ lu52i.d $rd, $rd, %le64_hi12(sym)
|
||||||
|
+ add.d $rd, $rd, $tp
|
||||||
|
+ (st.{w/d}/ld.{w/d} $rs, $rd, 0) */
|
||||||
|
+
|
||||||
|
tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
|
||||||
|
tmp1 = gen_reg_rtx (Pmode);
|
||||||
|
dest = gen_reg_rtx (Pmode);
|
||||||
|
@@ -3004,7 +3026,20 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||||
|
tmp3 = gen_reg_rtx (Pmode);
|
||||||
|
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
|
||||||
|
high = loongarch_force_temporary (tmp3, high);
|
||||||
|
- emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
|
||||||
|
+
|
||||||
|
+ /* The assembler does not implement tls le relax support when the
|
||||||
|
+ code model is extreme, so when the code model is extreme, the
|
||||||
|
+ old symbol address acquisition method is still used. */
|
||||||
|
+ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
|
||||||
|
+ {
|
||||||
|
+ emit_insn (gen_add_tls_le_relax (Pmode, dest, high,
|
||||||
|
+ tp, loc));
|
||||||
|
+ loongarch_emit_move (dest,
|
||||||
|
+ gen_rtx_LO_SUM (Pmode, dest, tmp2));
|
||||||
|
+ return dest;
|
||||||
|
+ }
|
||||||
|
+ else
|
||||||
|
+ emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
|
||||||
|
|
||||||
|
if (TARGET_CMODEL_EXTREME)
|
||||||
|
{
|
||||||
|
@@ -5936,7 +5971,12 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||||||
|
gcc_unreachable ();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
- reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||||||
|
+ {
|
||||||
|
+ if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
|
||||||
|
+ reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r";
|
||||||
|
+ else
|
||||||
|
+ reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||||||
|
+ }
|
||||||
|
break;
|
||||||
|
|
||||||
|
case SYMBOL_TLSGD:
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||||
|
index 996df66e8..02c537d4c 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.md
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.md
|
||||||
|
@@ -73,6 +73,7 @@
|
||||||
|
UNSPEC_LOAD_FROM_GOT
|
||||||
|
UNSPEC_PCALAU12I
|
||||||
|
UNSPEC_PCALAU12I_GR
|
||||||
|
+ UNSPEC_ADD_TLS_LE_RELAX
|
||||||
|
UNSPEC_ORI_L_LO12
|
||||||
|
UNSPEC_LUI_L_HI20
|
||||||
|
UNSPEC_LUI_H_LO20
|
||||||
|
@@ -2503,6 +2504,17 @@
|
||||||
|
"pcalau12i\t%0,%%pc_hi20(%1)"
|
||||||
|
[(set_attr "type" "move")])
|
||||||
|
|
||||||
|
+(define_insn "@add_tls_le_relax<mode>"
|
||||||
|
+ [(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
+ (unspec:P [(match_operand:P 1 "register_operand" "r")
|
||||||
|
+ (match_operand:P 2 "register_operand" "r")
|
||||||
|
+ (match_operand:P 3 "symbolic_operand")]
|
||||||
|
+ UNSPEC_ADD_TLS_LE_RELAX))]
|
||||||
|
+ "HAVE_AS_TLS_LE_RELAXATION"
|
||||||
|
+ "add.<d>\t%0,%1,%2,%%le_add_r(%3)"
|
||||||
|
+ [(set_attr "type" "move")]
|
||||||
|
+)
|
||||||
|
+
|
||||||
|
(define_insn "@ori_l_lo12<mode>"
|
||||||
|
[(set (match_operand:P 0 "register_operand" "=r")
|
||||||
|
(unspec:P [(match_operand:P 1 "register_operand" "r")
|
||||||
|
diff --git a/gcc/configure b/gcc/configure
|
||||||
|
index 5842e7a18..eecfe60d6 100755
|
||||||
|
--- a/gcc/configure
|
||||||
|
+++ b/gcc/configure
|
||||||
|
@@ -28968,6 +28968,37 @@ if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then
|
||||||
|
|
||||||
|
$as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h
|
||||||
|
|
||||||
|
+fi
|
||||||
|
+
|
||||||
|
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for tls le relaxation support" >&5
|
||||||
|
+$as_echo_n "checking assembler for tls le relaxation support... " >&6; }
|
||||||
|
+if ${gcc_cv_as_loongarch_tls_le_relaxation_support+:} false; then :
|
||||||
|
+ $as_echo_n "(cached) " >&6
|
||||||
|
+else
|
||||||
|
+ gcc_cv_as_loongarch_tls_le_relaxation_support=no
|
||||||
|
+ if test x$gcc_cv_as != x; then
|
||||||
|
+ $as_echo 'lu12i.w $t0,%le_hi20_r(a)' > conftest.s
|
||||||
|
+ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5'
|
||||||
|
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
|
||||||
|
+ (eval $ac_try) 2>&5
|
||||||
|
+ ac_status=$?
|
||||||
|
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
|
||||||
|
+ test $ac_status = 0; }; }
|
||||||
|
+ then
|
||||||
|
+ gcc_cv_as_loongarch_tls_le_relaxation_support=yes
|
||||||
|
+ else
|
||||||
|
+ echo "configure: failed program was" >&5
|
||||||
|
+ cat conftest.s >&5
|
||||||
|
+ fi
|
||||||
|
+ rm -f conftest.o conftest.s
|
||||||
|
+ fi
|
||||||
|
+fi
|
||||||
|
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_tls_le_relaxation_support" >&5
|
||||||
|
+$as_echo "$gcc_cv_as_loongarch_tls_le_relaxation_support" >&6; }
|
||||||
|
+if test $gcc_cv_as_loongarch_tls_le_relaxation_support = yes; then
|
||||||
|
+
|
||||||
|
+$as_echo "#define HAVE_AS_TLS_LE_RELAXATION 1" >>confdefs.h
|
||||||
|
+
|
||||||
|
fi
|
||||||
|
|
||||||
|
;;
|
||||||
|
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||||
|
index 9c3fd3ad6..d1032440d 100644
|
||||||
|
--- a/gcc/configure.ac
|
||||||
|
+++ b/gcc/configure.ac
|
||||||
|
@@ -5357,6 +5357,11 @@ x:
|
||||||
|
beq $a0,$a1,a],,
|
||||||
|
[AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1,
|
||||||
|
[Define if your assembler supports conditional branch relaxation.])])
|
||||||
|
+ gcc_GAS_CHECK_FEATURE([tls le relaxation support],
|
||||||
|
+ gcc_cv_as_loongarch_tls_le_relaxation_support,,
|
||||||
|
+ [lu12i.w $t0,%le_hi20_r(a)],,
|
||||||
|
+ [AC_DEFINE(HAVE_AS_TLS_LE_RELAXATION, 1,
|
||||||
|
+ [Define if your assembler supports tls le relocation.])])
|
||||||
|
;;
|
||||||
|
s390*-*-*)
|
||||||
|
gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..a9a404fc7
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c
|
||||||
|
@@ -0,0 +1,12 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs" } */
|
||||||
|
+/* { dg-final { scan-assembler "%le_add_r" { target tls_le_relax } } } */
|
||||||
|
+
|
||||||
|
+__attribute__ ((tls_model ("local-exec"))) __thread int a;
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test (void)
|
||||||
|
+{
|
||||||
|
+ a = 10;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
|
||||||
|
index b8bff1a31..20fbd43ee 100644
|
||||||
|
--- a/gcc/testsuite/lib/target-supports.exp
|
||||||
|
+++ b/gcc/testsuite/lib/target-supports.exp
|
||||||
|
@@ -10582,6 +10582,18 @@ proc check_effective_target_loongarch_call36_support { } {
|
||||||
|
} ""]
|
||||||
|
}
|
||||||
|
|
||||||
|
+# Returns 1 if binutils supports TLS le Relax, 0 otherwise.
|
||||||
|
+proc check_effective_target_tls_le_relax { } {
|
||||||
|
+ if [check_effective_target_tls_native] {
|
||||||
|
+ return [check_no_compiler_messages loongarch_tls_le_relax object {
|
||||||
|
+ /* Assembly code */
|
||||||
|
+ lu12i.w $r12, %le_hi20_r(a)
|
||||||
|
+ }]
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
# Return 1 if the target does *not* require strict alignment.
|
||||||
|
|
||||||
|
proc check_effective_target_non_strict_align {} {
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
112
0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
Normal file
112
0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
From 97081ba053424e35b1869a00d6ac0e84362d09ea Mon Sep 17 00:00:00 2001
|
||||||
|
From: Xi Ruoyao <xry111@xry111.site>
|
||||||
|
Date: Sat, 30 Dec 2023 21:40:11 +0800
|
||||||
|
Subject: [PATCH 093/188] LoongArch: Provide fmin/fmax RTL pattern for vectors
|
||||||
|
|
||||||
|
We already had smin/smax RTL pattern using vfmin/vfmax instructions.
|
||||||
|
But for smin/smax, it's unspecified what will happen if either operand
|
||||||
|
contains any NaN operands. So we would not vectorize the loop with
|
||||||
|
-fno-finite-math-only (the default for all optimization levels except
|
||||||
|
-Ofast).
|
||||||
|
|
||||||
|
But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we
|
||||||
|
can also use them and vectorize the loop.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/simd.md (fmax<mode>3): New define_insn.
|
||||||
|
(fmin<mode>3): Likewise.
|
||||||
|
(reduc_fmax_scal_<mode>): New define_expand.
|
||||||
|
(reduc_fmin_scal_<mode>): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vfmax-vfmin.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/simd.md | 31 +++++++++++++++++++
|
||||||
|
.../gcc.target/loongarch/vfmax-vfmin.c | 31 +++++++++++++++++++
|
||||||
|
2 files changed, 62 insertions(+)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
|
||||||
|
index 93fb39abc..8ac1d75a8 100644
|
||||||
|
--- a/gcc/config/loongarch/simd.md
|
||||||
|
+++ b/gcc/config/loongarch/simd.md
|
||||||
|
@@ -426,6 +426,37 @@
|
||||||
|
[(set_attr "type" "simd_fcmp")
|
||||||
|
(set_attr "mode" "<MODE>")])
|
||||||
|
|
||||||
|
+; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for
|
||||||
|
+; the corresponding IEEE-754-2008 operations. We must use UNSPEC instead
|
||||||
|
+; of smin/smax though, see PR105414 and PR107013.
|
||||||
|
+
|
||||||
|
+(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
|
||||||
|
+(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
|
||||||
|
+
|
||||||
|
+(define_insn "<fmaxmin><mode>3"
|
||||||
|
+ [(set (match_operand:FVEC 0 "register_operand" "=f")
|
||||||
|
+ (unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
|
||||||
|
+ (match_operand:FVEC 2 "register_operand" "f")]
|
||||||
|
+ UNSPEC_FMAXMIN))]
|
||||||
|
+ ""
|
||||||
|
+ "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
|
||||||
|
+ [(set_attr "type" "simd_fminmax")
|
||||||
|
+ (set_attr "mode" "<MODE>")])
|
||||||
|
+
|
||||||
|
+;; ... and also reduc operations.
|
||||||
|
+(define_expand "reduc_<fmaxmin>_scal_<mode>"
|
||||||
|
+ [(match_operand:<UNITMODE> 0 "register_operand")
|
||||||
|
+ (match_operand:FVEC 1 "register_operand")
|
||||||
|
+ (const_int UNSPEC_FMAXMIN)]
|
||||||
|
+ ""
|
||||||
|
+{
|
||||||
|
+ rtx tmp = gen_reg_rtx (<MODE>mode);
|
||||||
|
+ loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
|
||||||
|
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
|
||||||
|
+ const0_rtx));
|
||||||
|
+ DONE;
|
||||||
|
+})
|
||||||
|
+
|
||||||
|
; The LoongArch SX Instructions.
|
||||||
|
(include "lsx.md")
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..811fee361
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
|
||||||
|
@@ -0,0 +1,31 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-O2 -mtune=la464 -mlasx" } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
|
||||||
|
+/* { dg-final { scan-assembler "\txvfmax\\.s" } } */
|
||||||
|
+
|
||||||
|
+#define T(OP) __typeof__ (__builtin_##OP (0, 0))
|
||||||
|
+
|
||||||
|
+#define TEST(OP, LEN) \
|
||||||
|
+void \
|
||||||
|
+test_##OP##LEN (T (OP) *restrict dest, \
|
||||||
|
+ const T (OP) *restrict src1, \
|
||||||
|
+ const T (OP) *restrict src2) \
|
||||||
|
+{ \
|
||||||
|
+ for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
|
||||||
|
+ dest[i] = __builtin_##OP (src1[i], src2[i]); \
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+TEST(fmin, 16)
|
||||||
|
+TEST(fmax, 16)
|
||||||
|
+TEST(fmin, 32)
|
||||||
|
+TEST(fmax, 32)
|
||||||
|
+TEST(fminf, 16)
|
||||||
|
+TEST(fmaxf, 16)
|
||||||
|
+TEST(fminf, 32)
|
||||||
|
+TEST(fmaxf, 32)
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
1484
0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
Normal file
1484
0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,34 @@
|
|||||||
|
From 6263acd411b9685ebc7b16d19b91aad39cb7e184 Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Fri, 29 Dec 2023 09:45:15 +0800
|
||||||
|
Subject: [PATCH 095/188] LoongArch: testsuite:Fix FAIL in lasx-xvstelm.c file.
|
||||||
|
|
||||||
|
After implementing the cost model on the LoongArch architecture, the GCC
|
||||||
|
compiler code has this feature turned on by default, which causes the
|
||||||
|
lasx-xvstelm.c file test to fail. Through analysis, this test case can
|
||||||
|
generate vectorization instructions required for detection only after
|
||||||
|
disabling the functionality of the cost model with the "-fno-vect-cost-model"
|
||||||
|
compilation option.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: Add compile
|
||||||
|
option "-fno-vect-cost-model" to dg-options.
|
||||||
|
---
|
||||||
|
gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c | 2 +-
|
||||||
|
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||||
|
index 1a7b0e86f..4b846204a 100644
|
||||||
|
--- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
-/* { dg-options "-O3 -mlasx" } */
|
||||||
|
+/* { dg-options "-O3 -mlasx -fno-vect-cost-model" } */
|
||||||
|
/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
|
||||||
|
|
||||||
|
#define LEN 256
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,47 @@
|
|||||||
|
From c21f2c7e6c2385a3783977bbca79ebe178d0d141 Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Fri, 5 Jan 2024 11:43:24 +0800
|
||||||
|
Subject: [PATCH 096/188] LoongArch: testsuite:Modify the test behavior of the
|
||||||
|
vect-bic-bitmask-{12, 23}.c file.
|
||||||
|
|
||||||
|
Before modifying the test behavior of the program, dg-do is set to assemble in
|
||||||
|
vect-bic-bitmask-{12,23}.c. However, when the binutils library does not support
|
||||||
|
the vector instruction set, it will fail to recognize the vector instructions
|
||||||
|
and a FAIL will be reported at the assembly stage. So set the program's dg-do to
|
||||||
|
compile.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.dg/vect/vect-bic-bitmask-12.c: Change the default
|
||||||
|
setting of assembly to compile.
|
||||||
|
* gcc.dg/vect/vect-bic-bitmask-23.c: Ditto.
|
||||||
|
---
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c | 2 +-
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c | 2 +-
|
||||||
|
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
|
||||||
|
index 36ec5a8b1..213e4c2a4 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
|
||||||
|
-/* { dg-do assemble } */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
/* { dg-additional-options "-O3 -fdump-tree-dce -w" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
|
||||||
|
index 5b4c3b6e1..5dceb4bbc 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
|
||||||
|
@@ -1,5 +1,5 @@
|
||||||
|
/* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
|
||||||
|
-/* { dg-do assemble } */
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
/* { dg-additional-options "-O1 -fdump-tree-dce -w" } */
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,31 @@
|
|||||||
|
From cdee2d1e7391d95bf6fd471fddcb86ee81247929 Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Fri, 5 Jan 2024 11:43:27 +0800
|
||||||
|
Subject: [PATCH 097/188] LoongArch: testsuite:Delete the default run behavior
|
||||||
|
in pr60510.f.
|
||||||
|
|
||||||
|
When binutils does not support vector instruction sets, the test program fails
|
||||||
|
because it does not recognize vectorization at the assembly stage. Therefore,
|
||||||
|
the default run behavior of the program is deleted, so that the behavior of
|
||||||
|
the program depends on whether the software supports vectorization.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gfortran.dg/vect/pr60510.f: Delete the default behavior of the
|
||||||
|
program.
|
||||||
|
---
|
||||||
|
gcc/testsuite/gfortran.dg/vect/pr60510.f | 1 -
|
||||||
|
1 file changed, 1 deletion(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gfortran.dg/vect/pr60510.f b/gcc/testsuite/gfortran.dg/vect/pr60510.f
|
||||||
|
index ecd50dd55..c1e11b27d 100644
|
||||||
|
--- a/gcc/testsuite/gfortran.dg/vect/pr60510.f
|
||||||
|
+++ b/gcc/testsuite/gfortran.dg/vect/pr60510.f
|
||||||
|
@@ -1,4 +1,3 @@
|
||||||
|
-! { dg-do run }
|
||||||
|
! { dg-require-effective-target vect_double }
|
||||||
|
! { dg-require-effective-target vect_intdouble_cvt }
|
||||||
|
! { dg-additional-options "-fno-inline -ffast-math" }
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
157
0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
Normal file
157
0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
From c8fa8efa3297ebced55da8a69cf44f314573be7c Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Fri, 5 Jan 2024 11:43:28 +0800
|
||||||
|
Subject: [PATCH 098/188] LoongArch: testsuite:Added additional vectorization
|
||||||
|
"-mlasx" compilation option.
|
||||||
|
|
||||||
|
In the LoongArch architecture, the reason for not adding the 128-bit
|
||||||
|
vector-width-*hi* instruction template in the GCC back end is that it causes
|
||||||
|
program performance loss, so we can only add the "-mlasx" compilation option
|
||||||
|
to use 256-bit vectorization functions in test files.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.dg/vect/bb-slp-pattern-1.c: If you are testing on the
|
||||||
|
LoongArch architecture, you need to add the "-mlasx" compilation
|
||||||
|
option to generate vectorized code.
|
||||||
|
* gcc.dg/vect/slp-widen-mult-half.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-const-s16.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-const-u16.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-half-u8.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-half.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-u16.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-u8-u32.c: Ditto.
|
||||||
|
* gcc.dg/vect/vect-widen-mult-u8.c: Ditto.
|
||||||
|
---
|
||||||
|
gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c | 1 +
|
||||||
|
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c | 1 +
|
||||||
|
10 files changed, 10 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
|
||||||
|
index 47b1a4366..52ffca82a 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
|
||||||
|
@@ -1,4 +1,5 @@
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include "tree-vect.h"
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
|
||||||
|
index e3bfee333..cd44e551f 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
/* Disabling epilogues until we find a better way to deal with scans. */
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
|
||||||
|
|
||||||
|
#include "tree-vect.h"
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
|
||||||
|
index 4c95dd201..082c758cb 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
|
||||||
|
@@ -2,6 +2,7 @@
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
/* { dg-additional-options "-fno-ipa-icf" } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include "tree-vect.h"
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
|
||||||
|
index 4075f815c..a95e617ad 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
|
||||||
|
@@ -2,6 +2,7 @@
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
/* { dg-additional-options "-fno-ipa-icf" } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include "tree-vect.h"
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
|
||||||
|
index c4ac88e18..14d96645a 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
|
||||||
|
@@ -2,6 +2,7 @@
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
/* { dg-additional-options "-fno-ipa-icf" } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include "tree-vect.h"
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
|
||||||
|
index ebbf4f5e8..7901dae85 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
/* Disabling epilogues until we find a better way to deal with scans. */
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include "tree-vect.h"
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
|
||||||
|
index 2e28baae0..21b39953e 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
/* Disabling epilogues until we find a better way to deal with scans. */
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include "tree-vect.h"
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
|
||||||
|
index d277f0b2b..4827e11b2 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
|
||||||
|
@@ -1,6 +1,7 @@
|
||||||
|
/* Disabling epilogues until we find a better way to deal with scans. */
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include "tree-vect.h"
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
|
||||||
|
index f50358802..87eb9e0cb 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
|
||||||
|
@@ -1,5 +1,6 @@
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include "tree-vect.h"
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
|
||||||
|
index 03d137941..507d30c35 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
|
||||||
|
@@ -1,5 +1,6 @@
|
||||||
|
/* { dg-additional-options "--param vect-epilogues-nomask=0" } */
|
||||||
|
/* { dg-require-effective-target vect_int } */
|
||||||
|
+/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
|
||||||
|
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include "tree-vect.h"
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
@ -0,0 +1,80 @@
|
|||||||
|
From df18d0c85049402b8f2f44c3c4e013a0b6d91cee Mon Sep 17 00:00:00 2001
|
||||||
|
From: chenxiaolong <chenxiaolong@loongson.cn>
|
||||||
|
Date: Fri, 5 Jan 2024 11:43:29 +0800
|
||||||
|
Subject: [PATCH 099/188] LoongArch: testsuite:Give up the detection of the
|
||||||
|
gcc.dg/fma-{3, 4, 6, 7}.c file.
|
||||||
|
|
||||||
|
On the LoongArch architecture, the above four test cases need to be waived
|
||||||
|
during testing. There are two situations:
|
||||||
|
|
||||||
|
1. The function of fma-{3,6}.c test is to find the value of c-a*b, but on
|
||||||
|
the LoongArch architecture, the function of the existing fnmsub instruction
|
||||||
|
is to find the value of -(a*b - c);
|
||||||
|
|
||||||
|
2. The function of fma-{4,7}.c test is to find the value of -(a*b)-c, but on
|
||||||
|
the LoongArch architecture, the function of the existing fnmadd instruction
|
||||||
|
is to find the value of -(a*b + c);
|
||||||
|
|
||||||
|
Analysis of the two cases above shows that the results can differ in the
|
||||||
|
sign of zero (positive zero versus negative zero).
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog
|
||||||
|
|
||||||
|
* gcc.dg/fma-3.c: The intermediate file corresponding to the
|
||||||
|
function does not produce the corresponding FNMA symbol, so the test
|
||||||
|
rules should be skipped when testing.
|
||||||
|
* gcc.dg/fma-4.c: The intermediate file corresponding to the
|
||||||
|
function does not produce the corresponding FNMS symbol, so skip the
|
||||||
|
test rules when testing.
|
||||||
|
* gcc.dg/fma-6.c: The cause is the same as fma-3.c.
|
||||||
|
* gcc.dg/fma-7.c: The cause is the same as fma-4.c.
|
||||||
|
---
|
||||||
|
gcc/testsuite/gcc.dg/fma-3.c | 2 +-
|
||||||
|
gcc/testsuite/gcc.dg/fma-4.c | 2 +-
|
||||||
|
gcc/testsuite/gcc.dg/fma-6.c | 2 +-
|
||||||
|
gcc/testsuite/gcc.dg/fma-7.c | 2 +-
|
||||||
|
4 files changed, 4 insertions(+), 4 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c
|
||||||
|
index 699aa2c95..6649b54b6 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/fma-3.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/fma-3.c
|
||||||
|
@@ -12,4 +12,4 @@ f2 (double a, double b, double c)
|
||||||
|
return c - a * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target scalar_all_fma } } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c
|
||||||
|
index bff928f1f..f1701c196 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/fma-4.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/fma-4.c
|
||||||
|
@@ -12,4 +12,4 @@ f2 (double a, double b, double c)
|
||||||
|
return -(a * b) - c;
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target scalar_all_fma } } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c
|
||||||
|
index 87258cec4..9e49b62b6 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/fma-6.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/fma-6.c
|
||||||
|
@@ -64,4 +64,4 @@ f10 (double a, double b, double c)
|
||||||
|
return -__builtin_fma (a, b, -c);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target scalar_all_fma } } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
|
||||||
|
diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c
|
||||||
|
index f409cc8ee..86aacad7b 100644
|
||||||
|
--- a/gcc/testsuite/gcc.dg/fma-7.c
|
||||||
|
+++ b/gcc/testsuite/gcc.dg/fma-7.c
|
||||||
|
@@ -64,4 +64,4 @@ f10 (double a, double b, double c)
|
||||||
|
return -__builtin_fma (a, b, c);
|
||||||
|
}
|
||||||
|
|
||||||
|
-/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target scalar_all_fma } } } */
|
||||||
|
+/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
206
0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
Normal file
206
0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
Normal file
@ -0,0 +1,206 @@
|
|||||||
|
From 90db6906a92b685403d9220e94f779737d2dd100 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||||
|
Date: Thu, 4 Jan 2024 10:37:53 +0800
|
||||||
|
Subject: [PATCH 100/188] LoongArch: Fixed the problem of incorrect judgment of
|
||||||
|
the immediate field of the [x]vld/[x]vst instruction.
|
||||||
|
|
||||||
|
The [x]vld/[x]vst instructions are defined as follows:
|
||||||
|
[x]vld/[x]vst {x/v}d, rj, si12
|
||||||
|
|
||||||
|
When not modified, the immediate field of [x]vld/[x]vst is between 10 and
|
||||||
|
14 bits depending on the type. However, in loongarch_valid_offset_p, the
|
||||||
|
immediate field is restricted first, so there is no error. However, in
|
||||||
|
some cases redundant instructions will be generated; see the new test case.
|
||||||
|
This patch modifies it according to the description in the instruction manual.
|
||||||
|
|
||||||
|
gcc/ChangeLog:
|
||||||
|
|
||||||
|
* config/loongarch/lasx.md (lasx_mxld_<lasxfmt_f>):
|
||||||
|
Modify the method of determining the memory offset of [x]vld/[x]vst.
|
||||||
|
(lasx_mxst_<lasxfmt_f>): Likewise.
|
||||||
|
* config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete.
|
||||||
|
(loongarch_address_insns): Likewise.
|
||||||
|
* config/loongarch/lsx.md (lsx_ld_<lsxfmt_f>): Likewise.
|
||||||
|
(lsx_st_<lsxfmt_f>): Likewise.
|
||||||
|
* config/loongarch/predicates.md (aq10b_operand): Likewise.
|
||||||
|
(aq10h_operand): Likewise.
|
||||||
|
(aq10w_operand): Likewise.
|
||||||
|
(aq10d_operand): Likewise.
|
||||||
|
|
||||||
|
gcc/testsuite/ChangeLog:
|
||||||
|
|
||||||
|
* gcc.target/loongarch/vect-ld-st-imm12.c: New test.
|
||||||
|
---
|
||||||
|
gcc/config/loongarch/lasx.md | 26 -------------------
|
||||||
|
gcc/config/loongarch/loongarch.cc | 19 +++-----------
|
||||||
|
gcc/config/loongarch/lsx.md | 26 -------------------
|
||||||
|
gcc/config/loongarch/predicates.md | 16 ------------
|
||||||
|
.../gcc.target/loongarch/vect-ld-st-imm12.c | 15 +++++++++++
|
||||||
|
5 files changed, 19 insertions(+), 83 deletions(-)
|
||||||
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c
|
||||||
|
|
||||||
|
diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
|
||||||
|
index dbbf5a136..95c6bae20 100644
|
||||||
|
--- a/gcc/config/loongarch/lasx.md
|
||||||
|
+++ b/gcc/config/loongarch/lasx.md
|
||||||
|
@@ -846,32 +846,6 @@
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-;; Offset load
|
||||||
|
-(define_expand "lasx_mxld_<lasxfmt_f>"
|
||||||
|
- [(match_operand:LASX 0 "register_operand")
|
||||||
|
- (match_operand 1 "pmode_register_operand")
|
||||||
|
- (match_operand 2 "aq10<lasxfmt>_operand")]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
|
||||||
|
- INTVAL (operands[2]));
|
||||||
|
- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr));
|
||||||
|
- DONE;
|
||||||
|
-})
|
||||||
|
-
|
||||||
|
-;; Offset store
|
||||||
|
-(define_expand "lasx_mxst_<lasxfmt_f>"
|
||||||
|
- [(match_operand:LASX 0 "register_operand")
|
||||||
|
- (match_operand 1 "pmode_register_operand")
|
||||||
|
- (match_operand 2 "aq10<lasxfmt>_operand")]
|
||||||
|
- "ISA_HAS_LASX"
|
||||||
|
-{
|
||||||
|
- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
|
||||||
|
- INTVAL (operands[2]));
|
||||||
|
- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]);
|
||||||
|
- DONE;
|
||||||
|
-})
|
||||||
|
-
|
||||||
|
;; LASX
|
||||||
|
(define_insn "add<mode>3"
|
||||||
|
[(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
|
||||||
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||||
|
index 9d2374a46..ddb32cea2 100644
|
||||||
|
--- a/gcc/config/loongarch/loongarch.cc
|
||||||
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||||||
|
@@ -2123,21 +2123,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode)
|
||||||
|
|
||||||
|
/* We may need to split multiword moves, so make sure that every word
|
||||||
|
is accessible. */
|
||||||
|
- if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
|
||||||
|
+ if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
|
||||||
|
+ && GET_MODE_SIZE (mode) > UNITS_PER_WORD
|
||||||
|
&& !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
- /* LSX LD.* and ST.* supports 10-bit signed offsets. */
|
||||||
|
- if (LSX_SUPPORTED_MODE_P (mode)
|
||||||
|
- && !loongarch_signed_immediate_p (INTVAL (x), 10,
|
||||||
|
- loongarch_ldst_scaled_shift (mode)))
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
- /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift. */
|
||||||
|
- if (LASX_SUPPORTED_MODE_P (mode)
|
||||||
|
- && !loongarch_signed_immediate_p (INTVAL (x), 10, 0))
|
||||||
|
- return false;
|
||||||
|
-
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@@ -2372,9 +2362,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
|
||||||
|
case ADDRESS_REG:
|
||||||
|
if (lsx_p)
|
||||||
|
{
|
||||||
|
- /* LSX LD.* and ST.* supports 10-bit signed offsets. */
|
||||||
|
- if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10,
|
||||||
|
- loongarch_ldst_scaled_shift (mode)))
|
||||||
|
+ /* LSX LD.* and ST.* supports 12-bit signed offsets. */
|
||||||
|
+ if (IMM12_OPERAND (INTVAL (addr.offset)))
|
||||||
|
return 1;
|
||||||
|
else
|
||||||
|
return 0;
|
||||||
|
diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
|
||||||
|
index 3e3248ef4..02e89247b 100644
|
||||||
|
--- a/gcc/config/loongarch/lsx.md
|
||||||
|
+++ b/gcc/config/loongarch/lsx.md
|
||||||
|
@@ -812,32 +812,6 @@
|
||||||
|
DONE;
|
||||||
|
})
|
||||||
|
|
||||||
|
-;; Offset load
|
||||||
|
-(define_expand "lsx_ld_<lsxfmt_f>"
|
||||||
|
- [(match_operand:LSX 0 "register_operand")
|
||||||
|
- (match_operand 1 "pmode_register_operand")
|
||||||
|
- (match_operand 2 "aq10<lsxfmt>_operand")]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
-{
|
||||||
|
- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
|
||||||
|
- INTVAL (operands[2]));
|
||||||
|
- loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr));
|
||||||
|
- DONE;
|
||||||
|
-})
|
||||||
|
-
|
||||||
|
-;; Offset store
|
||||||
|
-(define_expand "lsx_st_<lsxfmt_f>"
|
||||||
|
- [(match_operand:LSX 0 "register_operand")
|
||||||
|
- (match_operand 1 "pmode_register_operand")
|
||||||
|
- (match_operand 2 "aq10<lsxfmt>_operand")]
|
||||||
|
- "ISA_HAS_LSX"
|
||||||
|
-{
|
||||||
|
- rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
|
||||||
|
- INTVAL (operands[2]));
|
||||||
|
- loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]);
|
||||||
|
- DONE;
|
||||||
|
-})
|
||||||
|
-
|
||||||
|
;; Integer operations
|
||||||
|
(define_insn "add<mode>3"
|
||||||
|
[(set (match_operand:ILSX 0 "register_operand" "=f,f,f")
|
||||||
|
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||||
|
index 3698b9103..824a85b36 100644
|
||||||
|
--- a/gcc/config/loongarch/predicates.md
|
||||||
|
+++ b/gcc/config/loongarch/predicates.md
|
||||||
|
@@ -167,22 +167,6 @@
|
||||||
|
(and (match_code "const_int")
|
||||||
|
(match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)")))
|
||||||
|
|
||||||
|
-(define_predicate "aq10b_operand"
|
||||||
|
- (and (match_code "const_int")
|
||||||
|
- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)")))
|
||||||
|
-
|
||||||
|
-(define_predicate "aq10h_operand"
|
||||||
|
- (and (match_code "const_int")
|
||||||
|
- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)")))
|
||||||
|
-
|
||||||
|
-(define_predicate "aq10w_operand"
|
||||||
|
- (and (match_code "const_int")
|
||||||
|
- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)")))
|
||||||
|
-
|
||||||
|
-(define_predicate "aq10d_operand"
|
||||||
|
- (and (match_code "const_int")
|
||||||
|
- (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)")))
|
||||||
|
-
|
||||||
|
(define_predicate "aq12b_operand"
|
||||||
|
(and (match_code "const_int")
|
||||||
|
(match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)")))
|
||||||
|
diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000..bfc208e4f
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c
|
||||||
|
@@ -0,0 +1,15 @@
|
||||||
|
+/* { dg-do compile } */
|
||||||
|
+/* { dg-options "-march=loongarch64 -mabi=lp64d -mlasx -O2" } */
|
||||||
|
+/* { dg-final { scan-assembler-not "addi.d" } } */
|
||||||
|
+
|
||||||
|
+extern short a[1000];
|
||||||
|
+extern short b[1000];
|
||||||
|
+extern short c[1000];
|
||||||
|
+
|
||||||
|
+void
|
||||||
|
+test (void)
|
||||||
|
+{
|
||||||
|
+ for (int i = 501; i < 517; i++)
|
||||||
|
+ ((int *)(c + 1))[i] = ((int *)(a + 1))[i] + ((int *)(b + 1))[i];
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
--
|
||||||
|
2.43.0
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user