!496 LoongArch: Sync from gcc upstream

From: @ticat-fp Reviewed-by: @li-yancheng Signed-off-by: @li-yancheng
2024-11-13 03:54:40 +00:00 · 2024-11-13 03:54:40 +00:00 · 41aeeefa83
commit 41aeeefa83
parent 663f592844 f653243538
192 changed files with 46369 additions and 4 deletions
--- a/0001-LoongArch-Reimplement-multilib-build-option-handling.patch
+++ b/0001-LoongArch-Reimplement-multilib-build-option-handling.patch
@ -0,0 +1,464 @@
 From d394a9ac68674b40e0d2b436c09e23dd29d8b5d0 Mon Sep 17 00:00:00 2001
 From: Yang Yujie <yangyujie@loongson.cn>
 Date: Wed, 13 Sep 2023 17:52:14 +0800
 Subject: [PATCH 001/188] LoongArch: Reimplement multilib build option
 handling.
 Library build options from --with-multilib-list used to be processed with
 *self_spec, which missed the driver's initial canonicalization.  This
 caused limitations on CFLAGS override and the use of driver-only options
 like -m[no]-lsx.
 The problem is solved by promoting the injection rules of --with-multilib-list
 options to the first element of DRIVER_SELF_SPECS, to make them execute before
 the canonicalization.  The library-build options are also hard-coded in
 the driver and can be used conveniently by the builders of other non-gcc
 libraries via the use of -fmultiflags.
 Bootstrapped and tested on loongarch64-linux-gnu.
 ChangeLog:
 	* config-ml.in: Remove unneeded loongarch clause.
 	* configure.ac: Register custom makefile fragments mt-loongarch-*
 	for loongarch targets.
 	* configure: Regenerate.
 config/ChangeLog:
 	* mt-loongarch-mlib: New file.  Pass -fmultiflags when building
 	target libraries (FLAGS_FOR_TARGET).
 	* mt-loongarch-elf: New file.
 	* mt-loongarch-gnu: New file.
 gcc/ChangeLog:
 	* config.gcc: Pass the default ABI via TM_MULTILIB_CONFIG.
 	* config/loongarch/loongarch-driver.h: Invoke MLIB_SELF_SPECS
 	before the driver canonicalization routines.
 	* config/loongarch/loongarch.h: Move definitions of CC1_SPEC etc.
 	to loongarch-driver.h
 	* config/loongarch/t-linux: Move multilib-related definitions to
 	t-multilib.
 	* config/loongarch/t-multilib: New file.  Inject library build
 	options obtained from --with-multilib-list.
 	* config/loongarch/t-loongarch: Same.
 ---
 config-ml.in                            | 10 ----
 config/mt-loongarch-elf                 |  1 +
 config/mt-loongarch-gnu                 |  2 +
 config/mt-loongarch-mlib                |  1 +
 configure                               |  6 +++
 configure.ac                            |  6 +++
 gcc/config.gcc                          |  6 +--
 gcc/config/loongarch/loongarch-driver.h | 42 +++++++++++++++
 gcc/config/loongarch/loongarch.h        | 50 ------------------
 gcc/config/loongarch/t-linux            | 66 +++---------------------
 gcc/config/loongarch/t-loongarch        |  2 +-
 gcc/config/loongarch/t-multilib         | 68 +++++++++++++++++++++++++
 12 files changed, 137 insertions(+), 123 deletions(-)
 create mode 100644 config/mt-loongarch-elf
 create mode 100644 config/mt-loongarch-gnu
 create mode 100644 config/mt-loongarch-mlib
 create mode 100644 gcc/config/loongarch/t-multilib
 diff --git a/config-ml.in b/config-ml.in
 index ad0db0781..68854a4f1 100644
 --- a/config-ml.in
 +++ b/config-ml.in
@@ -301,16 +301,6 @@ arm-*-*)
 	  done
 	fi
 	;;
 -loongarch*-*)
 -	old_multidirs="${multidirs}"
 -	multidirs=""
 -	for x in ${old_multidirs}; do
 -	case "$x" in
 -	`${CC-gcc} --print-multi-directory`) : ;;
 -	*) multidirs="${multidirs} ${x}" ;;
 -	esac
 -	done
 -	;;
 m68*-*-*)
 	if [ x$enable_softfloat = xno ]
 	then
 diff --git a/config/mt-loongarch-elf b/config/mt-loongarch-elf
 new file mode 100644
 index 000000000..bbf29bb57
 --- /dev/null
 +++ b/config/mt-loongarch-elf
@@ -0,0 +1 @@
 +include $(srcdir)/config/mt-loongarch-mlib
 diff --git a/config/mt-loongarch-gnu b/config/mt-loongarch-gnu
 new file mode 100644
 index 000000000..dfefb44ed
 --- /dev/null
 +++ b/config/mt-loongarch-gnu
@@ -0,0 +1,2 @@
 +include $(srcdir)/config/mt-gnu
 +include $(srcdir)/config/mt-loongarch-mlib
 diff --git a/config/mt-loongarch-mlib b/config/mt-loongarch-mlib
 new file mode 100644
 index 000000000..4cfe568f1
 --- /dev/null
 +++ b/config/mt-loongarch-mlib
@@ -0,0 +1 @@
 +FLAGS_FOR_TARGET += -fmultiflags
 diff --git a/configure b/configure
 index aff62c464..81b4a3cec 100755
 --- a/configure
 +++ b/configure
@@ -9548,6 +9548,12 @@ case "${target}" in
   spu-*-*)
     target_makefile_frag="config/mt-spu"
     ;;
 +  loongarch*-*linux* | loongarch*-*gnu*)
 +    target_makefile_frag="config/mt-loongarch-gnu"
 +    ;;
 +  loongarch*-*elf*)
 +    target_makefile_frag="config/mt-loongarch-elf"
 +    ;;
   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
     target_makefile_frag="config/mt-sde"
     ;;
 diff --git a/configure.ac b/configure.ac
 index f310d75ca..9f8dbd319 100644
 --- a/configure.ac
 +++ b/configure.ac
@@ -2729,6 +2729,12 @@ case "${target}" in
   spu-*-*)
     target_makefile_frag="config/mt-spu"
     ;;
 +  loongarch*-*linux* | loongarch*-*gnu*)
 +    target_makefile_frag="config/mt-loongarch-gnu"
 +    ;;
 +  loongarch*-*elf*)
 +    target_makefile_frag="config/mt-loongarch-elf"
 +    ;;
   mips*-sde-elf* | mips*-mti-elf* | mips*-img-elf*)
     target_makefile_frag="config/mt-sde"
     ;;
 diff --git a/gcc/config.gcc b/gcc/config.gcc
 index 3f870e966..e34a5fbb9 100644
 --- a/gcc/config.gcc
 +++ b/gcc/config.gcc
@@ -2510,7 +2510,7 @@ loongarch*-*-linux*)
 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
 	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
 	extra_options="${extra_options} linux-android.opt"
 -	tmake_file="${tmake_file} loongarch/t-linux"
 +	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
 	gnu_ld=yes
 	gas=yes
@@ -2522,7 +2522,7 @@ loongarch*-*-linux*)
 loongarch*-*-elf*)
 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
 	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
 -	tmake_file="${tmake_file} loongarch/t-linux"
 +	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
 	gnu_ld=yes
 	gas=yes
@@ -5241,7 +5241,7 @@ case "${target}" in
 		loongarch_multilib_list_sane=no
 		# This one goes to TM_MULTILIB_CONFIG, for use in t-linux.
 -		loongarch_multilib_list_make=""
 +		loongarch_multilib_list_make="${abi_base},"
 		# This one goes to tm_defines, for use in loongarch-driver.c.
 		loongarch_multilib_list_c=""
 diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
 index 6cfe0efb5..e7d083677 100644
 --- a/gcc/config/loongarch/loongarch-driver.h
 +++ b/gcc/config/loongarch/loongarch-driver.h
@@ -23,6 +23,39 @@ along with GCC; see the file COPYING3.  If not see
 #include "loongarch-str.h"
 +#ifndef SUBTARGET_CPP_SPEC
 +#define SUBTARGET_CPP_SPEC ""
 +#endif
 +
 +#ifndef SUBTARGET_CC1_SPEC
 +#define SUBTARGET_CC1_SPEC ""
 +#endif
 +
 +#ifndef SUBTARGET_ASM_SPEC
 +#define SUBTARGET_ASM_SPEC ""
 +#endif
 +
 +#define EXTRA_SPECS \
 +  {"early_self_spec", ""}, \
 +  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
 +  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
 +  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
 +
 +
 +#undef CPP_SPEC
 +#define CPP_SPEC \
 +  "%(subtarget_cpp_spec)"
 +
 +#undef CC1_SPEC
 +#define CC1_SPEC \
 +  "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \
 +  "%(subtarget_cc1_spec)"
 +
 +#undef ASM_SPEC
 +#define ASM_SPEC \
 +  "%{mabi=*} %(subtarget_asm_spec)"
 +
 +
 extern const char*
 la_driver_init (int argc, const char **argv);
@@ -45,7 +78,16 @@ driver_get_normalized_m_opts (int argc, const char **argv);
 #define LA_SET_PARM_SPEC(NAME) \
   " %{m" OPTSTR_##NAME  "=*: %:set_m_parm(" OPTSTR_##NAME " %*)}" \
 +/* For MLIB_SELF_SPECS.  */
 +#include "loongarch-multilib.h"
 +
 +#ifndef MLIB_SELF_SPECS
 +#define MLIB_SELF_SPECS ""
 +#endif
 +
 #define DRIVER_HANDLE_MACHINE_OPTIONS \
 +  " %(early_self_spec)", \
 +  MLIB_SELF_SPECS \
   " %:driver_init()" \
   " %{c|S|E|nostdlib: %:set_no_link()}" \
   " %{nostartfiles: %{nodefaultlibs: %:set_no_link()}}" \
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index c7e91a06d..a443a6427 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -64,56 +64,6 @@ along with GCC; see the file COPYING3.  If not see
 #define NM_FLAGS "-Bn"
 #endif
 -/* SUBTARGET_ASM_SPEC is always passed to the assembler.  It may be
 -   overridden by subtargets.  */
 -
 -#ifndef SUBTARGET_ASM_SPEC
 -#define SUBTARGET_ASM_SPEC ""
 -#endif
 -
 -#undef ASM_SPEC
 -#define ASM_SPEC "%{mabi=*} %{subtarget_asm_spec}"
 -
 -/* Extra switches sometimes passed to the linker.  */
 -
 -#ifndef LINK_SPEC
 -#define LINK_SPEC ""
 -#endif /* LINK_SPEC defined  */
 -
 -/* Specs for the compiler proper.  */
 -
 -/* CC1_SPEC is the set of arguments to pass to the compiler proper.  */
 -
 -#undef CC1_SPEC
 -#define CC1_SPEC "%{,ada:-gnatea} %{m*} \
 -%{G*} \
 -%(subtarget_cc1_spec) %{,ada:-gnatez}"
 -
 -/* Preprocessor specs.  */
 -
 -/* SUBTARGET_CPP_SPEC is passed to the preprocessor.  It may be
 -   overridden by subtargets.  */
 -#ifndef SUBTARGET_CPP_SPEC
 -#define SUBTARGET_CPP_SPEC ""
 -#endif
 -
 -#define CPP_SPEC "%(subtarget_cpp_spec)"
 -
 -/* This macro defines names of additional specifications to put in the specs
 -   that can be used in various specifications like CC1_SPEC.  Its definition
 -   is an initializer with a subgrouping for each command option.
 -
 -   Each subgrouping contains a string constant, that defines the
 -   specification name, and a string constant that used by the GCC driver
 -   program.
 -
 -   Do not define this macro if it does not need to do anything.  */
 -
 -#define EXTRA_SPECS \
 -  {"subtarget_cc1_spec", SUBTARGET_CC1_SPEC}, \
 -  {"subtarget_cpp_spec", SUBTARGET_CPP_SPEC}, \
 -  {"subtarget_asm_spec", SUBTARGET_ASM_SPEC},
 -
 /* Registers may have a prefix which can be ignored when matching
    user asm and register definitions.  */
 #ifndef REGISTER_PREFIX
 diff --git a/gcc/config/loongarch/t-linux b/gcc/config/loongarch/t-linux
 index 62a870b66..7cd7cde25 100644
 --- a/gcc/config/loongarch/t-linux
 +++ b/gcc/config/loongarch/t-linux
@@ -16,68 +16,16 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 -# Multilib
 -MULTILIB_OPTIONS = mabi=lp64d/mabi=lp64f/mabi=lp64s
 -MULTILIB_DIRNAMES = base/lp64d base/lp64f base/lp64s
 -
 -# The GCC driver always gets all abi-related options on the command line.
 -# (see loongarch-driver.c:driver_get_normalized_m_opts)
 -comma=,
 -MULTILIB_REQUIRED = $(foreach mlib,$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\
 -	$(firstword $(subst /, ,$(mlib))))
 -
 -SPECS = specs.install
 -
 -# temporary self_spec when building libraries (e.g. libgcc)
 -gen_mlib_spec = $(if $(word 2,$1),\
 -	%{$(firstword $1):$(patsubst %,-%,$(wordlist 2,$(words $1),$1))})
 -
 -# clean up the result of DRIVER_SELF_SPEC to avoid conflict
 -lib_build_self_spec  = %<march=* %<mtune=* %<mcmodel=* %<mfpu=* %<msimd=*
 -
 -# append user-specified build options from --with-multilib-list
 -lib_build_self_spec += $(foreach mlib,\
 -	$(subst $(comma), ,$(TM_MULTILIB_CONFIG)),\
 -	$(call gen_mlib_spec,$(subst /, ,$(mlib))))
 -
 -specs: specs.install
 -	sed '/^*self_spec:$$/{ n;s/^$$/$(lib_build_self_spec)/g; }' $< > $@
 -
 -# Do some preparation before regression tests:
 -# remove lib-build-specs / make symlinks for the toplevel multilib variant
 -
 -LA_DEFAULT_MULTISUBDIR = $(shell $(GCC_FOR_TARGET) --print-multi-dir)
 -.PHONY: remove-lib-specs
 -check check-host check-target $(CHECK_TARGETS) $(lang_checks): remove-lib-specs
 -remove-lib-specs:
 -	-mv -f specs.install specs 2>/dev/null
 -	-mv $(LA_DEFAULT_MULTISUBDIR)/* ./
 -	-mkdir -p ../$(target_noncanonical)/`dirname $(LA_DEFAULT_MULTISUBDIR)`
 -	-$(LN_S) .. ../$(target_noncanonical)/$(LA_DEFAULT_MULTISUBDIR)
 -
 -# Multiarch
 -ifneq ($(call if_multiarch,yes),yes)
 -    # Define LA_DISABLE_MULTIARCH if multiarch is disabled.
 -    tm_defines += LA_DISABLE_MULTIARCH
 -else
 -    # Only define MULTIARCH_DIRNAME when multiarch is enabled,
 -    # or it would always introduce ${target} into the search path.
 -    MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET)
 -endif
 +MULTIOSDIR_lp64d := ../lib64$(call if_multiarch,:loongarch64-linux-gnu)
 +MULTIOSDIR_lp64f := ../lib64/f32$(call if_multiarch,:loongarch64-linux-gnuf32)
 +MULTIOSDIR_lp64s := ../lib64/sf$(call if_multiarch,:loongarch64-linux-gnusf)
 # Don't define MULTILIB_OSDIRNAMES if multilib is disabled.
 ifeq ($(filter LA_DISABLE_MULTILIB,$(tm_defines)),)
 -    MULTILIB_OSDIRNAMES = \
 -      mabi.lp64d=../lib64$\
 -      $(call if_multiarch,:loongarch64-linux-gnu)
 -
 -    MULTILIB_OSDIRNAMES += \
 -      mabi.lp64f=../lib64/f32$\
 -      $(call if_multiarch,:loongarch64-linux-gnuf32)
 -
 -    MULTILIB_OSDIRNAMES += \
 -      mabi.lp64s=../lib64/sf$\
 -      $(call if_multiarch,:loongarch64-linux-gnusf)
 +    MULTILIB_OSDIRNAMES = .=$(MULTIOSDIR_$(mlib_default))
 +    MULTILIB_OSDIRNAMES += mabi.lp64d=$(MULTIOSDIR_lp64d)
 +    MULTILIB_OSDIRNAMES += mabi.lp64f=$(MULTIOSDIR_lp64f)
 +    MULTILIB_OSDIRNAMES += mabi.lp64s=$(MULTIOSDIR_lp64s)
 endif
 diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
 index e73f4f437..28cfb49df 100644
 --- a/gcc/config/loongarch/t-loongarch
 +++ b/gcc/config/loongarch/t-loongarch
@@ -16,7 +16,7 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 -TM_H += $(srcdir)/config/loongarch/loongarch-driver.h
 +TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
 OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
 		   $(srcdir)/config/loongarch/loongarch-tune.h
 diff --git a/gcc/config/loongarch/t-multilib b/gcc/config/loongarch/t-multilib
 new file mode 100644
 index 000000000..bf6c18298
 --- /dev/null
 +++ b/gcc/config/loongarch/t-multilib
@@ -0,0 +1,68 @@
 +# Copyright (C) 2023 Free Software Foundation, Inc.
 +#
 +# This file is part of GCC.
 +#
 +# GCC is free software; you can redistribute it and/or modify
 +# it under the terms of the GNU General Public License as published by
 +# the Free Software Foundation; either version 3, or (at your option)
 +# any later version.
 +#
 +# GCC is distributed in the hope that it will be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with GCC; see the file COPYING3.  If not see
 +# <http://www.gnu.org/licenses/>.
 +
 +# Helper definitions
 +comma=,
 +null  :=
 +space := $(null) #
 +exclude_1st = $(wordlist 2,$(words $1),$1)
 +
 +# Common definitions
 +mlib_all := lp64d lp64f lp64s
 +$(foreach i,$(mlib_all),$(eval MULTISUBDIR_$i := base/$i))
 +
 +mlib_default := $(firstword $(subst $(comma), ,$(TM_MULTILIB_CONFIG)))
 +mlib_all := $(filter-out $(mlib_default),$(mlib_all))
 +
 +MULTILIB_OPTIONS := $(subst $(space),/,$(foreach i,$(mlib_all),mabi=$(i)))
 +MULTILIB_DIRNAMES := $(foreach i,$(mlib_all),$(MULTISUBDIR_$(i)))
 +
 +# Customize builds with --with-multilib-list
 +MULTILIB_REQUIRED := $(foreach i,$(call exclude_1st,\
 +	$(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\
 +	$(firstword $(subst /, ,$(i))))
 +
 +## spec rules for building libraries, triggered by -fmultiflags
 +gen_mlib_spec = $(if $(word 2,$1),\
 +	%{$(firstword $1):$(patsubst %,-%,$(call exclude_1st,$1)}))
 +
 +lib_build_spec = $(foreach mlib,\
 +	$(call exclude_1st,$(subst $(comma), ,$(TM_MULTILIB_CONFIG))),\
 +	$(call gen_mlib_spec,$(subst /, ,$(mlib))))
 +
 +default_mlib_spec := %{fmultiflags:%{!mabi=*:-mabi=$(mlib_default)}}
 +lib_build_spec    := %{fmultiflags:$(lib_build_spec)}
 +
 +ifneq ($(TM_MULTILIB_CONFIG),)
 +loongarch-multilib.h:
 +	@echo "#define MLIB_SELF_SPECS" \
 +	      "\"$(default_mlib_spec)\"," \
 +	      "\"$(lib_build_spec)\"," > $@
 +else
 +loongarch-multilib.h: ; @touch $@
 +endif
 +
 +# Multiarch
 +ifneq ($(call if_multiarch,yes),yes)
 +    # Define LA_DISABLE_MULTIARCH if multiarch is disabled.
 +    tm_defines += LA_DISABLE_MULTIARCH
 +else
 +    # Only define MULTIARCH_DIRNAME when multiarch is enabled,
 +    # or it would always introduce ${target} into the search path.
 +    MULTIARCH_DIRNAME = $(LA_MULTIARCH_TRIPLET)
 +endif
 -- 
 2.43.0
--- a/0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
+++ b/0002-LoongArch-Check-whether-binutils-supports-the-relax-.patch
@ -0,0 +1,192 @@
 From 13c33536900709bf1f33171d5ae2b2af97789601 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Fri, 15 Sep 2023 10:22:49 +0800
 Subject: [PATCH 002/188] LoongArch: Check whether binutils supports the relax
 function. If supported, explicit relocs are turned off by default.
 gcc/ChangeLog:
 	* config.in: Regenerate.
 	* config/loongarch/genopts/loongarch.opt.in: Add compilation option
 	mrelax. And set the initial value of explicit-relocs according to the
 	detection status.
 	* config/loongarch/gnu-user.h: When compiling with -mno-relax, pass the
 	--no-relax option to the linker.
 	* config/loongarch/loongarch-driver.h (ASM_SPEC): When compiling with
 	-mno-relax, pass the -mno-relax option to the assembler.
 	* config/loongarch/loongarch-opts.h (HAVE_AS_MRELAX_OPTION): Define macro.
 	* config/loongarch/loongarch.opt: Regenerate.
 	* configure: Regenerate.
 	* configure.ac: Add detection of support for binutils relax function.
 ---
 gcc/config.in                                 |  6 ++++
 gcc/config/loongarch/genopts/loongarch.opt.in |  7 ++++-
 gcc/config/loongarch/gnu-user.h               |  3 +-
 gcc/config/loongarch/loongarch-driver.h       |  2 +-
 gcc/config/loongarch/loongarch-opts.h         |  4 +++
 gcc/config/loongarch/loongarch.opt            |  7 ++++-
 gcc/configure                                 | 31 +++++++++++++++++++
 gcc/configure.ac                              |  4 +++
 8 files changed, 60 insertions(+), 4 deletions(-)
 diff --git a/gcc/config.in b/gcc/config.in
 index 0dff36199..0c55e67e7 100644
 --- a/gcc/config.in
 +++ b/gcc/config.in
@@ -637,6 +637,12 @@
 #endif
 +/* Define if your assembler supports -mrelax option. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_AS_MRELAX_OPTION
 +#endif
 +
 +
 /* Define if your assembler supports .mspabi_attribute. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_MSPABI_ATTRIBUTE
 diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
 index 2ef1b1e3b..f18733c24 100644
 --- a/gcc/config/loongarch/genopts/loongarch.opt.in
 +++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -181,7 +181,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
 -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
 mexplicit-relocs
 -Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
 +Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
 Use %reloc() assembly operators.
 ; The code model option names for -mcmodel.
@@ -214,3 +214,8 @@ Specify the code model.
 mdirect-extern-access
 Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
 +
 +mrelax
 +Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 +Take advantage of linker relaxations to reduce the number of instructions
 +required to materialize symbol addresses.
 diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
 index 44e4f2575..60ef75601 100644
 --- a/gcc/config/loongarch/gnu-user.h
 +++ b/gcc/config/loongarch/gnu-user.h
@@ -48,7 +48,8 @@ along with GCC; see the file COPYING3.  If not see
   "%{!shared: %{static} " \
   "%{!static: %{!static-pie: %{rdynamic:-export-dynamic} " \
   "-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}} " \
 -  "%{static-pie: -static -pie --no-dynamic-linker -z text}}"
 +  "%{static-pie: -static -pie --no-dynamic-linker -z text}}" \
 +  "%{mno-relax: --no-relax}"
 /* Similar to standard Linux, but adding -ffast-math support.  */
 diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
 index e7d083677..59fa3263d 100644
 --- a/gcc/config/loongarch/loongarch-driver.h
 +++ b/gcc/config/loongarch/loongarch-driver.h
@@ -53,7 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #undef ASM_SPEC
 #define ASM_SPEC \
 -  "%{mabi=*} %(subtarget_asm_spec)"
 +  "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
 extern const char*
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index 624e246bb..f2b59abe6 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -99,4 +99,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 #define HAVE_AS_EXPLICIT_RELOCS 0
 #endif
 +#ifndef HAVE_AS_MRELAX_OPTION
 +#define HAVE_AS_MRELAX_OPTION 0
 +#endif
 +
 #endif /* LOONGARCH_OPTS_H */
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index f2d21c9f3..78f2baf3a 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -188,7 +188,7 @@ Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init
 -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
 mexplicit-relocs
 -Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS)
 +Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
 Use %reloc() assembly operators.
 ; The code model option names for -mcmodel.
@@ -221,3 +221,8 @@ Specify the code model.
 mdirect-extern-access
 Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
 +
 +mrelax
 +Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 +Take advantage of linker relaxations to reduce the number of instructions
 +required to materialize symbol addresses.
 diff --git a/gcc/configure b/gcc/configure
 index 2a5d3aaf3..8ae8a924a 100755
 --- a/gcc/configure
 +++ b/gcc/configure
@@ -28830,6 +28830,37 @@ if test $gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support = yes; then
 $as_echo "#define HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT 1" >>confdefs.h
 +fi
 +
 +    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mrelax option" >&5
 +$as_echo_n "checking assembler for -mrelax option... " >&6; }
 +if ${gcc_cv_as_loongarch_relax+:} false; then :
 +  $as_echo_n "(cached) " >&6
 +else
 +  gcc_cv_as_loongarch_relax=no
 +  if test x$gcc_cv_as != x; then
 +    $as_echo '.text' > conftest.s
 +    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mrelax -o conftest.o conftest.s >&5'
 +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
 +  (eval $ac_try) 2>&5
 +  ac_status=$?
 +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
 +  test $ac_status = 0; }; }
 +    then
 +	gcc_cv_as_loongarch_relax=yes
 +    else
 +      echo "configure: failed program was" >&5
 +      cat conftest.s >&5
 +    fi
 +    rm -f conftest.o conftest.s
 +  fi
 +fi
 +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_relax" >&5
 +$as_echo "$gcc_cv_as_loongarch_relax" >&6; }
 +if test $gcc_cv_as_loongarch_relax = yes; then
 +
 +$as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
 +
 fi
     ;;
 diff --git a/gcc/configure.ac b/gcc/configure.ac
 index ba2bf1ffc..f7161e66e 100644
 --- a/gcc/configure.ac
 +++ b/gcc/configure.ac
@@ -5322,6 +5322,10 @@ x:
        .cfi_endproc],,
       [AC_DEFINE(HAVE_AS_EH_FRAME_PCREL_ENCODING_SUPPORT, 1,
 	  [Define if your assembler supports eh_frame pcrel encoding.])])
 +    gcc_GAS_CHECK_FEATURE([-mrelax option], gcc_cv_as_loongarch_relax,
 +      [-mrelax], [.text],,
 +      [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
 +		[Define if your assembler supports -mrelax option.])])
     ;;
     s390*-*-*)
     gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
 -- 
 2.43.0
--- a/0003-Modify-gas-uleb128-support-test.patch
+++ b/0003-Modify-gas-uleb128-support-test.patch
@ -0,0 +1,115 @@
 From 38c338555e64da83fd35c608a1a89d738e1ca356 Mon Sep 17 00:00:00 2001
 From: mengqinggang <mengqinggang@loongson.cn>
 Date: Fri, 15 Sep 2023 12:04:04 +0800
 Subject: [PATCH 003/188] Modify gas uleb128 support test
 Some assemblers (GNU as for LoongArch) generate relocations for leb128
 symbol arithmetic to support relaxation, so we need to disable relaxation
 when probing for leb128 support.
 gcc/ChangeLog:
 	* configure: Regenerate.
 	* configure.ac: Check assembler for -mno-relax support.
 	Disable relaxation when probing leb128 support.
 co-authored-by: Xi Ruoyao <xry111@xry111.site>
 ---
 gcc/configure    | 42 +++++++++++++++++++++++++++++++++++++++++-
 gcc/configure.ac | 17 ++++++++++++++++-
 2 files changed, 57 insertions(+), 2 deletions(-)
 diff --git a/gcc/configure b/gcc/configure
 index 8ae8a924a..430d44dc3 100755
 --- a/gcc/configure
 +++ b/gcc/configure
@@ -24441,6 +24441,46 @@ _ACEOF
 +# Some assemblers (GNU as for LoongArch) generates relocations for
 +# leb128 symbol arithmetic for relaxation, we need to disable relaxation
 +# probing leb128 support then.
 +case $target in
 +  loongarch*-*-*)
 +    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for -mno-relax support" >&5
 +$as_echo_n "checking assembler for -mno-relax support... " >&6; }
 +if ${gcc_cv_as_mno_relax+:} false; then :
 +  $as_echo_n "(cached) " >&6
 +else
 +  gcc_cv_as_mno_relax=no
 +  if test x$gcc_cv_as != x; then
 +    $as_echo '.text' > conftest.s
 +    if { ac_try='$gcc_cv_as $gcc_cv_as_flags -mno-relax -o conftest.o conftest.s >&5'
 +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
 +  (eval $ac_try) 2>&5
 +  ac_status=$?
 +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
 +  test $ac_status = 0; }; }
 +    then
 +	gcc_cv_as_mno_relax=yes
 +    else
 +      echo "configure: failed program was" >&5
 +      cat conftest.s >&5
 +    fi
 +    rm -f conftest.o conftest.s
 +  fi
 +fi
 +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_mno_relax" >&5
 +$as_echo "$gcc_cv_as_mno_relax" >&6; }
 +if test $gcc_cv_as_mno_relax = yes; then
 +  check_leb128_asflags=-mno-relax
 +fi
 +
 +    ;;
 +  *)
 +    check_leb128_asflags=
 +    ;;
 +esac
 +
 # Check if we have .[us]leb128, and support symbol arithmetic with it.
 # Older versions of GAS and some non-GNU assemblers, have a bugs handling
 # these directives, even when they appear to accept them.
@@ -24459,7 +24499,7 @@ L1:
 L2:
 	.uleb128 0x8000000000000000
 ' > conftest.s
 -    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
 +    if { ac_try='$gcc_cv_as $gcc_cv_as_flags $check_leb128_asflags -o conftest.o conftest.s >&5'
   { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
   (eval $ac_try) 2>&5
   ac_status=$?
 diff --git a/gcc/configure.ac b/gcc/configure.ac
 index f7161e66e..4b24db190 100644
 --- a/gcc/configure.ac
 +++ b/gcc/configure.ac
@@ -3185,10 +3185,25 @@ AC_MSG_RESULT($gcc_cv_ld_ro_rw_mix)
 gcc_AC_INITFINI_ARRAY
 +# Some assemblers (GNU as for LoongArch) generates relocations for
 +# leb128 symbol arithmetic for relaxation, we need to disable relaxation
 +# probing leb128 support then.
 +case $target in
 +  loongarch*-*-*)
 +    gcc_GAS_CHECK_FEATURE([-mno-relax support],
 +      gcc_cv_as_mno_relax,[-mno-relax],[.text],,
 +      [check_leb128_asflags=-mno-relax])
 +    ;;
 +  *)
 +    check_leb128_asflags=
 +    ;;
 +esac
 +
 # Check if we have .[us]leb128, and support symbol arithmetic with it.
 # Older versions of GAS and some non-GNU assemblers, have a bugs handling
 # these directives, even when they appear to accept them.
 -gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,,
 +gcc_GAS_CHECK_FEATURE([.sleb128 and .uleb128], gcc_cv_as_leb128,
 +[$check_leb128_asflags],
 [	.data
 	.uleb128 L2 - L1
 L1:
 -- 
 2.43.0
--- a/0004-LoongArch-Optimizations-of-vector-construction.patch
+++ b/0004-LoongArch-Optimizations-of-vector-construction.patch
--- a/0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch
+++ b/0005-LoongArch-Replace-UNSPEC_FCOPYSIGN-with-copysign-RTL.patch
@ -0,0 +1,51 @@
 From 9b2cbf361e38ea1ad672c2b8c8cf1dda4f6f7d72 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 2 Oct 2023 18:51:00 +0800
 Subject: [PATCH 005/188] LoongArch: Replace UNSPEC_FCOPYSIGN with copysign RTL
 When I added copysign support for LoongArch (r13-3702), we did not have
 a copysign RTL insn, so I had to use UNSPEC to represent the copysign
 instruction. Now the copysign RTX code has been added in r14-1586, so
 this patch removes those UNSPECs, and it uses the native RTL copysign
 insn.
 Inspired by rs6000 patch "Cleanup: Replace UNSPEC_COPYSIGN with copysign
 RTL" [1] from Michael Meissner.
 [1]: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631701.html
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): Delete.
 	(copysign<mode>3): Use copysign RTL instead of UNSPEC.
 ---
 gcc/config/loongarch/loongarch.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 63ff32e75..73e2cbe0b 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -37,7 +37,6 @@
   UNSPEC_FCLASS
   UNSPEC_FMAX
   UNSPEC_FMIN
 -  UNSPEC_FCOPYSIGN
   UNSPEC_FTINT
   UNSPEC_FTINTRM
   UNSPEC_FTINTRP
@@ -1129,9 +1128,8 @@
 (define_insn "copysign<mode>3"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
 -	(unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
 -		      (match_operand:ANYF 2 "register_operand" "f")]
 -		     UNSPEC_FCOPYSIGN))]
 +	(copysign:ANYF (match_operand:ANYF 1 "register_operand" "f")
 +		       (match_operand:ANYF 2 "register_operand" "f")))]
   "TARGET_HARD_FLOAT"
   "fcopysign.<fmt>\t%0,%1,%2"
   [(set_attr "type" "fcopysign")
 -- 
 2.43.0
--- a/0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch
+++ b/0006-LoongArch-Adjust-makefile-dependency-for-loongarch-h.patch
@ -0,0 +1,71 @@
 From 746109cb61d6f3db4c25a9a107f30996c17f11db Mon Sep 17 00:00:00 2001
 From: Yang Yujie <yangyujie@loongson.cn>
 Date: Wed, 11 Oct 2023 17:59:53 +0800
 Subject: [PATCH 006/188] LoongArch: Adjust makefile dependency for loongarch
 headers.
 gcc/ChangeLog:
 	* config.gcc: Add loongarch-driver.h to tm_files.
 	* config/loongarch/loongarch.h: Do not include loongarch-driver.h.
 	* config/loongarch/t-loongarch: Append loongarch-multilib.h to $(GTM_H)
 	instead of $(TM_H) for building generator programs.
 ---
 gcc/config.gcc                   | 4 ++--
 gcc/config/loongarch/loongarch.h | 3 ---
 gcc/config/loongarch/t-loongarch | 3 ++-
 3 files changed, 4 insertions(+), 6 deletions(-)
 diff --git a/gcc/config.gcc b/gcc/config.gcc
 index e34a5fbb9..11ab620d0 100644
 --- a/gcc/config.gcc
 +++ b/gcc/config.gcc
@@ -2508,7 +2508,7 @@ riscv*-*-freebsd*)
 loongarch*-*-linux*)
 	tm_file="elfos.h gnu-user.h linux.h linux-android.h glibc-stdint.h ${tm_file}"
 -	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h"
 +	tm_file="${tm_file} loongarch/gnu-user.h loongarch/linux.h loongarch/loongarch-driver.h"
 	extra_options="${extra_options} linux-android.opt"
 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
 	gnu_ld=yes
@@ -2521,7 +2521,7 @@ loongarch*-*-linux*)
 loongarch*-*-elf*)
 	tm_file="elfos.h newlib-stdint.h ${tm_file}"
 -	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h"
 +	tm_file="${tm_file} loongarch/elf.h loongarch/linux.h loongarch/loongarch-driver.h"
 	tmake_file="${tmake_file} loongarch/t-multilib loongarch/t-linux"
 	gnu_ld=yes
 	gas=yes
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index a443a6427..a2dc4ba8c 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -49,9 +49,6 @@ along with GCC; see the file COPYING3.  If not see
 #define TARGET_LIBGCC_SDATA_SECTION ".sdata"
 -/* Driver native functions for SPEC processing in the GCC driver.  */
 -#include "loongarch-driver.h"
 -
 /* This definition replaces the formerly used 'm' constraint with a
    different constraint letter in order to avoid changing semantics of
    the 'm' constraint when accepting new address formats in
 diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
 index 28cfb49df..12734c37b 100644
 --- a/gcc/config/loongarch/t-loongarch
 +++ b/gcc/config/loongarch/t-loongarch
@@ -16,7 +16,8 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 -TM_H += loongarch-multilib.h $(srcdir)/config/loongarch/loongarch-driver.h
 +
 +GTM_H += loongarch-multilib.h
 OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
 		   $(srcdir)/config/loongarch/loongarch-tune.h
 -- 
 2.43.0
--- a/0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
+++ b/0007-LoongArch-Enable-vect.exp-for-LoongArch.-PR111424.patch
@ -0,0 +1,65 @@
 From b75f00086e863ac7e9e1ee37f8107b199cf62550 Mon Sep 17 00:00:00 2001
 From: Chenghui Pan <panchenghui@loongson.cn>
 Date: Fri, 25 Oct 2024 00:58:01 +0000
 Subject: [PATCH 007/188] LoongArch: Enable vect.exp for LoongArch. [PR111424]
 gcc/testsuite/ChangeLog:
        PR target/111424
        * lib/target-supports.exp: Enable vect.exp for LoongArch.
 ---
 gcc/testsuite/lib/target-supports.exp | 31 +++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)
 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
 index 192e0aded..bbe145c1c 100644
 --- a/gcc/testsuite/lib/target-supports.exp
 +++ b/gcc/testsuite/lib/target-supports.exp
@@ -10535,6 +10535,13 @@ proc check_vect_support_and_set_flags { } {
         }
     } elseif [istarget amdgcn-*-*] {
         set dg-do-what-default run
 +    } elseif [istarget loongarch*-*-*] {
 +      lappend DEFAULT_VECTCFLAGS "-mdouble-float" "-mlasx"
 +      if [check_effective_target_loongarch_asx_hw] {
 +     set dg-do-what-default run
 +      } else {
 +     set dg-do-what-default compile
 +      }
     } else {
         return 0
     }
@@ -10542,6 +10549,30 @@ proc check_vect_support_and_set_flags { } {
     return 1
 }
 +proc check_effective_target_loongarch_sx_hw { } {
 +    return [check_runtime loongarch_sx_hw {
 +   #include <lsxintrin.h>
 +   int main (void)
 +   {
 +     __m128i a, b, c;
 +     c = __lsx_vand_v (a, b);
 +     return 0;
 +   }
 +    } "-mlsx"]
 +}
 +
 +proc check_effective_target_loongarch_asx_hw { } {
 +    return [check_runtime loongarch_asx_hw {
 +   #include <lasxintrin.h>
 +   int main (void)
 +   {
 +     __m256i a, b, c;
 +     c = __lasx_xvand_v (a, b);
 +     return 0;
 +   }
 +    } "-mlasx"]
 +}
 +
 # Return 1 if the target does *not* require strict alignment.
 proc check_effective_target_non_strict_align {} {
 -- 
 2.43.0
--- a/0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch
+++ b/0008-LoongArch-Delete-macro-definition-ASM_OUTPUT_ALIGN_W.patch
@ -0,0 +1,48 @@
 From 3829ad1963a92526201b42233d2bb4facf7ba8d4 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Fri, 15 Sep 2023 11:56:01 +0800
 Subject: [PATCH 008/188] LoongArch: Delete macro definition
 ASM_OUTPUT_ALIGN_WITH_NOP.
 There are two reasons for removing this macro definition:
 1. The default in the assembler is to use the nop instruction for filling.
 2. For assembly directives: .align [abs-expr[, abs-expr[, abs-expr]]]
   The third expression is the maximum number of bytes that should be
   skipped by this alignment directive.
   Therefore, it will affect the display of the specified alignment rules
   and affect the operating efficiency.
 This modification relies on binutils commit 1fb3cdd87ec61715a5684925fb6d6a6cf53bb97c.
 (Because the assembler inserts nops based on the .align information when relaxing,
 a conditional branch can go out of range during the assembly process.
 That binutils commit solves this problem.)
 gcc/ChangeLog:
 	* config/loongarch/loongarch.h (ASM_OUTPUT_ALIGN_WITH_NOP):
 	Delete.
 Co-authored-by: Chenghua Xu <xuchenghua@loongson.cn>
 ---
 gcc/config/loongarch/loongarch.h | 5 -----
 1 file changed, 5 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index a2dc4ba8c..572b538be 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -1058,11 +1058,6 @@ typedef struct {
 #define ASM_OUTPUT_ALIGN(STREAM, LOG) fprintf (STREAM, "\t.align\t%d\n", (LOG))
 -/* "nop" instruction 54525952 (andi $r0,$r0,0) is
 -   used for padding.  */
 -#define ASM_OUTPUT_ALIGN_WITH_NOP(STREAM, LOG) \
 -  fprintf (STREAM, "\t.align\t%d,54525952,4\n", (LOG))
 -
 /* This is how to output an assembler line to advance the location
    counter by SIZE bytes.  */
 -- 
 2.43.0
--- a/0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
+++ b/0009-LoongArch-Fix-vec_initv32qiv16qi-template-to-avoid-I.patch
@ -0,0 +1,105 @@
 From aa947bf395b5722a23f2edd9d6302e220473d900 Mon Sep 17 00:00:00 2001
 From: Chenghui Pan <panchenghui@loongson.cn>
 Date: Wed, 11 Oct 2023 16:41:25 +0800
 Subject: [PATCH 009/188] LoongArch: Fix vec_initv32qiv16qi template to avoid
 ICE.
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 The following test code triggers an unrecognizable insn ICE on the LoongArch target
 with "-O3 -mlasx":
 void
 foo (unsigned char *dst, unsigned char *src)
 {
  for (int y = 0; y < 16; y++)
    {
      for (int x = 0; x < 16; x++)
        dst[x] = src[x] + 1;
      dst += 32;
      src += 32;
    }
 }
 ICE info:
 ./test.c: In function ‘foo’:
 ./test.c:8:1: error: unrecognizable insn:
    8 | }
      | ^
 (insn 15 14 16 4 (set (reg:V32QI 185 [ vect__24.7 ])
        (vec_concat:V32QI (reg:V16QI 186)
            (const_vector:V16QI [
                    (const_int 0 [0]) repeated x16
                ]))) "./test.c":4:19 -1
     (nil))
 during RTL pass: vregs
 ./test.c:8:1: internal compiler error: in extract_insn, at recog.cc:2791
 0x12028023b _fatal_insn(char const*, rtx_def const*, char const*, int, char const*)
        /home/panchenghui/upstream/gcc/gcc/rtl-error.cc:108
 0x12028026f _fatal_insn_not_found(rtx_def const*, char const*, int, char const*)
        /home/panchenghui/upstream/gcc/gcc/rtl-error.cc:116
 0x120a03c5b extract_insn(rtx_insn*)
        /home/panchenghui/upstream/gcc/gcc/recog.cc:2791
 0x12067ff73 instantiate_virtual_regs_in_insn
        /home/panchenghui/upstream/gcc/gcc/function.cc:1610
 0x12067ff73 instantiate_virtual_regs
        /home/panchenghui/upstream/gcc/gcc/function.cc:1983
 0x12067ff73 execute
        /home/panchenghui/upstream/gcc/gcc/function.cc:2030
 This RTL is generated inside the loongarch_expand_vector_group_init function (related
 to the vec_initv32qiv16qi template). The original implementation doesn't ensure that all
 vec_concat arguments are registers. This patch adds force_reg() to the vec_concat argument generation.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_expand_vector_group_init):
 	fix impl related to vec_initv32qiv16qi template to avoid ICE.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc                  |  3 ++-
 .../loongarch/vector/lasx/lasx-vec-init-1.c        | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 760b12268..9a629a999 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -10188,7 +10188,8 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val)
 void
 loongarch_expand_vector_group_init (rtx target, rtx vals)
 {
 -  rtx ops[2] = { XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1) };
 +  rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)),
 +      force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) };
   emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0],
 						      ops[1])));
 }
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
 new file mode 100644
 index 000000000..28be32982
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-1.c
@@ -0,0 +1,14 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3" } */
 +
 +void
 +foo (unsigned char *dst, unsigned char *src)
 +{
 +  for (int y = 0; y < 16; y++)
 +    {
 +      for (int x = 0; x < 16; x++)
 +        dst[x] = src[x] + 1;
 +      dst += 32;
 +      src += 32;
 +    }
 +}
 -- 
 2.43.0
--- a/0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch
+++ b/0010-LoongArch-Use-fcmp.caf.s-instead-of-movgr2cf-for-zer.patch
@ -0,0 +1,35 @@
 From 35bce671a97b27a41c425109ba92b24ab87ff35b Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Tue, 17 Oct 2023 21:55:05 +0800
 Subject: [PATCH 010/188] LoongArch: Use fcmp.caf.s instead of movgr2cf for
 zeroing a fcc
 During the review of an LLVM change [1], on LA464 we found that zeroing
 an fcc with fcmp.caf.s is much faster than a movgr2cf from $r0.
 [1]: https://github.com/llvm/llvm-project/pull/69300
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (movfcc): Use fcmp.caf.s for
 	zeroing a fcc.
 ---
 gcc/config/loongarch/loongarch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 73e2cbe0b..5f9e63d66 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2150,7 +2150,7 @@
   [(set (match_operand:FCC 0 "register_operand" "=z")
 	(const_int 0))]
   ""
 -  "movgr2cf\t%0,$r0")
 +  "fcmp.caf.s\t%0,$f0,$f0")
 ;; Conditional move instructions.
 -- 
 2.43.0
--- a/0011-LoongArch-Implement-avg-and-sad-standard-names.patch
+++ b/0011-LoongArch-Implement-avg-and-sad-standard-names.patch
@ -0,0 +1,389 @@
 From 159dd069968fae895f1f663ebda6f53970ec34b1 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 18 Oct 2023 17:36:12 +0800
 Subject: [PATCH 011/188] LoongArch:Implement avg and sad standard names.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md
 	(avg<mode>3_ceil): New patterns.
 	(uavg<mode>3_ceil): Ditto.
 	(avg<mode>3_floor): Ditto.
 	(uavg<mode>3_floor): Ditto.
 	(usadv32qi): Ditto.
 	(ssadv32qi): Ditto.
 	* config/loongarch/lsx.md
 	(avg<mode>3_ceil): New patterns.
 	(uavg<mode>3_ceil): Ditto.
 	(avg<mode>3_floor): Ditto.
 	(uavg<mode>3_floor): Ditto.
 	(usadv16qi): Ditto.
 	(ssadv16qi): Ditto.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/avg-ceil-lasx.c: New test.
 	* gcc.target/loongarch/avg-ceil-lsx.c: New test.
 	* gcc.target/loongarch/avg-floor-lasx.c: New test.
 	* gcc.target/loongarch/avg-floor-lsx.c: New test.
 	* gcc.target/loongarch/sad-lasx.c: New test.
 	* gcc.target/loongarch/sad-lsx.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 78 +++++++++++++++++++
 gcc/config/loongarch/lsx.md                   | 78 +++++++++++++++++++
 .../gcc.target/loongarch/avg-ceil-lasx.c      | 22 ++++++
 .../gcc.target/loongarch/avg-ceil-lsx.c       | 22 ++++++
 .../gcc.target/loongarch/avg-floor-lasx.c     | 22 ++++++
 .../gcc.target/loongarch/avg-floor-lsx.c      | 22 ++++++
 gcc/testsuite/gcc.target/loongarch/sad-lasx.c | 20 +++++
 gcc/testsuite/gcc.target/loongarch/sad-lsx.c  | 20 +++++
 8 files changed, 284 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lasx.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/sad-lsx.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 2bc5d47ed..c7496d68a 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -5171,3 +5171,81 @@
 					      const0_rtx));
   DONE;
 })
 +
 +(define_expand "avg<mode>3_ceil"
 +  [(match_operand:ILASX_WHB 0 "register_operand")
 +   (match_operand:ILASX_WHB 1 "register_operand")
 +   (match_operand:ILASX_WHB 2 "register_operand")]
 +  "ISA_HAS_LASX"
 +{
 +  emit_insn (gen_lasx_xvavgr_s_<lasxfmt> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "uavg<mode>3_ceil"
 +  [(match_operand:ILASX_WHB 0 "register_operand")
 +   (match_operand:ILASX_WHB 1 "register_operand")
 +   (match_operand:ILASX_WHB 2 "register_operand")]
 +  "ISA_HAS_LASX"
 +{
 +  emit_insn (gen_lasx_xvavgr_u_<lasxfmt_u> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "avg<mode>3_floor"
 +  [(match_operand:ILASX_WHB 0 "register_operand")
 +   (match_operand:ILASX_WHB 1 "register_operand")
 +   (match_operand:ILASX_WHB 2 "register_operand")]
 +  "ISA_HAS_LASX"
 +{
 +  emit_insn (gen_lasx_xvavg_s_<lasxfmt> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "uavg<mode>3_floor"
 +  [(match_operand:ILASX_WHB 0 "register_operand")
 +   (match_operand:ILASX_WHB 1 "register_operand")
 +   (match_operand:ILASX_WHB 2 "register_operand")]
 +  "ISA_HAS_LASX"
 +{
 +  emit_insn (gen_lasx_xvavg_u_<lasxfmt_u> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "usadv32qi"
 +  [(match_operand:V8SI 0 "register_operand")
 +   (match_operand:V32QI 1 "register_operand")
 +   (match_operand:V32QI 2 "register_operand")
 +   (match_operand:V8SI 3 "register_operand")]
 +  "ISA_HAS_LASX"
 +{
 +  rtx t1 = gen_reg_rtx (V32QImode);
 +  rtx t2 = gen_reg_rtx (V16HImode);
 +  rtx t3 = gen_reg_rtx (V8SImode);
 +  emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
 +  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
 +  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
 +  DONE;
 +})
 +
 +(define_expand "ssadv32qi"
 +  [(match_operand:V8SI 0 "register_operand")
 +   (match_operand:V32QI 1 "register_operand")
 +   (match_operand:V32QI 2 "register_operand")
 +   (match_operand:V8SI 3 "register_operand")]
 +  "ISA_HAS_LASX"
 +{
 +  rtx t1 = gen_reg_rtx (V32QImode);
 +  rtx t2 = gen_reg_rtx (V16HImode);
 +  rtx t3 = gen_reg_rtx (V8SImode);
 +  emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
 +  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
 +  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
 +  DONE;
 +})
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 075f6ba56..b4e92ae9c 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -3581,6 +3581,84 @@
   DONE;
 })
 +(define_expand "avg<mode>3_ceil"
 +  [(match_operand:ILSX_WHB 0 "register_operand")
 +   (match_operand:ILSX_WHB 1 "register_operand")
 +   (match_operand:ILSX_WHB 2 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  emit_insn (gen_lsx_vavgr_s_<lsxfmt> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "uavg<mode>3_ceil"
 +  [(match_operand:ILSX_WHB 0 "register_operand")
 +   (match_operand:ILSX_WHB 1 "register_operand")
 +   (match_operand:ILSX_WHB 2 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  emit_insn (gen_lsx_vavgr_u_<lsxfmt_u> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "avg<mode>3_floor"
 +  [(match_operand:ILSX_WHB 0 "register_operand")
 +   (match_operand:ILSX_WHB 1 "register_operand")
 +   (match_operand:ILSX_WHB 2 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  emit_insn (gen_lsx_vavg_s_<lsxfmt> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "uavg<mode>3_floor"
 +  [(match_operand:ILSX_WHB 0 "register_operand")
 +   (match_operand:ILSX_WHB 1 "register_operand")
 +   (match_operand:ILSX_WHB 2 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  emit_insn (gen_lsx_vavg_u_<lsxfmt_u> (operands[0],
 +	operands[1], operands[2]));
 +  DONE;
 +})
 +
 +(define_expand "usadv16qi"
 +  [(match_operand:V4SI 0 "register_operand")
 +   (match_operand:V16QI 1 "register_operand")
 +   (match_operand:V16QI 2 "register_operand")
 +   (match_operand:V4SI 3 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  rtx t1 = gen_reg_rtx (V16QImode);
 +  rtx t2 = gen_reg_rtx (V8HImode);
 +  rtx t3 = gen_reg_rtx (V4SImode);
 +  emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
 +  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
 +  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
 +  DONE;
 +})
 +
 +(define_expand "ssadv16qi"
 +  [(match_operand:V4SI 0 "register_operand")
 +   (match_operand:V16QI 1 "register_operand")
 +   (match_operand:V16QI 2 "register_operand")
 +   (match_operand:V4SI 3 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  rtx t1 = gen_reg_rtx (V16QImode);
 +  rtx t2 = gen_reg_rtx (V8HImode);
 +  rtx t3 = gen_reg_rtx (V4SImode);
 +  emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
 +  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
 +  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
 +  DONE;
 +})
 +
 (define_insn "lsx_v<optab>wev_d_w<u>"
   [(set (match_operand:V2DI 0 "register_operand" "=f")
 	(addsubmul:V2DI
 diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
 new file mode 100644
 index 000000000..16db7bf72
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lasx.c
@@ -0,0 +1,22 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +/* { dg-final { scan-assembler "xvavgr.b" } } */
 +/* { dg-final { scan-assembler "xvavgr.bu" } } */
 +/* { dg-final { scan-assembler "xvavgr.hu" } } */
 +/* { dg-final { scan-assembler "xvavgr.h" } } */
 +
 +#define N 1024
 +
 +#define TEST(TYPE, NAME)                                        \
 +  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N];                   \
 +  void f_##NAME (void)                                          \
 +  {                                                             \
 +    int i;                                                      \
 +    for (i = 0; i < N; i++)                                     \
 +      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1;       \
 +  }
 +
 +TEST(char, 1);
 +TEST(short, 2);
 +TEST(unsigned char, 3);
 +TEST(unsigned short, 4);
 diff --git a/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
 new file mode 100644
 index 000000000..94119c23b
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/avg-ceil-lsx.c
@@ -0,0 +1,22 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlsx" } */
 +/* { dg-final { scan-assembler "vavgr.b" } } */
 +/* { dg-final { scan-assembler "vavgr.bu" } } */
 +/* { dg-final { scan-assembler "vavgr.hu" } } */
 +/* { dg-final { scan-assembler "vavgr.h" } } */
 +
 +#define N 1024
 +
 +#define TEST(TYPE, NAME)                                        \
 +  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N];                   \
 +  void f_##NAME (void)                                          \
 +  {                                                             \
 +    int i;                                                      \
 +    for (i = 0; i < N; i++)                                     \
 +      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i] + 1) >> 1;       \
 +  }
 +
 +TEST(char, 1);
 +TEST(short, 2);
 +TEST(unsigned char, 3);
 +TEST(unsigned short, 4);
 diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
 new file mode 100644
 index 000000000..da6896531
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lasx.c
@@ -0,0 +1,22 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +/* { dg-final { scan-assembler "xvavg.b" } } */
 +/* { dg-final { scan-assembler "xvavg.bu" } } */
 +/* { dg-final { scan-assembler "xvavg.hu" } } */
 +/* { dg-final { scan-assembler "xvavg.h" } } */
 +
 +#define N 1024
 +
 +#define TEST(TYPE, NAME)                                        \
 +  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N];                   \
 +  void f_##NAME (void)                                          \
 +  {                                                             \
 +    int i;                                                      \
 +    for (i = 0; i < N; i++)                                     \
 +      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1;           \
 +  }
 +
 +TEST(char, 1);
 +TEST(short, 2);
 +TEST(unsigned char, 3);
 +TEST(unsigned short, 4);
 diff --git a/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
 new file mode 100644
 index 000000000..bbb9db527
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/avg-floor-lsx.c
@@ -0,0 +1,22 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlsx" } */
 +/* { dg-final { scan-assembler "vavg.b" } } */
 +/* { dg-final { scan-assembler "vavg.bu" } } */
 +/* { dg-final { scan-assembler "vavg.hu" } } */
 +/* { dg-final { scan-assembler "vavg.h" } } */
 +
 +#define N 1024
 +
 +#define TEST(TYPE, NAME)                                        \
 +  TYPE a_##NAME[N], b_##NAME[N], c_##NAME[N];                   \
 +  void f_##NAME (void)                                          \
 +  {                                                             \
 +    int i;                                                      \
 +    for (i = 0; i < N; i++)                                     \
 +      a_##NAME[i] = (b_##NAME[i] + c_##NAME[i]) >> 1;           \
 +  }
 +
 +TEST(char, 1);
 +TEST(short, 2);
 +TEST(unsigned char, 3);
 +TEST(unsigned short, 4);
 diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lasx.c b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c
 new file mode 100644
 index 000000000..6c0cdfd97
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/sad-lasx.c
@@ -0,0 +1,20 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +
 +#define N 1024
 +
 +#define TEST(SIGN)                                             \
 +  SIGN char a_##SIGN[N], b_##SIGN[N];                          \
 +  int f_##SIGN (void)                                          \
 +  {                                                            \
 +    int i, sum = 0;                                            \
 +    for (i = 0; i < N; i++)                                    \
 +      sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);;       \
 +    return sum;                                                \
 +  }
 +
 +TEST(signed);
 +TEST(unsigned);
 +
 +/* { dg-final { scan-assembler {\txvabsd.bu\t} } } */
 +/* { dg-final { scan-assembler {\txvabsd.b\t} } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/sad-lsx.c b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c
 new file mode 100644
 index 000000000..b92110a8b
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/sad-lsx.c
@@ -0,0 +1,20 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlsx" } */
 +
 +#define N 1024
 +
 +#define TEST(SIGN)                                             \
 +  SIGN char a_##SIGN[N], b_##SIGN[N];                          \
 +  int f_##SIGN (void)                                          \
 +  {                                                            \
 +    int i, sum = 0;                                            \
 +    for (i = 0; i < N; i++)                                    \
 +      sum += __builtin_abs (a_##SIGN[i] - b_##SIGN[i]);;       \
 +    return sum;                                                \
 +  }
 +
 +TEST(signed);
 +TEST(unsigned);
 +
 +/* { dg-final { scan-assembler {\tvabsd.bu\t} } } */
 +/* { dg-final { scan-assembler {\tvabsd.b\t} } } */
 -- 
 2.43.0
--- a/0012-LoongArch-Implement-vec_widen-standard-names.patch
+++ b/0012-LoongArch-Implement-vec_widen-standard-names.patch
@ -0,0 +1,403 @@
 From 81e2e22979d9f9d170b1c30ec27e30e1f25aec35 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 18 Oct 2023 17:39:40 +0800
 Subject: [PATCH 012/188] LoongArch:Implement vec_widen standard names.
 Add support for vec_widen lo/hi patterns.  These do not directly
 match LoongArch LASX instructions but can be emulated with
 even/odd + vector merge.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md
 	(vec_widen_<su>mult_even_v8si): New patterns.
 	(vec_widen_<su>add_hi_<mode>): Ditto.
 	(vec_widen_<su>add_lo_<mode>): Ditto.
 	(vec_widen_<su>sub_hi_<mode>): Ditto.
 	(vec_widen_<su>sub_lo_<mode>): Ditto.
 	(vec_widen_<su>mult_hi_<mode>): Ditto.
 	(vec_widen_<su>mult_lo_<mode>): Ditto.
 	* config/loongarch/loongarch.md (u_bool): New iterator.
 	* config/loongarch/loongarch-protos.h
 	(loongarch_expand_vec_widen_hilo): New prototype.
 	* config/loongarch/loongarch.cc
 	(loongarch_expand_vec_interleave): New function.
 	(loongarch_expand_vec_widen_hilo): New function.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-widen-add.c: New test.
 	* gcc.target/loongarch/vect-widen-mul.c: New test.
 	* gcc.target/loongarch/vect-widen-sub.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  |  82 ++++++++---
 gcc/config/loongarch/loongarch-protos.h       |   1 +
 gcc/config/loongarch/loongarch.cc             | 137 ++++++++++++++++++
 gcc/config/loongarch/loongarch.md             |   2 +
 .../gcc.target/loongarch/vect-widen-add.c     |  24 +++
 .../gcc.target/loongarch/vect-widen-mul.c     |  24 +++
 .../gcc.target/loongarch/vect-widen-sub.c     |  24 +++
 7 files changed, 277 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index c7496d68a..442fda246 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -5048,23 +5048,71 @@
   [(set_attr "type" "simd_store")
    (set_attr "mode" "DI")])
 -(define_insn "vec_widen_<su>mult_even_v8si"
 -  [(set (match_operand:V4DI 0 "register_operand" "=f")
 -    (mult:V4DI
 -      (any_extend:V4DI
 -        (vec_select:V4SI
 -          (match_operand:V8SI 1 "register_operand" "%f")
 -          (parallel [(const_int 0) (const_int 2)
 -                         (const_int 4) (const_int 6)])))
 -      (any_extend:V4DI
 -        (vec_select:V4SI
 -          (match_operand:V8SI 2 "register_operand" "f")
 -          (parallel [(const_int 0) (const_int 2)
 -             (const_int 4) (const_int 6)])))))]
 -  "ISA_HAS_LASX"
 -  "xvmulwev.d.w<u>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "V4DI")])
 +(define_expand "vec_widen_<su>add_hi_<mode>"
 +  [(match_operand:<VDMODE256> 0 "register_operand")
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
 +  "ISA_HAS_LASX"
 +{
 +  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
 +                        <u_bool>, true, "add");
 +  DONE;
 +})
 +
 +(define_expand "vec_widen_<su>add_lo_<mode>"
 +  [(match_operand:<VDMODE256> 0 "register_operand")
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
 +  "ISA_HAS_LASX"
 +{
 +  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
 +                        <u_bool>, false, "add");
 +  DONE;
 +})
 +
 +(define_expand "vec_widen_<su>sub_hi_<mode>"
 +  [(match_operand:<VDMODE256> 0 "register_operand")
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
 +  "ISA_HAS_LASX"
 +{
 +  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
 +                        <u_bool>, true, "sub");
 +  DONE;
 +})
 +
 +(define_expand "vec_widen_<su>sub_lo_<mode>"
 +  [(match_operand:<VDMODE256> 0 "register_operand")
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
 +  "ISA_HAS_LASX"
 +{
 +  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
 +                        <u_bool>, false, "sub");
 +  DONE;
 +})
 +
 +(define_expand "vec_widen_<su>mult_hi_<mode>"
 +  [(match_operand:<VDMODE256> 0 "register_operand")
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
 +  "ISA_HAS_LASX"
 +{
 +  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
 +                        <u_bool>, true, "mult");
 +  DONE;
 +})
 +
 +(define_expand "vec_widen_<su>mult_lo_<mode>"
 +  [(match_operand:<VDMODE256> 0 "register_operand")
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 1 "register_operand"))
 +   (any_extend:<VDMODE256> (match_operand:ILASX_HB 2 "register_operand"))]
 +  "ISA_HAS_LASX"
 +{
 +  loongarch_expand_vec_widen_hilo (operands[0], operands[1], operands[2],
 +                        <u_bool>, false, "mult");
 +  DONE;
 +})
 ;; Vector reduction operation
 (define_expand "reduc_plus_scal_v4di"
 diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
 index ea61cf567..163162598 100644
 --- a/gcc/config/loongarch/loongarch-protos.h
 +++ b/gcc/config/loongarch/loongarch-protos.h
@@ -205,6 +205,7 @@ extern void loongarch_register_frame_header_opt (void);
 extern void loongarch_expand_vec_cond_expr (machine_mode, machine_mode, rtx *);
 extern void loongarch_expand_vec_cond_mask_expr (machine_mode, machine_mode,
 						 rtx *);
 +extern void loongarch_expand_vec_widen_hilo (rtx, rtx, rtx, bool, bool, const char *);
 /* Routines implemented in loongarch-c.c.  */
 void loongarch_cpu_cpp_builtins (cpp_reader *);
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 9a629a999..c0f58f9a9 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -8028,6 +8028,143 @@ loongarch_expand_vec_perm_even_odd (struct expand_vec_perm_d *d)
   return loongarch_expand_vec_perm_even_odd_1 (d, odd);
 }
 +static void
 +loongarch_expand_vec_interleave (rtx target, rtx op0, rtx op1, bool high_p)
 +{
 +  struct expand_vec_perm_d d;
 +  unsigned i, nelt, base;
 +  bool ok;
 +
 +  d.target = target;
 +  d.op0 = op0;
 +  d.op1 = op1;
 +  d.vmode = GET_MODE (target);
 +  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
 +  d.one_vector_p = false;
 +  d.testing_p = false;
 +
 +  base = high_p ? nelt / 2 : 0;
 +  for (i = 0; i < nelt / 2; ++i)
 +    {
 +      d.perm[i * 2] = i + base;
 +      d.perm[i * 2 + 1] = i + base + nelt;
 +    }
 +
 +  ok = loongarch_expand_vec_perm_interleave (&d);
 +  gcc_assert (ok);
 +}
 +
 +/* The loongarch lasx instructions xvmulwev and xvmulwod return the even or odd
 +   parts of the double sized result elements in the corresponding elements of
 +   the target register. That's NOT what the vec_widen_umult_lo/hi patterns are
 +   expected to do. We emulate the widening lo/hi multiplies with the even/odd
 +   versions followed by a vector merge.  */
 +
 +void
 +loongarch_expand_vec_widen_hilo (rtx dest, rtx op1, rtx op2,
 +				 bool uns_p, bool high_p, const char *optab)
 +{
 +  machine_mode wmode = GET_MODE (dest);
 +  machine_mode mode = GET_MODE (op1);
 +  rtx t1, t2, t3;
 +
 +  t1 = gen_reg_rtx (wmode);
 +  t2 = gen_reg_rtx (wmode);
 +  t3 = gen_reg_rtx (wmode);
 +  switch (mode)
 +    {
 +    case V16HImode:
 +      if (!strcmp (optab, "add"))
 +	{
 +	  if (!uns_p)
 +	    {
 +	      emit_insn (gen_lasx_xvaddwev_w_h (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvaddwod_w_h (t2, op1, op2));
 +	    }
 +	  else
 +	    {
 +	      emit_insn (gen_lasx_xvaddwev_w_hu (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvaddwod_w_hu (t2, op1, op2));
 +	    }
 +	}
 +      else if (!strcmp (optab, "mult"))
 +	{
 +	  if (!uns_p)
 +	    {
 +	      emit_insn (gen_lasx_xvmulwev_w_h (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvmulwod_w_h (t2, op1, op2));
 +	    }
 +	  else
 +	    {
 +	      emit_insn (gen_lasx_xvmulwev_w_hu (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvmulwod_w_hu (t2, op1, op2));
 +	    }
 +	}
 +      else if (!strcmp (optab, "sub"))
 +	{
 +	  if (!uns_p)
 +	    {
 +	      emit_insn (gen_lasx_xvsubwev_w_h (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvsubwod_w_h (t2, op1, op2));
 +	    }
 +	  else
 +	    {
 +	      emit_insn (gen_lasx_xvsubwev_w_hu (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvsubwod_w_hu (t2, op1, op2));
 +	    }
 +	}
 +      break;
 +
 +    case V32QImode:
 +      if (!strcmp (optab, "add"))
 +	{
 +	  if (!uns_p)
 +	    {
 +	      emit_insn (gen_lasx_xvaddwev_h_b (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvaddwod_h_b (t2, op1, op2));
 +	    }
 +	  else
 +	    {
 +	      emit_insn (gen_lasx_xvaddwev_h_bu (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvaddwod_h_bu (t2, op1, op2));
 +	    }
 +	}
 +      else if (!strcmp (optab, "mult"))
 +	{
 +	  if (!uns_p)
 +	    {
 +	      emit_insn (gen_lasx_xvmulwev_h_b (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvmulwod_h_b (t2, op1, op2));
 +	    }
 +	  else
 +	    {
 +	      emit_insn (gen_lasx_xvmulwev_h_bu (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvmulwod_h_bu (t2, op1, op2));
 +	    }
 +	}
 +      else if (!strcmp (optab, "sub"))
 +	{
 +	  if (!uns_p)
 +	    {
 +	      emit_insn (gen_lasx_xvsubwev_h_b (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvsubwod_h_b (t2, op1, op2));
 +	    }
 +	  else
 +	    {
 +	      emit_insn (gen_lasx_xvsubwev_h_bu (t1, op1, op2));
 +	      emit_insn (gen_lasx_xvsubwod_h_bu (t2, op1, op2));
 +	    }
 +	}
 +      break;
 +
 +    default:
 +      gcc_unreachable ();
 +    }
 +
 +  loongarch_expand_vec_interleave (t3, t1, t2, high_p);
 +  emit_move_insn (dest, gen_lowpart (wmode, t3));
 +}
 +
 /* Expand a variable vector permutation for LASX.  */
 void
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 5f9e63d66..29ac950bf 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -509,6 +509,8 @@
 ;; <su> is like <u>, but the signed form expands to "s" rather than "".
 (define_code_attr su [(sign_extend "s") (zero_extend "u")])
 +(define_code_attr u_bool [(sign_extend "false") (zero_extend "true")])
 +
 ;; <optab> expands to the name of the optab for a particular code.
 (define_code_attr optab [(ashift "ashl")
 			 (ashiftrt "ashr")
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
 new file mode 100644
 index 000000000..0bf832d0e
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-add.c
@@ -0,0 +1,24 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +/* { dg-final { scan-assembler "xvaddwev.w.h"  } } */
 +/* { dg-final { scan-assembler "xvaddwod.w.h"  } } */
 +/* { dg-final { scan-assembler "xvaddwev.w.hu"  } } */
 +/* { dg-final { scan-assembler "xvaddwod.w.hu"  } } */
 +
 +#include <stdint.h>
 +
 +#define SIZE 1024
 +
 +void
 +wide_uadd (uint32_t *foo, uint16_t *a, uint16_t *b)
 +{
 +  for ( int i = 0; i < SIZE; i++)
 +    foo[i]   = a[i] + b[i];
 +}
 +
 +void
 +wide_sadd (int32_t *foo, int16_t *a, int16_t *b)
 +{
 +  for ( int i = 0; i < SIZE; i++)
 +    foo[i]   = a[i] + b[i];
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
 new file mode 100644
 index 000000000..84b020eea
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-mul.c
@@ -0,0 +1,24 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +/* { dg-final { scan-assembler "xvmulwev.w.h"  } } */
 +/* { dg-final { scan-assembler "xvmulwod.w.h"  } } */
 +/* { dg-final { scan-assembler "xvmulwev.w.hu"  } } */
 +/* { dg-final { scan-assembler "xvmulwod.w.hu"  } } */
 +
 +#include <stdint.h>
 +
 +#define SIZE 1024
 +
 +void
 +wide_umul (uint32_t *foo, uint16_t *a, uint16_t *b)
 +{
 +  for ( int i = 0; i < SIZE; i++)
 +    foo[i] = a[i] * b[i];
 +}
 +
 +void
 +wide_smul (int32_t *foo, int16_t *a, int16_t *b)
 +{
 +  for ( int i = 0; i < SIZE; i++)
 +    foo[i]   = a[i] * b[i];
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
 new file mode 100644
 index 000000000..69fc3a517
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-widen-sub.c
@@ -0,0 +1,24 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +/* { dg-final { scan-assembler "xvsubwev.w.h"  } } */
 +/* { dg-final { scan-assembler "xvsubwod.w.h"  } } */
 +/* { dg-final { scan-assembler "xvsubwev.w.hu"  } } */
 +/* { dg-final { scan-assembler "xvsubwod.w.hu"  } } */
 +
 +#include <stdint.h>
 +
 +#define SIZE 1024
 +
 +void
 +wide_usub (uint32_t *foo, uint16_t *a, uint16_t *b)
 +{
 +  for ( int i = 0; i < SIZE; i++)
 +    foo[i]   = a[i] - b[i];
 +}
 +
 +void
 +wide_ssub (int32_t *foo, int16_t *a, int16_t *b)
 +{
 +  for ( int i = 0; i < SIZE; i++)
 +    foo[i]   = a[i] - b[i];
 +}
 -- 
 2.43.0
--- a/0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
+++ b/0013-LoongArch-Implement-the-new-vector-cost-model-framew.patch
@ -0,0 +1,354 @@
 From 472890b43d2848a46fa13945279308f0a21c55d9 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 18 Oct 2023 17:43:39 +0800
 Subject: [PATCH 013/188] LoongArch:Implement the new vector cost model
 framework.
 This patch makes LoongArch use the new vector cost hooks and implements the
 costing function determine_suggested_unroll_factor, so that it is able to
 suggest the unroll factor for a given loop being vectorized, based on the
 vec_ops analysis done during vector costing and the available issue
 information.  The implementation refers to the aarch64 and rs6000 ports.
 The patch also reduces the cost of unaligned stores, making it equal to the
 cost of aligned ones in order to avoid odd alignment peeling.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_vector_costs): Inherit from
 	vector_costs.  Add a constructor.
 	(loongarch_vector_costs::add_stmt_cost): Use adjust_cost_for_freq to
 	adjust the cost for inner loops.
 	(loongarch_vector_costs::count_operations): New function.
 	(loongarch_vector_costs::determine_suggested_unroll_factor): Ditto.
 	(loongarch_vector_costs::finish_cost): Ditto.
 	(loongarch_builtin_vectorization_cost): Adjust.
 	* config/loongarch/loongarch.opt (loongarch-vect-unroll-limit): New parameter.
 	(loongarch-vect-issue-info): Ditto.
 	(mmemvec-cost): Delete.
 	* config/loongarch/genopts/loongarch.opt.in
 	(loongarch-vect-unroll-limit): Ditto.
 	(loongarch-vect-issue-info): Ditto.
 	(mmemvec-cost): Delete.
 	* doc/invoke.texi (loongarch-vect-unroll-limit): Document new option.
 ---
 gcc/config/loongarch/genopts/loongarch.opt.in |  15 +-
 gcc/config/loongarch/loongarch.cc             | 173 ++++++++++++++++--
 gcc/config/loongarch/loongarch.opt            |  15 +-
 gcc/doc/invoke.texi                           |   7 +
 4 files changed, 188 insertions(+), 22 deletions(-)
 diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
 index f18733c24..74cf4a7f7 100644
 --- a/gcc/config/loongarch/genopts/loongarch.opt.in
 +++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -152,10 +152,6 @@ mbranch-cost=
 Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
 -mbranch-cost=COST	Set the cost of branches to roughly COST instructions.
 -mmemvec-cost=
 -Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
 -mmemvec-cost=COST      Set the cost of vector memory access instructions.
 -
 mcheck-zero-division
 Target Mask(CHECK_ZERO_DIV)
 Trap on integer divide by zero.
@@ -219,3 +215,14 @@ mrelax
 Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
 +
 +-param=loongarch-vect-unroll-limit=
 +Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
 +Used to limit unroll factor which indicates how much the autovectorizer may
 +unroll a loop.  The default value is 6.
 +
 +-param=loongarch-vect-issue-info=
 +Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
 +Indicate how many non memory access vector instructions can be issued per
 +cycle, it's used in unroll factor determination for autovectorizer.  The
 +default value is 4.
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index c0f58f9a9..e22a64600 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -65,6 +65,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl-iter.h"
 #include "opts.h"
 #include "function-abi.h"
 +#include "cfgloop.h"
 +#include "tree-vectorizer.h"
 /* This file should be included last.  */
 #include "target-def.h"
@@ -3841,8 +3843,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
     }
 }
 -/* Vectorizer cost model implementation.  */
 -
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 static int
@@ -3861,36 +3861,182 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case vector_load:
       case vec_to_scalar:
       case scalar_to_vec:
 -      case cond_branch_not_taken:
 -      case vec_promote_demote:
       case scalar_store:
       case vector_store:
 	return 1;
 +      case vec_promote_demote:
       case vec_perm:
 	return LASX_SUPPORTED_MODE_P (mode)
 	  && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
       case unaligned_load:
 -      case vector_gather_load:
 -	return 2;
 -
       case unaligned_store:
 -      case vector_scatter_store:
 -	return 10;
 +	return 2;
       case cond_branch_taken:
 -	return 3;
 +	return 4;
 +
 +      case cond_branch_not_taken:
 +	return 2;
       case vec_construct:
 	elements = TYPE_VECTOR_SUBPARTS (vectype);
 -	return elements / 2 + 1;
 +	if (ISA_HAS_LASX)
 +	  return elements + 1;
 +	else
 +	  return elements;
       default:
 	gcc_unreachable ();
     }
 }
 +class loongarch_vector_costs : public vector_costs
 +{
 +public:
 +  using vector_costs::vector_costs;
 +
 +  unsigned int add_stmt_cost (int count, vect_cost_for_stmt kind,
 +			      stmt_vec_info stmt_info, slp_tree, tree vectype,
 +			      int misalign,
 +			      vect_cost_model_location where) override;
 +  void finish_cost (const vector_costs *) override;
 +
 +protected:
 +  void count_operations (vect_cost_for_stmt, stmt_vec_info,
 +			 vect_cost_model_location, unsigned int);
 +  unsigned int determine_suggested_unroll_factor (loop_vec_info);
 +  /* The number of vectorized stmts in loop.  */
 +  unsigned m_stmts = 0;
 +  /* The number of load and store operations in loop.  */
 +  unsigned m_loads = 0;
 +  unsigned m_stores = 0;
 +  /* Reduction factor for suggesting unroll factor.  */
 +  unsigned m_reduc_factor = 0;
 +  /* True if the loop contains an average operation. */
 +  bool m_has_avg =false;
 +};
 +
 +/* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
 +static vector_costs *
 +loongarch_vectorize_create_costs (vec_info *vinfo, bool costing_for_scalar)
 +{
 +  return new loongarch_vector_costs (vinfo, costing_for_scalar);
 +}
 +
 +void
 +loongarch_vector_costs::count_operations (vect_cost_for_stmt kind,
 +					  stmt_vec_info stmt_info,
 +					  vect_cost_model_location where,
 +					  unsigned int count)
 +{
 +  if (!m_costing_for_scalar
 +      && is_a<loop_vec_info> (m_vinfo)
 +      && where == vect_body)
 +    {
 +      m_stmts += count;
 +
 +      if (kind == scalar_load
 +	  || kind == vector_load
 +	  || kind == unaligned_load)
 +	m_loads += count;
 +      else if (kind == scalar_store
 +	       || kind == vector_store
 +	       || kind == unaligned_store)
 +	m_stores += count;
 +      else if ((kind == scalar_stmt
 +		|| kind == vector_stmt
 +		|| kind == vec_to_scalar)
 +	       && stmt_info && vect_is_reduction (stmt_info))
 +	{
 +	  tree lhs = gimple_get_lhs (stmt_info->stmt);
 +	  unsigned int base = FLOAT_TYPE_P (TREE_TYPE (lhs)) ? 2 : 1;
 +	  m_reduc_factor = MAX (base * count, m_reduc_factor);
 +	}
 +    }
 +}
 +
 +unsigned int
 +loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vinfo)
 +{
 +  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 +
 +  if (m_has_avg)
 +    return 1;
 +
 +  /* Don't unroll if it's specified explicitly not to be unrolled.  */
 +  if (loop->unroll == 1
 +      || (OPTION_SET_P (flag_unroll_loops) && !flag_unroll_loops)
 +      || (OPTION_SET_P (flag_unroll_all_loops) && !flag_unroll_all_loops))
 +    return 1;
 +
 +  unsigned int nstmts_nonldst = m_stmts - m_loads - m_stores;
 +  /* Don't unroll if no vector instructions excepting for memory access.  */
 +  if (nstmts_nonldst == 0)
 +    return 1;
 +
 +  /* Use this simple hardware resource model that how many non vld/vst
 +     vector instructions can be issued per cycle.  */
 +  unsigned int issue_info = loongarch_vect_issue_info;
 +  unsigned int reduc_factor = m_reduc_factor > 1 ? m_reduc_factor : 1;
 +  unsigned int uf = CEIL (reduc_factor * issue_info, nstmts_nonldst);
 +  uf = MIN ((unsigned int) loongarch_vect_unroll_limit, uf);
 +
 +  return 1 << ceil_log2 (uf);
 +}
 +
 +unsigned
 +loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 +				       stmt_vec_info stmt_info, slp_tree,
 +				       tree vectype, int misalign,
 +				       vect_cost_model_location where)
 +{
 +  unsigned retval = 0;
 +
 +  if (flag_vect_cost_model)
 +    {
 +      int stmt_cost = loongarch_builtin_vectorization_cost (kind, vectype,
 +							    misalign);
 +      retval = adjust_cost_for_freq (stmt_info, where, count * stmt_cost);
 +      m_costs[where] += retval;
 +
 +      count_operations (kind, stmt_info, where, count);
 +    }
 +
 +  if (stmt_info)
 +    {
 +      /* Detect the use of an averaging operation.  */
 +      gimple *stmt = stmt_info->stmt;
 +      if (is_gimple_call (stmt)
 +	  && gimple_call_internal_p (stmt))
 +	{
 +	  switch (gimple_call_internal_fn (stmt))
 +	    {
 +	    case IFN_AVG_FLOOR:
 +	    case IFN_AVG_CEIL:
 +	      m_has_avg = true;
 +	    default:
 +	      break;
 +	    }
 +	}
 +    }
 +
 +  return retval;
 +}
 +
 +void
 +loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
 +{
 +  loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
 +  if (loop_vinfo)
 +    {
 +      m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
 +    }
 +
 +  vector_costs::finish_cost (scalar_costs);
 +}
 +
 /* Implement TARGET_ADDRESS_COST.  */
 static int
@@ -7261,9 +7407,6 @@ loongarch_option_override_internal (struct gcc_options *opts,
   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
     error ("%qs cannot be used for compiling a shared library",
 	   "-mdirect-extern-access");
 -  if (loongarch_vector_access_cost == 0)
 -    loongarch_vector_access_cost = 5;
 -
   switch (la_target.cmodel)
     {
@@ -11275,6 +11418,8 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
   loongarch_builtin_vectorization_cost
 +#undef TARGET_VECTORIZE_CREATE_COSTS
 +#define TARGET_VECTORIZE_CREATE_COSTS loongarch_vectorize_create_costs
 #undef TARGET_IN_SMALL_DATA_P
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index 78f2baf3a..34bd832bd 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -159,10 +159,6 @@ mbranch-cost=
 Target RejectNegative Joined UInteger Var(loongarch_branch_cost)
 -mbranch-cost=COST	Set the cost of branches to roughly COST instructions.
 -mmemvec-cost=
 -Target RejectNegative Joined UInteger Var(loongarch_vector_access_cost) IntegerRange(1, 5)
 -mmemvec-cost=COST      Set the cost of vector memory access instructions.
 -
 mcheck-zero-division
 Target Mask(CHECK_ZERO_DIV)
 Trap on integer divide by zero.
@@ -226,3 +222,14 @@ mrelax
 Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
 +
 +-param=loongarch-vect-unroll-limit=
 +Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
 +Used to limit unroll factor which indicates how much the autovectorizer may
 +unroll a loop.  The default value is 6.
 +
 +-param=loongarch-vect-issue-info=
 +Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) IntegerRange(1, 64) Param
 +Indicate how many non memory access vector instructions can be issued per
 +cycle, it's used in unroll factor determination for autovectorizer.  The
 +default value is 4.
 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
 index 7eed77836..168f3d0db 100644
 --- a/gcc/doc/invoke.texi
 +++ b/gcc/doc/invoke.texi
@@ -24632,6 +24632,13 @@ environments where no dynamic link is performed, like firmwares, OS
 kernels, executables linked with @option{-static} or @option{-static-pie}.
 @option{-mdirect-extern-access} is not compatible with @option{-fPIC} or
 @option{-fpic}.
 +
 +@item loongarch-vect-unroll-limit
 +The vectorizer will use available tuning information to determine whether it
 +would be beneficial to unroll the main vectorized loop and by how much.  This
 +parameter set's the upper bound of how much the vectorizer will unroll the main
 +loop.  The default value is six.
 +
 @end table
 @node M32C Options
 -- 
 2.43.0
--- a/0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
+++ b/0014-LoongArch-Define-macro-CLEAR_INSN_CACHE.patch
@ -0,0 +1,34 @@
 From 7f4a912da99f5787c88b275b83ee547c9e1aa3d7 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Mon, 23 Oct 2023 09:07:32 +0800
 Subject: [PATCH 014/188] LoongArch: Define macro CLEAR_INSN_CACHE.
 LoongArch's microarchitecture ensures cache consistency by hardware.
 Due to out-of-order execution, "ibar" is required to ensure that the stores
 (which invalidate the icache) executed by this CPU before "ibar" are visible
 to instruction fetch.  "ibar" itself will not invalidate the icache, so the
 start and end parameters do not affect "ibar" performance.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.h (CLEAR_INSN_CACHE): New definition.
 ---
 gcc/config/loongarch/loongarch.h | 5 +++++
 1 file changed, 5 insertions(+)
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index 572b538be..f7ddfc452 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -1231,3 +1231,8 @@ struct GTY (()) machine_function
   (TARGET_HARD_FLOAT_ABI ? (TARGET_DOUBLE_FLOAT_ABI ? 8 : 4) : 0)
 #define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)
 +
 +/* LoongArch maintains ICache/DCache coherency by hardware,
 +   we just need "ibar" to avoid instruction hazard here.  */
 +#undef  CLEAR_INSN_CACHE
 +#define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
 -- 
 2.43.0
--- a/0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
+++ b/0015-LoongArch-Add-enum-style-mexplicit-relocs-option.patch
@ -0,0 +1,233 @@
 From 56403837a7859f0a7ccbc56c055261c9adf22fb8 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 23 Oct 2023 15:23:11 +0800
 Subject: [PATCH 015/188] LoongArch: Add enum-style -mexplicit-relocs= option
 To strike a better balance between scheduling and relaxation when -flto is
 enabled, add three-way -mexplicit-relocs={auto,none,always} options.
 The old -mexplicit-relocs and -mno-explicit-relocs options are still
 supported, they are mapped to -mexplicit-relocs=always and
 -mexplicit-relocs=none.
 The default choice is determined by probing assembler capabilities at
 build time.  If the assembler does not support explicit relocs at all,
 the default will be none; if it supports explicit relocs but not
 relaxation, the default will be always; if both explicit relocs and
 relaxation are supported, the default will be auto.
 Currently auto is the same as none.  We will make auto more clever in
 the following changes.
 gcc/ChangeLog:
 	* config/loongarch/genopts/loongarch-strings: Add strings for
 	-mexplicit-relocs={auto,none,always}.
 	* config/loongarch/genopts/loongarch.opt.in: Add options for
 	-mexplicit-relocs={auto,none,always}.
 	* config/loongarch/loongarch-str.h: Regenerate.
 	* config/loongarch/loongarch.opt: Regenerate.
 	* config/loongarch/loongarch-def.h
 	(EXPLICIT_RELOCS_AUTO): Define.
 	(EXPLICIT_RELOCS_NONE): Define.
 	(EXPLICIT_RELOCS_ALWAYS): Define.
 	(N_EXPLICIT_RELOCS_TYPES): Define.
 	* config/loongarch/loongarch.cc
 	(loongarch_option_override_internal): Error out if the old-style
 	-m[no-]explicit-relocs option is used with
 	-mexplicit-relocs={auto,none,always} together.  Map
 	-mno-explicit-relocs to -mexplicit-relocs=none and
 	-mexplicit-relocs to -mexplicit-relocs=always for backward
 	compatibility.  Set a proper default for -mexplicit-relocs=
 	based on configure-time probed assembler capability.  Update a
 	diagnostic message to mention -mexplicit-relocs=always instead
 	of the old-style -mexplicit-relocs.
 	(loongarch_handle_model_attribute): Update a diagnostic message
 	to mention -mexplicit-relocs=always instead of the old-style
 	-mexplicit-relocs.
 	* config/loongarch/loongarch.h (TARGET_EXPLICIT_RELOCS): Define.
 ---
 .../loongarch/genopts/loongarch-strings       |  6 +++++
 gcc/config/loongarch/genopts/loongarch.opt.in | 21 ++++++++++++++--
 gcc/config/loongarch/loongarch-def.h          |  6 +++++
 gcc/config/loongarch/loongarch-str.h          |  5 ++++
 gcc/config/loongarch/loongarch.cc             | 24 +++++++++++++++++--
 gcc/config/loongarch/loongarch.h              |  3 +++
 gcc/config/loongarch/loongarch.opt            | 21 ++++++++++++++--
 7 files changed, 80 insertions(+), 6 deletions(-)
 diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
 index eb5086fe3..6c8a42af2 100644
 --- a/gcc/config/loongarch/genopts/loongarch-strings
 +++ b/gcc/config/loongarch/genopts/loongarch-strings
@@ -65,3 +65,9 @@ STR_CMODEL_TS	      tiny-static
 STR_CMODEL_MEDIUM     medium
 STR_CMODEL_LARGE      large
 STR_CMODEL_EXTREME    extreme
 +
 +# -mexplicit-relocs
 +OPTSTR_EXPLICIT_RELOCS		explicit-relocs
 +STR_EXPLICIT_RELOCS_AUTO	auto
 +STR_EXPLICIT_RELOCS_NONE	none
 +STR_EXPLICIT_RELOCS_ALWAYS	always
 diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
 index 74cf4a7f7..e7df1964a 100644
 --- a/gcc/config/loongarch/genopts/loongarch.opt.in
 +++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -176,10 +176,27 @@ mmax-inline-memcpy-size=
 Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024)
 -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
 -mexplicit-relocs
 -Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
 +Enum
 +Name(explicit_relocs) Type(int)
 +The code model option names for -mexplicit-relocs:
 +
 +EnumValue
 +Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_AUTO@@) Value(EXPLICIT_RELOCS_AUTO)
 +
 +EnumValue
 +Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_NONE@@) Value(EXPLICIT_RELOCS_NONE)
 +
 +EnumValue
 +Enum(explicit_relocs) String(@@STR_EXPLICIT_RELOCS_ALWAYS@@) Value(EXPLICIT_RELOCS_ALWAYS)
 +
 +mexplicit-relocs=
 +Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET)
 Use %reloc() assembly operators.
 +mexplicit-relocs
 +Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET)
 +Use %reloc() assembly operators (for backward compatibility).
 +
 ; The code model option names for -mcmodel.
 Enum
 Name(cmodel) Type(int)
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index eb8e53b20..4757de14b 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -100,6 +100,12 @@ extern const char* loongarch_cmodel_strings[];
 #define CMODEL_EXTREME	      5
 #define N_CMODEL_TYPES	      6
 +/* enum explicit_relocs */
 +#define EXPLICIT_RELOCS_AUTO	0
 +#define EXPLICIT_RELOCS_NONE	1
 +#define EXPLICIT_RELOCS_ALWAYS	2
 +#define N_EXPLICIT_RELOCS_TYPES	3
 +
 /* The common default value for variables whose assignments
    are triggered by command-line options.  */
 diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
 index ecfebf9db..037e9e583 100644
 --- a/gcc/config/loongarch/loongarch-str.h
 +++ b/gcc/config/loongarch/loongarch-str.h
@@ -64,4 +64,9 @@ along with GCC; see the file COPYING3.  If not see
 #define STR_CMODEL_LARGE "large"
 #define STR_CMODEL_EXTREME "extreme"
 +#define OPTSTR_EXPLICIT_RELOCS "explicit-relocs"
 +#define STR_EXPLICIT_RELOCS_AUTO "auto"
 +#define STR_EXPLICIT_RELOCS_NONE "none"
 +#define STR_EXPLICIT_RELOCS_ALWAYS "always"
 +
 #endif /* LOONGARCH_STR_H */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index e22a64600..3258c8655 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -7383,6 +7383,25 @@ loongarch_option_override_internal (struct gcc_options *opts,
   loongarch_update_gcc_opt_status (&la_target, opts, opts_set);
   loongarch_cpu_option_override (&la_target, opts, opts_set);
 +  if (la_opt_explicit_relocs != M_OPT_UNSET
 +      && la_opt_explicit_relocs_backward != M_OPT_UNSET)
 +    error ("do not use %qs (with %qs) and %qs (without %qs) together",
 +	   "-mexplicit-relocs=", "=",
 +	   la_opt_explicit_relocs_backward ? "-mexplicit-relocs"
 +					   : "-mno-explicit-relocs", "=");
 +
 +  if (la_opt_explicit_relocs_backward != M_OPT_UNSET)
 +    la_opt_explicit_relocs = (la_opt_explicit_relocs_backward
 +			      ? EXPLICIT_RELOCS_ALWAYS
 +			      : EXPLICIT_RELOCS_NONE);
 +
 +  if (la_opt_explicit_relocs == M_OPT_UNSET)
 +    la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
 +			      ? (HAVE_AS_MRELAX_OPTION
 +				 ? EXPLICIT_RELOCS_AUTO
 +				 : EXPLICIT_RELOCS_ALWAYS)
 +			      : EXPLICIT_RELOCS_NONE);
 +
   if (TARGET_ABI_LP64)
     flag_pcc_struct_return = 0;
@@ -7413,7 +7432,7 @@ loongarch_option_override_internal (struct gcc_options *opts,
       case CMODEL_EXTREME:
 	if (!TARGET_EXPLICIT_RELOCS)
 	  error ("code model %qs needs %s",
 -		 "extreme", "-mexplicit-relocs");
 +		 "extreme", "-mexplicit-relocs=always");
 	if (opts->x_flag_plt)
 	  {
@@ -7717,7 +7736,8 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
       if (!TARGET_EXPLICIT_RELOCS)
 	{
 	  error_at (DECL_SOURCE_LOCATION (decl),
 -		    "%qE attribute requires %s", name, "-mexplicit-relocs");
 +		    "%qE attribute requires %s", name,
 +		    "-mexplicit-relocs=always");
 	  *no_add_attrs = true;
 	  return NULL_TREE;
 	}
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index f7ddfc452..6e8ac293a 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -1236,3 +1236,6 @@ struct GTY (()) machine_function
    we just need "ibar" to avoid instruction hazard here.  */
 #undef  CLEAR_INSN_CACHE
 #define CLEAR_INSN_CACHE(beg, end) __builtin_loongarch_ibar (0)
 +
 +#define TARGET_EXPLICIT_RELOCS \
 +  (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index 34bd832bd..44376fd77 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -183,10 +183,27 @@ mmax-inline-memcpy-size=
 Target Joined RejectNegative UInteger Var(loongarch_max_inline_memcpy_size) Init(1024)
 -mmax-inline-memcpy-size=SIZE	Set the max size of memcpy to inline, default is 1024.
 -mexplicit-relocs
 -Target Var(TARGET_EXPLICIT_RELOCS) Init(HAVE_AS_EXPLICIT_RELOCS & !HAVE_AS_MRELAX_OPTION)
 +Enum
 +Name(explicit_relocs) Type(int)
 +The code model option names for -mexplicit-relocs:
 +
 +EnumValue
 +Enum(explicit_relocs) String(auto) Value(EXPLICIT_RELOCS_AUTO)
 +
 +EnumValue
 +Enum(explicit_relocs) String(none) Value(EXPLICIT_RELOCS_NONE)
 +
 +EnumValue
 +Enum(explicit_relocs) String(always) Value(EXPLICIT_RELOCS_ALWAYS)
 +
 +mexplicit-relocs=
 +Target RejectNegative Joined Enum(explicit_relocs) Var(la_opt_explicit_relocs) Init(M_OPT_UNSET)
 Use %reloc() assembly operators.
 +mexplicit-relocs
 +Target Var(la_opt_explicit_relocs_backward) Init(M_OPT_UNSET)
 +Use %reloc() assembly operators (for backward compatibility).
 +
 ; The code model option names for -mcmodel.
 Enum
 Name(cmodel) Type(int)
 -- 
 2.43.0
--- a/0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
+++ b/0016-LoongArch-Use-explicit-relocs-for-GOT-access-when-me.patch
@ -0,0 +1,212 @@
 From 8539e5560e7bf11473cc7c386043b7019264236a Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 30 Sep 2023 18:46:28 +0800
 Subject: [PATCH 016/188] LoongArch: Use explicit relocs for GOT access when
 -mexplicit-relocs=auto and LTO during a final link with linker plugin
 If we are performing LTO for a final link and linker plugin is enabled,
 then we are sure any GOT access may resolve to a symbol out of the link
 unit (otherwise the linker plugin will tell us the symbol should be
 resolved locally and we'll use PC-relative access instead).
 Produce machine instructions with explicit relocs instead of la.global
 for better scheduling.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-protos.h
 	(loongarch_explicit_relocs_p): Declare new function.
 	* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
 	Implement.
 	(loongarch_symbol_insns): Call loongarch_explicit_relocs_p for
 	SYMBOL_GOT_DISP, instead of using TARGET_EXPLICIT_RELOCS.
 	(loongarch_split_symbol): Call loongarch_explicit_relocs_p for
 	deciding if return early, instead of using
 	TARGET_EXPLICIT_RELOCS.
 	(loongarch_output_move): Call loongarch_explicit_relocs_p
 	instead of using TARGET_EXPLICIT_RELOCS.
 	* config/loongarch/loongarch.md (*low<mode>): Remove
 	TARGET_EXPLICIT_RELOCS from insn condition.
 	(@ld_from_got<mode>): Likewise.
 	* config/loongarch/predicates.md (move_operand): Call
 	loongarch_explicit_relocs_p instead of using
 	TARGET_EXPLICIT_RELOCS.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/explicit-relocs-auto-lto.c: New test.
 ---
 gcc/config/loongarch/loongarch-protos.h       |  1 +
 gcc/config/loongarch/loongarch.cc             | 34 +++++++++++++++----
 gcc/config/loongarch/loongarch.md             |  4 +--
 gcc/config/loongarch/predicates.md            |  8 ++---
 .../loongarch/explicit-relocs-auto-lto.c      | 26 ++++++++++++++
 5 files changed, 59 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
 diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
 index 163162598..51d38177b 100644
 --- a/gcc/config/loongarch/loongarch-protos.h
 +++ b/gcc/config/loongarch/loongarch-protos.h
@@ -220,4 +220,5 @@ extern rtx loongarch_gen_const_int_vector_shuffle (machine_mode, int);
 extern tree loongarch_build_builtin_va_list (void);
 extern rtx loongarch_build_signbit_mask (machine_mode, bool, bool);
 +extern bool loongarch_explicit_relocs_p (enum loongarch_symbol_type);
 #endif /* ! GCC_LOONGARCH_PROTOS_H */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 3258c8655..1d20577e7 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -1922,6 +1922,29 @@ loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type)
   gcc_unreachable ();
 }
 +/* If -mexplicit-relocs=auto, we use machine operations with reloc hints
 +   for cases where the linker is unable to relax so we can schedule the
 +   machine operations, otherwise use an assembler pseudo-op so the
 +   assembler will generate R_LARCH_RELAX.  */
 +
 +bool
 +loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
 +{
 +  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
 +    return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 +
 +  /* If we are performing LTO for a final link, and we have the linker
 +     plugin so we know the resolution of the symbols, then all GOT
 +     references are binding to external symbols or preemptable symbols.
 +     So the linker cannot relax them.  */
 +  return (in_lto_p
 +	  && !flag_incremental_link
 +	  && HAVE_LTO_PLUGIN == 2
 +	  && (!global_options_set.x_flag_use_linker_plugin
 +	      || global_options.x_flag_use_linker_plugin)
 +	  && type == SYMBOL_GOT_DISP);
 +}
 +
 /* Returns the number of instructions necessary to reference a symbol.  */
 static int
@@ -1937,7 +1960,7 @@ loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode)
     case SYMBOL_GOT_DISP:
       /* The constant will have to be loaded from the GOT before it
 	 is used in an address.  */
 -      if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE)
 +      if (!loongarch_explicit_relocs_p (type) && mode != MAX_MACHINE_MODE)
 	return 0;
       return 3;
@@ -3034,7 +3057,7 @@ loongarch_symbol_extreme_p (enum loongarch_symbol_type type)
    If so, and if LOW_OUT is nonnull, emit the high part and store the
    low part in *LOW_OUT.  Leave *LOW_OUT unchanged otherwise.
 -   Return false if build with '-mno-explicit-relocs'.
 +   Return false if build with '-mexplicit-relocs=none'.
    TEMP is as for loongarch_force_temporary and is used to load the high
    part into a register.
@@ -3048,12 +3071,9 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
 {
   enum loongarch_symbol_type symbol_type;
 -  /* If build with '-mno-explicit-relocs', don't split symbol.  */
 -  if (!TARGET_EXPLICIT_RELOCS)
 -    return false;
 -
   if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE)
       || !loongarch_symbolic_constant_p (addr, &symbol_type)
 +      || !loongarch_explicit_relocs_p (symbol_type)
       || loongarch_symbol_insns (symbol_type, mode) == 0
       || !loongarch_split_symbol_type (symbol_type))
     return false;
@@ -4793,7 +4813,7 @@ loongarch_output_move (rtx dest, rtx src)
 	}
     }
 -  if (!TARGET_EXPLICIT_RELOCS
 +  if (!loongarch_explicit_relocs_p (loongarch_classify_symbol (src))
       && dest_code == REG && symbolic_operand (src, VOIDmode))
     {
       if (loongarch_classify_symbol (src) == SYMBOL_PCREL)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 29ac950bf..81c97393b 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2247,7 +2247,7 @@
   [(set (match_operand:P 0 "register_operand" "=r")
  (lo_sum:P (match_operand:P 1 "register_operand" " r")
      (match_operand:P 2 "symbolic_operand" "")))]
 -  "TARGET_EXPLICIT_RELOCS"
 +  ""
   "addi.<d>\t%0,%1,%L2"
   [(set_attr "type" "arith")
    (set_attr "mode" "<MODE>")])
@@ -2275,7 +2275,7 @@
 				(match_operand:P 1 "register_operand" "r")
 				(match_operand:P 2 "symbolic_operand")))]
 	UNSPEC_LOAD_FROM_GOT))]
 -  "TARGET_EXPLICIT_RELOCS"
 +  ""
   "ld.<d>\t%0,%1,%L2"
   [(set_attr "type" "move")]
 )
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index ad6cee5c4..6b50b3a4d 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -541,16 +541,14 @@
     case SYMBOL_REF:
     case LABEL_REF:
       return (loongarch_symbolic_constant_p (op, &symbol_type)
 -	      && (!TARGET_EXPLICIT_RELOCS
 +	      && (!loongarch_explicit_relocs_p (symbol_type)
 		  || !loongarch_split_symbol_type (symbol_type)));
     case HIGH:
 -      /* '-mno-explicit-relocs' don't generate high/low pairs.  */
 -      if (!TARGET_EXPLICIT_RELOCS)
 -	return false;
 -
       op = XEXP (op, 0);
 +
       return (loongarch_symbolic_constant_p (op, &symbol_type)
 +	      && loongarch_explicit_relocs_p (symbol_type)
 	      && loongarch_split_symbol_type (symbol_type));
     default:
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
 new file mode 100644
 index 000000000..f53b54689
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-lto.c
@@ -0,0 +1,26 @@
 +/* { dg-do link } */
 +/* { dg-require-effective-target lto } */
 +/* { dg-require-linker-plugin "" } */
 +/* { dg-options "-fpic -shared -O2 --save-temps -mexplicit-relocs=auto -flto -fuse-linker-plugin -flto-partition=one" } */
 +
 +int pcrel __attribute__ ((visibility ("hidden")));
 +int got __attribute__ ((visibility ("default")));
 +
 +int
 +*addr_pcrel (void)
 +{
 +  return &pcrel;
 +}
 +
 +int
 +*addr_got (void)
 +{
 +  return &got;
 +}
 +
 +/* With linker plugin we should use la.local (it can be relaxed to pcaddi),
 +   but not la.global (we are pretty sure the linker cannot relax la.global
 +   got).  */
 +/* { dg-final { scan-lto-assembler "la.local.*pcrel" } } */
 +/* { dg-final { scan-lto-assembler "pcalau12i.*%got_pc_hi20\\\(got\\\)" } } */
 +/* { dg-final { scan-lto-assembler "ld.*%got_pc_lo12\\\(got\\\)" } } */
 -- 
 2.43.0
--- a/0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
+++ b/0017-LoongArch-Use-explicit-relocs-for-TLS-access-with-me.patch
@ -0,0 +1,146 @@
 From 23b4166c6699a1a3063b11fa45497c1a1524bd48 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 2 Oct 2023 13:00:18 +0800
 Subject: [PATCH 017/188] LoongArch: Use explicit relocs for TLS access with
 -mexplicit-relocs=auto
 The linker does not know how to relax TLS access for LoongArch, so let's
 emit machine instructions with explicit relocs for TLS.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
 	Return true for TLS symbol types if -mexplicit-relocs=auto.
 	(loongarch_call_tls_get_addr): Replace TARGET_EXPLICIT_RELOCS
 	with la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE.
 	(loongarch_legitimize_tls_address): Likewise.
 	* config/loongarch/loongarch.md (@tls_low<mode>): Remove
 	TARGET_EXPLICIT_RELOCS from insn condition.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c: New
 	test.
 	* gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c: New
 	test.
 ---
 gcc/config/loongarch/loongarch.cc             | 37 ++++++++++++-------
 gcc/config/loongarch/loongarch.md             |  2 +-
 .../explicit-relocs-auto-tls-ld-gd.c          |  9 +++++
 .../explicit-relocs-auto-tls-le-ie.c          |  6 +++
 4 files changed, 40 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 1d20577e7..fa5c14be6 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -1933,16 +1933,27 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
   if (la_opt_explicit_relocs != EXPLICIT_RELOCS_AUTO)
     return la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS;
 -  /* If we are performing LTO for a final link, and we have the linker
 -     plugin so we know the resolution of the symbols, then all GOT
 -     references are binding to external symbols or preemptable symbols.
 -     So the linker cannot relax them.  */
 -  return (in_lto_p
 -	  && !flag_incremental_link
 -	  && HAVE_LTO_PLUGIN == 2
 -	  && (!global_options_set.x_flag_use_linker_plugin
 -	      || global_options.x_flag_use_linker_plugin)
 -	  && type == SYMBOL_GOT_DISP);
 +  switch (type)
 +    {
 +      case SYMBOL_TLS_IE:
 +      case SYMBOL_TLS_LE:
 +      case SYMBOL_TLSGD:
 +      case SYMBOL_TLSLDM:
 +	/* The linker don't know how to relax TLS accesses.  */
 +	return true;
 +      case SYMBOL_GOT_DISP:
 +	/* If we are performing LTO for a final link, and we have the
 +	   linker plugin so we know the resolution of the symbols, then
 +	   all GOT references are binding to external symbols or
 +	   preemptable symbols.  So the linker cannot relax them.  */
 +	return (in_lto_p
 +		&& !flag_incremental_link
 +		&& HAVE_LTO_PLUGIN == 2
 +		&& (!global_options_set.x_flag_use_linker_plugin
 +		    || global_options.x_flag_use_linker_plugin));
 +      default:
 +	return false;
 +    }
 }
 /* Returns the number of instructions necessary to reference a symbol.  */
@@ -2749,7 +2760,7 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
   start_sequence ();
 -  if (TARGET_EXPLICIT_RELOCS)
 +  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
     {
       /* Split tls symbol to high and low.  */
       rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
@@ -2914,7 +2925,7 @@ loongarch_legitimize_tls_address (rtx loc)
 	  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
 	  tmp1 = gen_reg_rtx (Pmode);
 	  dest = gen_reg_rtx (Pmode);
 -	  if (TARGET_EXPLICIT_RELOCS)
 +	  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
 	    {
 	      tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE);
 	      tmp3 = gen_reg_rtx (Pmode);
@@ -2951,7 +2962,7 @@ loongarch_legitimize_tls_address (rtx loc)
 	  tmp1 = gen_reg_rtx (Pmode);
 	  dest = gen_reg_rtx (Pmode);
 -	  if (TARGET_EXPLICIT_RELOCS)
 +	  if (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
 	    {
 	      tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE);
 	      tmp3 = gen_reg_rtx (Pmode);
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 81c97393b..3b836d535 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2257,7 +2257,7 @@
 	(unspec:P [(mem:P (lo_sum:P (match_operand:P 1 "register_operand" "r")
 				    (match_operand:P 2 "symbolic_operand" "")))]
 	UNSPEC_TLS_LOW))]
 -  "TARGET_EXPLICIT_RELOCS"
 +  ""
   "addi.<d>\t%0,%1,%L2"
   [(set_attr "type" "arith")
    (set_attr "mode" "<MODE>")])
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c
 new file mode 100644
 index 000000000..957ff98df
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-ld-gd.c
@@ -0,0 +1,9 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -fPIC -mexplicit-relocs=auto" } */
 +
 +__thread int a __attribute__((visibility("hidden")));
 +extern __thread int b __attribute__((visibility("default")));
 +
 +int test() { return a + b; }
 +
 +/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c
 new file mode 100644
 index 000000000..78898cfc6
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-tls-le-ie.c
@@ -0,0 +1,6 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mexplicit-relocs=auto" } */
 +
 +#include "explicit-relocs-auto-tls-ld-gd.c"
 +
 +/* { dg-final { scan-assembler-not "la.tls" { target tls_native } } } */
 -- 
 2.43.0
--- a/0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
+++ b/0018-LoongArch-Use-explicit-relocs-for-addresses-only-use.patch
@ -0,0 +1,245 @@
 From c29a4f4fb5ff24ef975ba27688a3da696aa7d006 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 1 Oct 2023 11:14:29 +0800
 Subject: [PATCH 018/188] LoongArch: Use explicit relocs for addresses only
 used for one load or store with -mexplicit-relocs=auto and
 -mcmodel={normal,medium}
 In these cases, if we use explicit relocs, we end up with 2
 instructions:
    pcalau12i    t0, %pc_hi20(x)
    ld.d         t0, t0, %pc_lo12(x)
 If we use la.local pseudo-op, in the best scenario (x is in +/- 2MiB
 range) we still have 2 instructions:
    pcaddi       t0, %pcrel_20(x)
    ld.d         t0, t0, 0
 If x is out of the range we'll have 3 instructions.  So for these cases
 just emit machine instructions with explicit relocs.
 gcc/ChangeLog:
 	* config/loongarch/predicates.md (symbolic_pcrel_operand): New
 	predicate.
 	* config/loongarch/loongarch.md (define_peephole2): Optimize
 	la.local + ld/st to pcalau12i + ld/st if the address is only used
 	once if -mexplicit-relocs=auto and -mcmodel=normal or medium.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/explicit-relocs-auto-single-load-store.c:
 	New test.
 	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c:
 	New test.
 ---
 gcc/config/loongarch/loongarch.md             | 122 ++++++++++++++++++
 gcc/config/loongarch/predicates.md            |   7 +
 ...-relocs-auto-single-load-store-no-anchor.c |   6 +
 .../explicit-relocs-auto-single-load-store.c  |  14 ++
 4 files changed, 149 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 3b836d535..c4c6baa60 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -65,6 +65,7 @@
   UNSPEC_LOAD_FROM_GOT
   UNSPEC_PCALAU12I
 +  UNSPEC_PCALAU12I_GR
   UNSPEC_ORI_L_LO12
   UNSPEC_LUI_L_HI20
   UNSPEC_LUI_H_LO20
@@ -2297,6 +2298,16 @@
   "pcalau12i\t%0,%%pc_hi20(%1)"
   [(set_attr "type" "move")])
 +;; @pcalau12i may be used for sibcall so it has a strict constraint.  This
 +;; allows any general register as the operand.
 +(define_insn "@pcalau12i_gr<mode>"
 +  [(set (match_operand:P 0 "register_operand" "=r")
 +       (unspec:P [(match_operand:P 1 "symbolic_operand" "")]
 +       UNSPEC_PCALAU12I_GR))]
 +  ""
 +  "pcalau12i\t%0,%%pc_hi20(%1)"
 +  [(set_attr "type" "move")])
 +
 (define_insn "@ori_l_lo12<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
 	(unspec:P [(match_operand:P 1 "register_operand" "r")
@@ -3748,6 +3759,117 @@
   [(set_attr "type" "unknown")
    (set_attr "mode" "<MODE>")])
 +;; With normal or medium code models, if the only use of a pc-relative
 +;; address is for loading or storing a value, then relying on linker
 +;; relaxation is not better than emitting the machine instruction directly.
 +;; Even if the la.local pseudo op can be relaxed, we get:
 +;;
 +;;     pcaddi     $t0, %pcrel_20(x)
 +;;     ld.d       $t0, $t0, 0
 +;;
 +;; There are still two instructions, same as using the machine instructions
 +;; and explicit relocs:
 +;;
 +;;     pcalau12i  $t0, %pc_hi20(x)
 +;;     ld.d       $t0, $t0, %pc_lo12(x)
 +;;
 +;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
 +;; 3 instructions).
 +(define_peephole2
 +  [(set (match_operand:P 0 "register_operand")
 +	(match_operand:P 1 "symbolic_pcrel_operand"))
 +   (set (match_operand:GPR 2 "register_operand")
 +	(mem:GPR (match_dup 0)))]
 +  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 +   && (peep2_reg_dead_p (2, operands[0]) \
 +       || REGNO (operands[0]) == REGNO (operands[2]))"
 +  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
 +  {
 +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +  })
 +
 +(define_peephole2
 +  [(set (match_operand:P 0 "register_operand")
 +	(match_operand:P 1 "symbolic_pcrel_operand"))
 +   (set (match_operand:GPR 2 "register_operand")
 +	(mem:GPR (plus (match_dup 0)
 +		       (match_operand 3 "const_int_operand"))))]
 +  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 +   && (peep2_reg_dead_p (2, operands[0]) \
 +       || REGNO (operands[0]) == REGNO (operands[2]))"
 +  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
 +  {
 +    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
 +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +  })
 +
 +(define_peephole2
 +  [(set (match_operand:P 0 "register_operand")
 +	(match_operand:P 1 "symbolic_pcrel_operand"))
 +   (set (match_operand:GPR 2 "register_operand")
 +	(any_extend:GPR (mem:SUBDI (match_dup 0))))]
 +  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 +   && (peep2_reg_dead_p (2, operands[0]) \
 +       || REGNO (operands[0]) == REGNO (operands[2]))"
 +  [(set (match_dup 2)
 +	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
 +					     (match_dup 1)))))]
 +  {
 +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +  })
 +
 +(define_peephole2
 +  [(set (match_operand:P 0 "register_operand")
 +	(match_operand:P 1 "symbolic_pcrel_operand"))
 +   (set (match_operand:GPR 2 "register_operand")
 +	(any_extend:GPR
 +	  (mem:SUBDI (plus (match_dup 0)
 +			   (match_operand 3 "const_int_operand")))))]
 +  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 +   && (peep2_reg_dead_p (2, operands[0]) \
 +       || REGNO (operands[0]) == REGNO (operands[2]))"
 +  [(set (match_dup 2)
 +	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
 +					     (match_dup 1)))))]
 +  {
 +    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
 +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +  })
 +
 +(define_peephole2
 +  [(set (match_operand:P 0 "register_operand")
 +	(match_operand:P 1 "symbolic_pcrel_operand"))
 +   (set (mem:QHWD (match_dup 0))
 +	(match_operand:QHWD 2 "register_operand"))]
 +  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 +   && (peep2_reg_dead_p (2, operands[0])) \
 +   && REGNO (operands[0]) != REGNO (operands[2])"
 +  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
 +  {
 +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +  })
 +
 +(define_peephole2
 +  [(set (match_operand:P 0 "register_operand")
 +	(match_operand:P 1 "symbolic_pcrel_operand"))
 +   (set (mem:QHWD (plus (match_dup 0)
 +			(match_operand 3 "const_int_operand")))
 +	(match_operand:QHWD 2 "register_operand"))]
 +  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 +   && (peep2_reg_dead_p (2, operands[0])) \
 +   && REGNO (operands[0]) != REGNO (operands[2])"
 +  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
 +  {
 +    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
 +    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +  })
 +
 ;; Synchronization instructions.
 (include "sync.md")
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 6b50b3a4d..1d669f560 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -563,6 +563,13 @@
   return loongarch_symbolic_constant_p (op, &type);
 })
 +(define_predicate "symbolic_pcrel_operand"
 +  (match_code "const,symbol_ref,label_ref")
 +{
 +  enum loongarch_symbol_type type;
 +  return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL;
 +})
 +
 (define_predicate "equality_operator"
   (match_code "eq,ne"))
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
 new file mode 100644
 index 000000000..fb03403d7
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-no-anchor.c
@@ -0,0 +1,6 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto -fno-section-anchors" } */
 +
 +#include "explicit-relocs-auto-single-load-store.c"
 +
 +/* { dg-final { scan-assembler-not "la.local" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
 new file mode 100644
 index 000000000..0d53644cd
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store.c
@@ -0,0 +1,14 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */
 +
 +long a;
 +int b;
 +unsigned int c;
 +
 +long load_a() { return a; }
 +long load_b() { return b; }
 +long load_c() { return c; }
 +void store_a(long x) { a = x; }
 +void store_b(int x) { b = x; }
 +
 +/* { dg-final { scan-assembler-not "la.local" } } */
 -- 
 2.43.0
--- a/0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch
+++ b/0019-LoongArch-Implement-__builtin_thread_pointer-for-TLS.patch
@ -0,0 +1,84 @@
 From 619b6081064bf85a19f4659e278a361875e4f9fb Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Tue, 24 Oct 2023 14:40:14 +0800
 Subject: [PATCH 019/188] LoongArch: Implement __builtin_thread_pointer for
 TLS.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (get_thread_pointer<mode>): Adds the
 	instruction template corresponding to the __builtin_thread_pointer
 	function.
 	* doc/extend.texi: Add the __builtin_thread_pointer function support
 	description to the documentation.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/builtin_thread_pointer.c: New test.
 ---
 gcc/config/loongarch/loongarch.md                      |  7 +++++++
 gcc/doc/extend.texi                                    |  5 +++++
 .../gcc.target/loongarch/builtin_thread_pointer.c      | 10 ++++++++++
 3 files changed, 22 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index c4c6baa60..80487488d 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -113,6 +113,7 @@
 (define_constants
   [(RETURN_ADDR_REGNUM		1)
 +   (TP_REGNUM			2)
    (T0_REGNUM			12)
    (T1_REGNUM			13)
    (S0_REGNUM			23)
@@ -3647,6 +3648,12 @@
   [(set_attr "length" "0")
    (set_attr "type" "ghost")])
 +;; Named pattern for expanding thread pointer reference.
 +(define_expand "get_thread_pointer<mode>"
 +  [(set (match_operand:P 0 "register_operand" "=r")
 +	(reg:P TP_REGNUM))]
 +  "HAVE_AS_TLS"
 +  {})
 (define_split
   [(match_operand 0 "small_data_pattern")]
 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
 index 1d1bac255..497c6de5f 100644
 --- a/gcc/doc/extend.texi
 +++ b/gcc/doc/extend.texi
@@ -16257,6 +16257,11 @@ function you need to include @code{larchintrin.h}.
     void __break (imm0_32767)
 @end smallexample
 +Returns the value that is currently set in the @samp{tp} register.
 +@smallexample
 +    void * __builtin_thread_pointer (void)
 +@end smallexample
 +
 @node MIPS DSP Built-in Functions
 @subsection MIPS DSP Built-in Functions
 diff --git a/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
 new file mode 100644
 index 000000000..541e3b143
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/builtin_thread_pointer.c
@@ -0,0 +1,10 @@
 +/* { dg-do compile } */
 +/* { dg-require-effective-target tls_native } */
 +/* { dg-options "-O2" } */
 +/* { dg-final { scan-assembler "or\t\\\$r4,\\\$r2,\\\$r0" } } */
 +
 +void *
 +get_tp ()
 +{
 +  return __builtin_thread_pointer ();
 +}
 -- 
 2.43.0
--- a/0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
+++ b/0020-LoongArch-Fix-vfrint-releated-comments-in-lsxintrin..patch
@ -0,0 +1,189 @@
 From 9b29e6ba10716656ba9b32c33f021e920bb05f3d Mon Sep 17 00:00:00 2001
 From: Chenghui Pan <panchenghui@loongson.cn>
 Date: Mon, 23 Oct 2023 10:13:24 +0800
 Subject: [PATCH 020/188] LoongArch: Fix vfrint-related comments in
 lsxintrin.h and lasxintrin.h
 The comments on the vfrint-related intrinsic functions do not match the return
 value types in their definitions.  This patch fixes these comments.
 gcc/ChangeLog:
 	* config/loongarch/lasxintrin.h (__lasx_xvftintrnel_l_s): Fix comments.
 	(__lasx_xvfrintrne_s): Ditto.
 	(__lasx_xvfrintrne_d): Ditto.
 	(__lasx_xvfrintrz_s): Ditto.
 	(__lasx_xvfrintrz_d): Ditto.
 	(__lasx_xvfrintrp_s): Ditto.
 	(__lasx_xvfrintrp_d): Ditto.
 	(__lasx_xvfrintrm_s): Ditto.
 	(__lasx_xvfrintrm_d): Ditto.
 	* config/loongarch/lsxintrin.h (__lsx_vftintrneh_l_s): Ditto.
 	(__lsx_vfrintrne_s): Ditto.
 	(__lsx_vfrintrne_d): Ditto.
 	(__lsx_vfrintrz_s): Ditto.
 	(__lsx_vfrintrz_d): Ditto.
 	(__lsx_vfrintrp_s): Ditto.
 	(__lsx_vfrintrp_d): Ditto.
 	(__lsx_vfrintrm_s): Ditto.
 	(__lsx_vfrintrm_d): Ditto.
 ---
 gcc/config/loongarch/lasxintrin.h | 16 ++++++++--------
 gcc/config/loongarch/lsxintrin.h  | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)
 diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h
 index d39379927..7bce2c757 100644
 --- a/gcc/config/loongarch/lasxintrin.h
 +++ b/gcc/config/loongarch/lasxintrin.h
@@ -3368,7 +3368,7 @@ __m256i __lasx_xvftintrnel_l_s (__m256 _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V8SI, V8SF.  */
 +/* Data types in instruction templates:  V8SF, V8SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256 __lasx_xvfrintrne_s (__m256 _1)
 {
@@ -3376,7 +3376,7 @@ __m256 __lasx_xvfrintrne_s (__m256 _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V4DI, V4DF.  */
 +/* Data types in instruction templates:  V4DF, V4DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256d __lasx_xvfrintrne_d (__m256d _1)
 {
@@ -3384,7 +3384,7 @@ __m256d __lasx_xvfrintrne_d (__m256d _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V8SI, V8SF.  */
 +/* Data types in instruction templates:  V8SF, V8SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256 __lasx_xvfrintrz_s (__m256 _1)
 {
@@ -3392,7 +3392,7 @@ __m256 __lasx_xvfrintrz_s (__m256 _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V4DI, V4DF.  */
 +/* Data types in instruction templates:  V4DF, V4DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256d __lasx_xvfrintrz_d (__m256d _1)
 {
@@ -3400,7 +3400,7 @@ __m256d __lasx_xvfrintrz_d (__m256d _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V8SI, V8SF.  */
 +/* Data types in instruction templates:  V8SF, V8SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256 __lasx_xvfrintrp_s (__m256 _1)
 {
@@ -3408,7 +3408,7 @@ __m256 __lasx_xvfrintrp_s (__m256 _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V4DI, V4DF.  */
 +/* Data types in instruction templates:  V4DF, V4DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256d __lasx_xvfrintrp_d (__m256d _1)
 {
@@ -3416,7 +3416,7 @@ __m256d __lasx_xvfrintrp_d (__m256d _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V8SI, V8SF.  */
 +/* Data types in instruction templates:  V8SF, V8SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256 __lasx_xvfrintrm_s (__m256 _1)
 {
@@ -3424,7 +3424,7 @@ __m256 __lasx_xvfrintrm_s (__m256 _1)
 }
 /* Assembly instruction format:	xd, xj.  */
 -/* Data types in instruction templates:  V4DI, V4DF.  */
 +/* Data types in instruction templates:  V4DF, V4DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m256d __lasx_xvfrintrm_d (__m256d _1)
 {
 diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h
 index ec4206990..29553c093 100644
 --- a/gcc/config/loongarch/lsxintrin.h
 +++ b/gcc/config/loongarch/lsxintrin.h
@@ -3412,7 +3412,7 @@ __m128i __lsx_vftintrneh_l_s (__m128 _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V4SI, V4SF.  */
 +/* Data types in instruction templates:  V4SF, V4SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128 __lsx_vfrintrne_s (__m128 _1)
 {
@@ -3420,7 +3420,7 @@ __m128 __lsx_vfrintrne_s (__m128 _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V2DI, V2DF.  */
 +/* Data types in instruction templates:  V2DF, V2DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128d __lsx_vfrintrne_d (__m128d _1)
 {
@@ -3428,7 +3428,7 @@ __m128d __lsx_vfrintrne_d (__m128d _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V4SI, V4SF.  */
 +/* Data types in instruction templates:  V4SF, V4SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128 __lsx_vfrintrz_s (__m128 _1)
 {
@@ -3436,7 +3436,7 @@ __m128 __lsx_vfrintrz_s (__m128 _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V2DI, V2DF.  */
 +/* Data types in instruction templates:  V2DF, V2DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128d __lsx_vfrintrz_d (__m128d _1)
 {
@@ -3444,7 +3444,7 @@ __m128d __lsx_vfrintrz_d (__m128d _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V4SI, V4SF.  */
 +/* Data types in instruction templates:  V4SF, V4SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128 __lsx_vfrintrp_s (__m128 _1)
 {
@@ -3452,7 +3452,7 @@ __m128 __lsx_vfrintrp_s (__m128 _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V2DI, V2DF.  */
 +/* Data types in instruction templates:  V2DF, V2DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128d __lsx_vfrintrp_d (__m128d _1)
 {
@@ -3460,7 +3460,7 @@ __m128d __lsx_vfrintrp_d (__m128d _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V4SI, V4SF.  */
 +/* Data types in instruction templates:  V4SF, V4SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128 __lsx_vfrintrm_s (__m128 _1)
 {
@@ -3468,7 +3468,7 @@ __m128 __lsx_vfrintrm_s (__m128 _1)
 }
 /* Assembly instruction format:	vd, vj.  */
 -/* Data types in instruction templates:  V2DI, V2DF.  */
 +/* Data types in instruction templates:  V2DF, V2DF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 __m128d __lsx_vfrintrm_d (__m128d _1)
 {
 -- 
 2.43.0
--- a/0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
+++ b/0021-LoongArch-Enable-vcond_mask_mn-expanders-for-SF-DF-m.patch
@ -0,0 +1,418 @@
 From 156d9451a5b20ac336370f1610a949db1bef7a26 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Thu, 26 Oct 2023 09:34:32 +0800
 Subject: [PATCH 021/188] LoongArch: Enable vcond_mask_mn expanders for SF/DF
 modes.
 If the vcond_mask patterns don't support fp modes, the vector
 FP comparison instructions will not be generated.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md (vcond_mask_<ILASX:mode><ILASX:mode>): Change to
 	(vcond_mask_<mode><mode256_i>): this.
 	* config/loongarch/lsx.md (vcond_mask_<ILSX:mode><ILSX:mode>): Change to
 	(vcond_mask_<mode><mode_i>): this.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: New test.
 	* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: New test.
 	* gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: New test.
 	* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 14 +--
 gcc/config/loongarch/lsx.md                   | 14 +--
 .../loongarch/vector/lasx/lasx-vcond-1.c      | 64 ++++++++++++++
 .../loongarch/vector/lasx/lasx-vcond-2.c      | 87 +++++++++++++++++++
 .../loongarch/vector/lsx/lsx-vcond-1.c        | 64 ++++++++++++++
 .../loongarch/vector/lsx/lsx-vcond-2.c        | 87 +++++++++++++++++++
 6 files changed, 316 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 442fda246..f0f2dd08d 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -906,15 +906,15 @@
 })
 ;; Same as vcond_
 -(define_expand "vcond_mask_<ILASX:mode><ILASX:mode>"
 -  [(match_operand:ILASX 0 "register_operand")
 -   (match_operand:ILASX 1 "reg_or_m1_operand")
 -   (match_operand:ILASX 2 "reg_or_0_operand")
 -   (match_operand:ILASX 3 "register_operand")]
 +(define_expand "vcond_mask_<mode><mode256_i>"
 +  [(match_operand:LASX 0 "register_operand")
 +   (match_operand:LASX 1 "reg_or_m1_operand")
 +   (match_operand:LASX 2 "reg_or_0_operand")
 +   (match_operand:<VIMODE256> 3 "register_operand")]
   "ISA_HAS_LASX"
 {
 -  loongarch_expand_vec_cond_mask_expr (<ILASX:MODE>mode,
 -				      <ILASX:VIMODE256>mode, operands);
 +  loongarch_expand_vec_cond_mask_expr (<MODE>mode,
 +				       <VIMODE256>mode, operands);
   DONE;
 })
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index b4e92ae9c..4af32c8df 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -644,15 +644,15 @@
   DONE;
 })
 -(define_expand "vcond_mask_<ILSX:mode><ILSX:mode>"
 -  [(match_operand:ILSX 0 "register_operand")
 -   (match_operand:ILSX 1 "reg_or_m1_operand")
 -   (match_operand:ILSX 2 "reg_or_0_operand")
 -   (match_operand:ILSX 3 "register_operand")]
 +(define_expand "vcond_mask_<mode><mode_i>"
 +  [(match_operand:LSX 0 "register_operand")
 +   (match_operand:LSX 1 "reg_or_m1_operand")
 +   (match_operand:LSX 2 "reg_or_0_operand")
 +   (match_operand:<VIMODE> 3 "register_operand")]
   "ISA_HAS_LSX"
 {
 -  loongarch_expand_vec_cond_mask_expr (<ILSX:MODE>mode,
 -				      <ILSX:VIMODE>mode, operands);
 +  loongarch_expand_vec_cond_mask_expr (<MODE>mode,
 +				       <VIMODE>mode, operands);
   DONE;
 })
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
 new file mode 100644
 index 000000000..ee9cb1a1f
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
@@ -0,0 +1,64 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlasx" } */
 +
 +#include <stdint-gcc.h>
 +
 +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)	\
 +  void __attribute__ ((noinline, noclone))			\
 +  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,	\
 +				   DATA_TYPE *__restrict__ x,	\
 +				   DATA_TYPE *__restrict__ y,	\
 +				   CMP_TYPE *__restrict__ a,	\
 +				   CMP_TYPE *__restrict__ b,	\
 +				   int n)			\
 +  {								\
 +    for (int i = 0; i < n; i++)					\
 +      {								\
 +	DATA_TYPE xval = x[i], yval = y[i];			\
 +	CMP_TYPE aval = a[i], bval = b[i];			\
 +	r[i] = aval COND bval ? xval : yval;			\
 +      }								\
 +  }
 +
 +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)	\
 +  T (int8_t, int8_t, COND, SUFFIX)			\
 +  T (int16_t, int16_t, COND, SUFFIX)			\
 +  T (int32_t, int32_t, COND, SUFFIX)			\
 +  T (int64_t, int64_t, COND, SUFFIX)			\
 +  T (float, int32_t, COND, SUFFIX##_float)		\
 +  T (double, int64_t, COND, SUFFIX##_double)
 +
 +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)	\
 +  T (uint8_t, uint8_t, COND, SUFFIX)			\
 +  T (uint16_t, uint16_t, COND, SUFFIX)			\
 +  T (uint32_t, uint32_t, COND, SUFFIX)			\
 +  T (uint64_t, uint64_t, COND, SUFFIX)			\
 +  T (float, uint32_t, COND, SUFFIX##_float)		\
 +  T (double, uint64_t, COND, SUFFIX##_double)
 +
 +#define TEST_COND_VAR_ALL(T, COND, SUFFIX)	\
 +  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)	\
 +  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
 +
 +#define TEST_VAR_ALL(T)				\
 +  TEST_COND_VAR_ALL (T, >, _gt)			\
 +  TEST_COND_VAR_ALL (T, <, _lt)			\
 +  TEST_COND_VAR_ALL (T, >=, _ge)		\
 +  TEST_COND_VAR_ALL (T, <=, _le)		\
 +  TEST_COND_VAR_ALL (T, ==, _eq)		\
 +  TEST_COND_VAR_ALL (T, !=, _ne)
 +
 +TEST_VAR_ALL (DEF_VCOND_VAR)
 +
 +/* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
 new file mode 100644
 index 000000000..5f40ed44c
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
@@ -0,0 +1,87 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops  -mlasx" } */
 +
 +#include <stdint-gcc.h>
 +
 +#define eq(A, B) ((A) == (B))
 +#define ne(A, B) ((A) != (B))
 +#define olt(A, B) ((A) < (B))
 +#define ole(A, B) ((A) <= (B))
 +#define oge(A, B) ((A) >= (B))
 +#define ogt(A, B) ((A) > (B))
 +#define ordered(A, B) (!__builtin_isunordered (A, B))
 +#define unordered(A, B) (__builtin_isunordered (A, B))
 +#define ueq(A, B) (!__builtin_islessgreater (A, B))
 +#define ult(A, B) (__builtin_isless (A, B))
 +#define ule(A, B) (__builtin_islessequal (A, B))
 +#define uge(A, B) (__builtin_isgreaterequal (A, B))
 +#define ugt(A, B) (__builtin_isgreater (A, B))
 +#define nueq(A, B) (__builtin_islessgreater (A, B))
 +#define nult(A, B) (!__builtin_isless (A, B))
 +#define nule(A, B) (!__builtin_islessequal (A, B))
 +#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
 +#define nugt(A, B) (!__builtin_isgreater (A, B))
 +
 +#define TEST_LOOP(TYPE1, TYPE2, CMP)				\
 +  void __attribute__ ((noinline, noclone))			\
 +  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,	\
 +					TYPE1 *restrict src,	\
 +					TYPE1 fallback,		\
 +					TYPE2 *restrict a,	\
 +					TYPE2 *restrict b,	\
 +					int count)		\
 +  {								\
 +    for (int i = 0; i < count; ++i)				\
 +      {\
 +        TYPE2 aval = a[i]; \
 +        TYPE2 bval = b[i]; \
 +        TYPE1 srcval = src[i]; \
 +        dest[i] = CMP (aval, bval) ? srcval : fallback;		\
 +      }\
 +  }
 +
 +#define TEST_CMP(CMP) \
 +  TEST_LOOP (int32_t, float, CMP) \
 +  TEST_LOOP (uint32_t, float, CMP) \
 +  TEST_LOOP (float, float, CMP) \
 +  TEST_LOOP (int64_t, double, CMP) \
 +  TEST_LOOP (uint64_t, double, CMP) \
 +  TEST_LOOP (double, double, CMP)
 +
 +TEST_CMP (eq)
 +TEST_CMP (ne)
 +TEST_CMP (olt)
 +TEST_CMP (ole)
 +TEST_CMP (oge)
 +TEST_CMP (ogt)
 +TEST_CMP (ordered)
 +TEST_CMP (unordered)
 +TEST_CMP (ueq)
 +TEST_CMP (ult)
 +TEST_CMP (ule)
 +TEST_CMP (uge)
 +TEST_CMP (ugt)
 +TEST_CMP (nueq)
 +TEST_CMP (nult)
 +TEST_CMP (nule)
 +TEST_CMP (nuge)
 +TEST_CMP (nugt)
 +
 +/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
 new file mode 100644
 index 000000000..138adccfa
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
@@ -0,0 +1,64 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -ftree-vectorize -fno-unroll-loops -fno-vect-cost-model -mlsx" } */
 +
 +#include <stdint-gcc.h>
 +
 +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)	\
 +  void __attribute__ ((noinline, noclone))			\
 +  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,	\
 +				   DATA_TYPE *__restrict__ x,	\
 +				   DATA_TYPE *__restrict__ y,	\
 +				   CMP_TYPE *__restrict__ a,	\
 +				   CMP_TYPE *__restrict__ b,	\
 +				   int n)			\
 +  {								\
 +    for (int i = 0; i < n; i++)					\
 +      {								\
 +	DATA_TYPE xval = x[i], yval = y[i];			\
 +	CMP_TYPE aval = a[i], bval = b[i];			\
 +	r[i] = aval COND bval ? xval : yval;			\
 +      }								\
 +  }
 +
 +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)	\
 +  T (int8_t, int8_t, COND, SUFFIX)			\
 +  T (int16_t, int16_t, COND, SUFFIX)			\
 +  T (int32_t, int32_t, COND, SUFFIX)			\
 +  T (int64_t, int64_t, COND, SUFFIX)			\
 +  T (float, int32_t, COND, SUFFIX##_float)		\
 +  T (double, int64_t, COND, SUFFIX##_double)
 +
 +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)	\
 +  T (uint8_t, uint8_t, COND, SUFFIX)			\
 +  T (uint16_t, uint16_t, COND, SUFFIX)			\
 +  T (uint32_t, uint32_t, COND, SUFFIX)			\
 +  T (uint64_t, uint64_t, COND, SUFFIX)			\
 +  T (float, uint32_t, COND, SUFFIX##_float)		\
 +  T (double, uint64_t, COND, SUFFIX##_double)
 +
 +#define TEST_COND_VAR_ALL(T, COND, SUFFIX)	\
 +  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)	\
 +  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
 +
 +#define TEST_VAR_ALL(T)				\
 +  TEST_COND_VAR_ALL (T, >, _gt)			\
 +  TEST_COND_VAR_ALL (T, <, _lt)			\
 +  TEST_COND_VAR_ALL (T, >=, _ge)		\
 +  TEST_COND_VAR_ALL (T, <=, _le)		\
 +  TEST_COND_VAR_ALL (T, ==, _eq)		\
 +  TEST_COND_VAR_ALL (T, !=, _ne)
 +
 +TEST_VAR_ALL (DEF_VCOND_VAR)
 +
 +/* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
 new file mode 100644
 index 000000000..e8fe31f8f
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
@@ -0,0 +1,87 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops  -mlsx" } */
 +
 +#include <stdint-gcc.h>
 +
 +#define eq(A, B) ((A) == (B))
 +#define ne(A, B) ((A) != (B))
 +#define olt(A, B) ((A) < (B))
 +#define ole(A, B) ((A) <= (B))
 +#define oge(A, B) ((A) >= (B))
 +#define ogt(A, B) ((A) > (B))
 +#define ordered(A, B) (!__builtin_isunordered (A, B))
 +#define unordered(A, B) (__builtin_isunordered (A, B))
 +#define ueq(A, B) (!__builtin_islessgreater (A, B))
 +#define ult(A, B) (__builtin_isless (A, B))
 +#define ule(A, B) (__builtin_islessequal (A, B))
 +#define uge(A, B) (__builtin_isgreaterequal (A, B))
 +#define ugt(A, B) (__builtin_isgreater (A, B))
 +#define nueq(A, B) (__builtin_islessgreater (A, B))
 +#define nult(A, B) (!__builtin_isless (A, B))
 +#define nule(A, B) (!__builtin_islessequal (A, B))
 +#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
 +#define nugt(A, B) (!__builtin_isgreater (A, B))
 +
 +#define TEST_LOOP(TYPE1, TYPE2, CMP)				\
 +  void __attribute__ ((noinline, noclone))			\
 +  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,	\
 +					TYPE1 *restrict src,	\
 +					TYPE1 fallback,		\
 +					TYPE2 *restrict a,	\
 +					TYPE2 *restrict b,	\
 +					int count)		\
 +  {								\
 +    for (int i = 0; i < count; ++i)				\
 +      {\
 +        TYPE2 aval = a[i]; \
 +        TYPE2 bval = b[i]; \
 +        TYPE1 srcval = src[i]; \
 +        dest[i] = CMP (aval, bval) ? srcval : fallback;		\
 +      }\
 +  }
 +
 +#define TEST_CMP(CMP) \
 +  TEST_LOOP (int32_t, float, CMP) \
 +  TEST_LOOP (uint32_t, float, CMP) \
 +  TEST_LOOP (float, float, CMP) \
 +  TEST_LOOP (int64_t, double, CMP) \
 +  TEST_LOOP (uint64_t, double, CMP) \
 +  TEST_LOOP (double, double, CMP)
 +
 +TEST_CMP (eq)
 +TEST_CMP (ne)
 +TEST_CMP (olt)
 +TEST_CMP (ole)
 +TEST_CMP (oge)
 +TEST_CMP (ogt)
 +TEST_CMP (ordered)
 +TEST_CMP (unordered)
 +TEST_CMP (ueq)
 +TEST_CMP (ult)
 +TEST_CMP (ule)
 +TEST_CMP (uge)
 +TEST_CMP (ugt)
 +TEST_CMP (nueq)
 +TEST_CMP (nult)
 +TEST_CMP (nule)
 +TEST_CMP (nuge)
 +TEST_CMP (nugt)
 +
 +/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */
 -- 
 2.43.0
--- a/0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch
+++ b/0022-LoongArch-Define-HAVE_AS_TLS-to-0-if-it-s-undefined-.patch
@ -0,0 +1,34 @@
 From 0527589fb1b7b97cff2c441c1219fb9c8a44dd23 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 30 Oct 2023 19:39:27 +0800
 Subject: [PATCH 022/188] LoongArch: Define HAVE_AS_TLS to 0 if it's undefined
 [PR112299]
 Now that loongarch.md uses HAVE_AS_TLS, we need this to fix the failure
 building a cross compiler if the cross assembler is not installed yet.
 gcc/ChangeLog:
 	PR target/112299
 	* config/loongarch/loongarch-opts.h (HAVE_AS_TLS): Define to 0
 	if not defined yet.
 ---
 gcc/config/loongarch/loongarch-opts.h | 4 ++++
 1 file changed, 4 insertions(+)
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index f2b59abe6..c4975af00 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -103,4 +103,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 #define HAVE_AS_MRELAX_OPTION 0
 #endif
 +#ifndef HAVE_AS_TLS
 +#define HAVE_AS_TLS 0
 +#endif
 +
 #endif /* LOONGARCH_OPTS_H */
 -- 
 2.43.0
--- a/0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch
+++ b/0023-LoongArch-Fix-instruction-name-typo-in-lsx_vreplgr2v.patch
@ -0,0 +1,30 @@
 From bc3ae60454a51b80538b6deba21975d43de23b6a Mon Sep 17 00:00:00 2001
 From: Chenghui Pan <panchenghui@loongson.cn>
 Date: Fri, 3 Nov 2023 17:01:36 +0800
 Subject: [PATCH 023/188] LoongArch: Fix instruction name typo in
 lsx_vreplgr2vr_<lsxfmt_f> template
 gcc/ChangeLog:
 	* config/loongarch/lsx.md: Fix instruction name typo in
 	lsx_vreplgr2vr_<lsxfmt_f> template.
 ---
 gcc/config/loongarch/lsx.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 4af32c8df..55c7d79a0 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -1523,7 +1523,7 @@
   "ISA_HAS_LSX"
 {
   if (which_alternative == 1)
 -    return "ldi.<lsxfmt>\t%w0,0";
 +    return "vldi.<lsxfmt>\t%w0,0";
   if (!TARGET_64BIT && (<MODE>mode == V2DImode || <MODE>mode == V2DFmode))
     return "#";
 -- 
 2.43.0
--- a/0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
+++ b/0024-LoongArch-Use-simplify_gen_subreg-instead-of-gen_rtx.patch
@ -0,0 +1,116 @@
 From b8f47a362000bb51dec88e0a73f885c57a46f568 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 12 Nov 2023 00:55:13 +0800
 Subject: [PATCH 024/188] LoongArch: Use simplify_gen_subreg instead of
 gen_rtx_SUBREG in loongarch_expand_vec_cond_mask_expr [PR112476]
 The GCC internals manual says:
    'subreg's of 'subreg's are not supported.  Using
    'simplify_gen_subreg' is the recommended way to avoid this problem.
 Unfortunately loongarch_expand_vec_cond_mask_expr might create nested
 subreg under certain circumstances, causing an ICE.
 Use simplify_gen_subreg as the internals manual suggests.
 gcc/ChangeLog:
 	PR target/112476
 	* config/loongarch/loongarch.cc
 	(loongarch_expand_vec_cond_mask_expr): Call simplify_gen_subreg
 	instead of gen_rtx_SUBREG.
 gcc/testsuite/ChangeLog:
 	PR target/112476
 	* gcc.target/loongarch/pr112476-1.c: New test.
 	* gcc.target/loongarch/pr112476-2.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             | 11 ++++++---
 .../gcc.target/loongarch/pr112476-1.c         | 24 +++++++++++++++++++
 .../gcc.target/loongarch/pr112476-2.c         |  5 ++++
 3 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-2.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index fa5c14be6..65ca1489f 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -11190,7 +11190,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
 	  if (mode != vimode)
 	    {
 	      xop1 = gen_reg_rtx (vimode);
 -	      emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
 +	      emit_move_insn (xop1,
 +			      simplify_gen_subreg (vimode, operands[1],
 +						   mode, 0));
 	    }
 	  emit_move_insn (src1, xop1);
 	}
@@ -11207,7 +11209,9 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
 	  if (mode != vimode)
 	    {
 	      xop2 = gen_reg_rtx (vimode);
 -	      emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
 +	      emit_move_insn (xop2,
 +			      simplify_gen_subreg (vimode, operands[2],
 +						   mode, 0));
 	    }
 	  emit_move_insn (src2, xop2);
 	}
@@ -11226,7 +11230,8 @@ loongarch_expand_vec_cond_mask_expr (machine_mode mode, machine_mode vimode,
 			  gen_rtx_AND (vimode, mask, src1));
       /* The result is placed back to a register with the mask.  */
       emit_insn (gen_rtx_SET (mask, bsel));
 -      emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
 +      emit_move_insn (operands[0], simplify_gen_subreg (mode, mask,
 +							vimode, 0));
     }
 }
 diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-1.c b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
 new file mode 100644
 index 000000000..4cf133e7a
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/pr112476-1.c
@@ -0,0 +1,24 @@
 +/* PR target/112476: ICE with -mlsx */
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlsx" } */
 +
 +int foo, bar;
 +float baz, res, a;
 +
 +void
 +apply_adjacent_ternary (float *dst, float *src0)
 +{
 +  do
 +    {
 +      __builtin_memcpy (&res, &src0, sizeof (res));
 +      *dst = foo ? baz : res;
 +      dst++;
 +    }
 +  while (dst != src0);
 +}
 +
 +void
 +xx (void)
 +{
 +  apply_adjacent_ternary (&a, &a);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-2.c b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
 new file mode 100644
 index 000000000..cc0dfbfc9
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/pr112476-2.c
@@ -0,0 +1,5 @@
 +/* PR target/112476: ICE with -mlasx */
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mfpu=64 -mabi=lp64d -mlasx" } */
 +
 +#include "pr112476-1.c"
 -- 
 2.43.0
--- a/0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
+++ b/0025-LoongArch-Optimize-single-used-address-with-mexplici.patch
@ -0,0 +1,116 @@
 From b23a89e835962ae7d89e5c6f87a69c021097d715 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 30 Oct 2023 20:24:58 +0800
 Subject: [PATCH 025/188] LoongArch: Optimize single-used address with
 -mexplicit-relocs=auto for fld/fst
 fld and fst have the same addressing mode as ld.w and st.w, so the same
 optimization as r14-4851 should be applied for them too.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (LD_AT_LEAST_32_BIT): New mode
 	iterator.
 	(ST_ANY): New mode iterator.
 	(define_peephole2): Use LD_AT_LEAST_32_BIT instead of GPR and
 	ST_ANY instead of QHWD for applicable patterns.
 ---
 gcc/config/loongarch/loongarch.md | 38 +++++++++++++++++++------------
 1 file changed, 24 insertions(+), 14 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 80487488d..ed86c95bd 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -400,6 +400,14 @@
    (DI "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
    (TF "TARGET_64BIT && TARGET_DOUBLE_FLOAT")])
 +;; A mode for anything with 32 bits or more, and able to be loaded with
 +;; the same addressing mode as ld.w.
 +(define_mode_iterator LD_AT_LEAST_32_BIT [GPR ANYF])
 +
 +;; A mode for anything able to be stored with the same addressing mode as
 +;; st.w.
 +(define_mode_iterator ST_ANY [QHWD ANYF])
 +
 ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
 ;; 32-bit version and "mul.d" in the 64-bit version.
 (define_mode_attr d [(SI "w") (DI "d")])
@@ -3785,13 +3793,14 @@
 (define_peephole2
   [(set (match_operand:P 0 "register_operand")
 	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (match_operand:GPR 2 "register_operand")
 -	(mem:GPR (match_dup 0)))]
 +   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
 +	(mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
    && (peep2_reg_dead_p (2, operands[0]) \
        || REGNO (operands[0]) == REGNO (operands[2]))"
 -  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
 +  [(set (match_dup 2)
 +	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
   {
     emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
   })
@@ -3799,14 +3808,15 @@
 (define_peephole2
   [(set (match_operand:P 0 "register_operand")
 	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (match_operand:GPR 2 "register_operand")
 -	(mem:GPR (plus (match_dup 0)
 -		       (match_operand 3 "const_int_operand"))))]
 +   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
 +	(mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
 +				(match_operand 3 "const_int_operand"))))]
   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
    && (peep2_reg_dead_p (2, operands[0]) \
        || REGNO (operands[0]) == REGNO (operands[2]))"
 -  [(set (match_dup 2) (mem:GPR (lo_sum:P (match_dup 0) (match_dup 1))))]
 +  [(set (match_dup 2)
 +	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
   {
     operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
     emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
@@ -3850,13 +3860,13 @@
 (define_peephole2
   [(set (match_operand:P 0 "register_operand")
 	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (mem:QHWD (match_dup 0))
 -	(match_operand:QHWD 2 "register_operand"))]
 +   (set (mem:ST_ANY (match_dup 0))
 +	(match_operand:ST_ANY 2 "register_operand"))]
   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
    && (peep2_reg_dead_p (2, operands[0])) \
    && REGNO (operands[0]) != REGNO (operands[2])"
 -  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
 +  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
   {
     emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
   })
@@ -3864,14 +3874,14 @@
 (define_peephole2
   [(set (match_operand:P 0 "register_operand")
 	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (mem:QHWD (plus (match_dup 0)
 -			(match_operand 3 "const_int_operand")))
 -	(match_operand:QHWD 2 "register_operand"))]
 +   (set (mem:ST_ANY (plus (match_dup 0)
 +			  (match_operand 3 "const_int_operand")))
 +	(match_operand:ST_ANY 2 "register_operand"))]
   "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
    && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
    && (peep2_reg_dead_p (2, operands[0])) \
    && REGNO (operands[0]) != REGNO (operands[2])"
 -  [(set (mem:QHWD (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
 +  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
   {
     operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
     emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 -- 
 2.43.0
--- a/0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
+++ b/0026-LoongArch-Disable-relaxation-if-the-assembler-don-t-.patch
@ -0,0 +1,305 @@
 From f1cfdec1602a5a316a9b9022a95143a7385489c2 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Fri, 3 Nov 2023 21:19:59 +0800
 Subject: [PATCH 026/188] LoongArch: Disable relaxation if the assembler doesn't
 support conditional branch relaxation [PR112330]
 As the commit message of r14-4674 has indicated, if the assembler does
 not support conditional branch relaxation, a relocation overflow may
 happen on conditional branches when relaxation is enabled because the
 number of NOP instructions inserted by the assembler will be more than
 the number estimated by GCC.
 To work around this issue, disable relaxation by default if the
 assembler is found incapable of performing conditional branch relaxation
 at GCC build time.  We also need to pass -mno-relax to the assembler to
 really disable relaxation.  But, if the assembler does not support
 -mrelax option at all, we should not pass -mno-relax to the assembler or
 it will immediately error out.  Also handle this with the build time
 assembler capability probing, and add a pair of options
 -m[no-]pass-mrelax-to-as to allow using a different assembler from the
 build-time one.
 With this change, if GCC is built with GAS 2.41, relaxation will be
 disabled by default.  So the default value of -mexplicit-relocs= is also
 changed to 'always' if -mno-relax is specified or implied by the
 build-time default, because using assembler macros for symbol addresses
 produces no benefit when relaxation is disabled.
 gcc/ChangeLog:
 	PR target/112330
 	* config/loongarch/genopts/loongarch.opt.in: Add
 	-m[no-]pass-mrelax-to-as.  Change the default of -m[no-]relax to
 	account for conditional branch relaxation support status.
 	* config/loongarch/loongarch.opt: Regenerate.
 	* configure.ac (gcc_cv_as_loongarch_cond_branch_relax): Check if
 	the assembler supports conditional branch relaxation.
 	* configure: Regenerate.
 	* config.in: Regenerate.  Note that there are some unrelated
 	changes introduced by r14-5424 (which does not contain a
 	config.in regeneration).
 	* config/loongarch/loongarch-opts.h
 	(HAVE_AS_COND_BRANCH_RELAXATION): Define to 0 if not defined.
 	* config/loongarch/loongarch-driver.h (ASM_MRELAX_DEFAULT):
 	Define.
 	(ASM_MRELAX_SPEC): Define.
 	(ASM_SPEC): Use ASM_MRELAX_SPEC instead of "%{mno-relax}".
 	* config/loongarch/loongarch.cc: Take the setting of
 	-m[no-]relax into account when determining the default of
 	-mexplicit-relocs=.
 	* doc/invoke.texi: Document -m[no-]relax and
 	-m[no-]pass-mrelax-to-as for LoongArch.  Update the default
 	value of -mexplicit-relocs=.
 ---
 gcc/config.in                                 | 35 ++++++++++++++++++-
 gcc/config/loongarch/genopts/loongarch.opt.in |  6 +++-
 gcc/config/loongarch/loongarch-driver.h       | 16 ++++++++-
 gcc/config/loongarch/loongarch-opts.h         |  4 +++
 gcc/config/loongarch/loongarch.cc             |  2 +-
 gcc/config/loongarch/loongarch.opt            |  6 +++-
 gcc/configure                                 | 35 +++++++++++++++++++
 gcc/configure.ac                              | 10 ++++++
 8 files changed, 109 insertions(+), 5 deletions(-)
 diff --git a/gcc/config.in b/gcc/config.in
 index 0c55e67e7..04968b53c 100644
 --- a/gcc/config.in
 +++ b/gcc/config.in
@@ -374,6 +374,12 @@
 #endif
 +/* Define if your assembler supports conditional branch relaxation. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_AS_COND_BRANCH_RELAXATION
 +#endif
 +
 +
 /* Define if your assembler supports the --debug-prefix-map option. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_DEBUG_PREFIX_MAP
@@ -798,6 +804,20 @@
 #endif
 +/* Define to 1 if you have the Mac OS X function
 +   CFLocaleCopyPreferredLanguages in the CoreFoundation framework. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_CFLOCALECOPYPREFERREDLANGUAGES
 +#endif
 +
 +
 +/* Define to 1 if you have the Mac OS X function CFPreferencesCopyAppValue in
 +   the CoreFoundation framework. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_CFPREFERENCESCOPYAPPVALUE
 +#endif
 +
 +
 /* Define to 1 if you have the `clearerr_unlocked' function. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_CLEARERR_UNLOCKED
@@ -822,6 +842,13 @@
 #endif
 +/* Define if the GNU dcgettext() function is already present or preinstalled.
 +   */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_DCGETTEXT
 +#endif
 +
 +
 /* Define to 1 if we found a declaration for 'abort', otherwise define to 0.
    */
 #ifndef USED_FOR_TARGET
@@ -1554,6 +1581,12 @@
 #endif
 +/* Define if the GNU gettext() function is already present or preinstalled. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_GETTEXT
 +#endif
 +
 +
 /* Define to 1 if you have the `gettimeofday' function. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_GETTIMEOFDAY
@@ -1585,7 +1618,7 @@
 #endif
 -/* Define if you have the iconv() function. */
 +/* Define if you have the iconv() function and it works. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_ICONV
 #endif
 diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
 index e7df1964a..bd3cfaf60 100644
 --- a/gcc/config/loongarch/genopts/loongarch.opt.in
 +++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -229,10 +229,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
 mrelax
 -Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 +Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
 +mpass-mrelax-to-as
 +Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
 +Pass -mrelax or -mno-relax option to the assembler.
 +
 -param=loongarch-vect-unroll-limit=
 Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
 Used to limit unroll factor which indicates how much the autovectorizer may
 diff --git a/gcc/config/loongarch/loongarch-driver.h b/gcc/config/loongarch/loongarch-driver.h
 index 59fa3263d..c8dba2cc4 100644
 --- a/gcc/config/loongarch/loongarch-driver.h
 +++ b/gcc/config/loongarch/loongarch-driver.h
@@ -51,9 +51,23 @@ along with GCC; see the file COPYING3.  If not see
   "%{G*} %{,ada:-gnatea %{mabi=*} -gnatez} " \
   "%(subtarget_cc1_spec)"
 +#if HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION
 +#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mrelax}}"
 +#else
 +#define ASM_MRELAX_DEFAULT "%{!mrelax:%{!mno-relax:-mno-relax}}"
 +#endif
 +
 +#if HAVE_AS_MRELAX_OPTION
 +#define ASM_MRELAX_SPEC \
 +  "%{!mno-pass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}"
 +#else
 +#define ASM_MRELAX_SPEC \
 +  "%{mpass-mrelax-to-as:%{mrelax} %{mno-relax} " ASM_MRELAX_DEFAULT "}"
 +#endif
 +
 #undef ASM_SPEC
 #define ASM_SPEC \
 -  "%{mabi=*} %{mno-relax} %(subtarget_asm_spec)"
 +  "%{mabi=*} " ASM_MRELAX_SPEC " %(subtarget_asm_spec)"
 extern const char*
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index c4975af00..dfbe9dd5c 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -103,6 +103,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 #define HAVE_AS_MRELAX_OPTION 0
 #endif
 +#ifndef HAVE_AS_COND_BRANCH_RELAXATION
 +#define HAVE_AS_COND_BRANCH_RELAXATION 0
 +#endif
 +
 #ifndef HAVE_AS_TLS
 #define HAVE_AS_TLS 0
 #endif
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 65ca1489f..6d580ee75 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -7428,7 +7428,7 @@ loongarch_option_override_internal (struct gcc_options *opts,
   if (la_opt_explicit_relocs == M_OPT_UNSET)
     la_opt_explicit_relocs = (HAVE_AS_EXPLICIT_RELOCS
 -			      ? (HAVE_AS_MRELAX_OPTION
 +			      ? (loongarch_mrelax
 				 ? EXPLICIT_RELOCS_AUTO
 				 : EXPLICIT_RELOCS_ALWAYS)
 			      : EXPLICIT_RELOCS_NONE);
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index 44376fd77..d936954b8 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -236,10 +236,14 @@ Target Var(TARGET_DIRECT_EXTERN_ACCESS) Init(0)
 Avoid using the GOT to access external symbols.
 mrelax
 -Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION)
 +Target Var(loongarch_mrelax) Init(HAVE_AS_MRELAX_OPTION && HAVE_AS_COND_BRANCH_RELAXATION)
 Take advantage of linker relaxations to reduce the number of instructions
 required to materialize symbol addresses.
 +mpass-mrelax-to-as
 +Target Var(loongarch_pass_mrelax_to_as) Init(HAVE_AS_MRELAX_OPTION)
 +Pass -mrelax or -mno-relax option to the assembler.
 +
 -param=loongarch-vect-unroll-limit=
 Target Joined UInteger Var(loongarch_vect_unroll_limit) Init(6) IntegerRange(1, 64) Param
 Used to limit unroll factor which indicates how much the autovectorizer may
 diff --git a/gcc/configure b/gcc/configure
 index 430d44dc3..09bacfec3 100755
 --- a/gcc/configure
 +++ b/gcc/configure
@@ -28901,6 +28901,41 @@ if test $gcc_cv_as_loongarch_relax = yes; then
 $as_echo "#define HAVE_AS_MRELAX_OPTION 1" >>confdefs.h
 +fi
 +
 +    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for conditional branch relaxation support" >&5
 +$as_echo_n "checking assembler for conditional branch relaxation support... " >&6; }
 +if ${gcc_cv_as_loongarch_cond_branch_relax+:} false; then :
 +  $as_echo_n "(cached) " >&6
 +else
 +  gcc_cv_as_loongarch_cond_branch_relax=no
 +  if test x$gcc_cv_as != x; then
 +    $as_echo 'a:
 +       .rept 32769
 +       nop
 +       .endr
 +       beq $a0,$a1,a' > conftest.s
 +    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
 +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
 +  (eval $ac_try) 2>&5
 +  ac_status=$?
 +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
 +  test $ac_status = 0; }; }
 +    then
 +	gcc_cv_as_loongarch_cond_branch_relax=yes
 +    else
 +      echo "configure: failed program was" >&5
 +      cat conftest.s >&5
 +    fi
 +    rm -f conftest.o conftest.s
 +  fi
 +fi
 +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_cond_branch_relax" >&5
 +$as_echo "$gcc_cv_as_loongarch_cond_branch_relax" >&6; }
 +if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then
 +
 +$as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h
 +
 fi
     ;;
 diff --git a/gcc/configure.ac b/gcc/configure.ac
 index 4b24db190..a0999152e 100644
 --- a/gcc/configure.ac
 +++ b/gcc/configure.ac
@@ -5341,6 +5341,16 @@ x:
       [-mrelax], [.text],,
       [AC_DEFINE(HAVE_AS_MRELAX_OPTION, 1,
 		[Define if your assembler supports -mrelax option.])])
 +    gcc_GAS_CHECK_FEATURE([conditional branch relaxation support],
 +      gcc_cv_as_loongarch_cond_branch_relax,
 +      [],
 +      [a:
 +       .rept 32769
 +       nop
 +       .endr
 +       beq $a0,$a1,a],,
 +      [AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1,
 +		[Define if your assembler supports conditional branch relaxation.])])
     ;;
     s390*-*-*)
     gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
 -- 
 2.43.0
--- a/0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
+++ b/0027-LoongArch-Remove-redundant-barrier-instructions-befo.patch
@ -0,0 +1,391 @@
 From 4498010fba61c1446286c96cbda24d5ed53c53c7 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 6 Nov 2023 16:06:08 +0800
 Subject: [PATCH 027/188] LoongArch: Remove redundant barrier instructions
 before LL-SC loops
 This is isomorphic to the LLVM changes [1-2].
 On LoongArch, the LL and SC instructions have memory barrier semantics:
 - LL: <memory-barrier> + <load-exclusive>
 - SC: <store-conditional> + <memory-barrier>
 But the compare and swap operation is allowed to fail, and if it fails
 the SC instruction is not executed, thus the guarantee of acquiring
 semantics cannot be ensured. Therefore, an acquire barrier needs to be
 generated when failure_memorder includes an acquire operation.
 On CPUs implementing LoongArch v1.10 or later, "dbar 0b10100" is an
 acquire barrier; on CPUs implementing LoongArch v1.00, it is a full
 barrier.  So it's always enough for acquire semantics.  OTOH if an
 acquire semantic is not needed, we still need the "dbar 0x700" as the
 load-load barrier like all LL-SC loops.
 [1]:https://github.com/llvm/llvm-project/pull/67391
 [2]:https://github.com/llvm/llvm-project/pull/69339
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc
 	(loongarch_memmodel_needs_release_fence): Remove.
 	(loongarch_cas_failure_memorder_needs_acquire): New static
 	function.
 	(loongarch_print_operand): Redefine 'G' for the barrier on CAS
 	failure.
 	* config/loongarch/sync.md (atomic_cas_value_strong<mode>):
 	Remove the redundant barrier before the LL instruction, and
 	emit an acquire barrier on failure if needed by
 	failure_memorder.
 	(atomic_cas_value_cmp_and_7_<mode>): Likewise.
 	(atomic_cas_value_add_7_<mode>): Remove the unnecessary barrier
 	before the LL instruction.
 	(atomic_cas_value_sub_7_<mode>): Likewise.
 	(atomic_cas_value_and_7_<mode>): Likewise.
 	(atomic_cas_value_xor_7_<mode>): Likewise.
 	(atomic_cas_value_or_7_<mode>): Likewise.
 	(atomic_cas_value_nand_7_<mode>): Likewise.
 	(atomic_cas_value_exchange_7_<mode>): Likewise.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/cas-acquire.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             | 30 ++++---
 gcc/config/loongarch/sync.md                  | 49 +++++------
 .../gcc.target/loongarch/cas-acquire.c        | 82 +++++++++++++++++++
 3 files changed, 119 insertions(+), 42 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/cas-acquire.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 6d580ee75..8467f03cf 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -5829,27 +5829,27 @@ loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
     }
 }
 -/* Return true if a FENCE should be emitted to before a memory access to
 -   implement the release portion of memory model MODEL.  */
 +/* Return true if a FENCE should be emitted after a failed CAS to
 +   implement the acquire semantic of failure_memorder.  */
 static bool
 -loongarch_memmodel_needs_release_fence (enum memmodel model)
 +loongarch_cas_failure_memorder_needs_acquire (enum memmodel model)
 {
 -  switch (model)
 +  switch (memmodel_base (model))
     {
 +    case MEMMODEL_ACQUIRE:
     case MEMMODEL_ACQ_REL:
     case MEMMODEL_SEQ_CST:
 -    case MEMMODEL_SYNC_SEQ_CST:
 -    case MEMMODEL_RELEASE:
 -    case MEMMODEL_SYNC_RELEASE:
       return true;
 -    case MEMMODEL_ACQUIRE:
 -    case MEMMODEL_CONSUME:
 -    case MEMMODEL_SYNC_ACQUIRE:
     case MEMMODEL_RELAXED:
 +    case MEMMODEL_RELEASE:
       return false;
 +    /* MEMMODEL_CONSUME is deliberately not handled because it's always
 +       replaced by MEMMODEL_ACQUIRE as at now.  If you see an ICE caused by
 +       MEMMODEL_CONSUME, read the change (re)introducing it carefully and
 +       decide what to do.  See PR 59448 and get_memmodel in builtins.cc.  */
     default:
       gcc_unreachable ();
     }
@@ -5962,7 +5962,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
    'd'	Print CONST_INT OP in decimal.
    'E'	Print CONST_INT OP element 0 of a replicated CONST_VECTOR in decimal.
    'F'	Print the FPU branch condition for comparison OP.
 -   'G'	Print a DBAR insn if the memory model requires a release.
 +   'G'	Print a DBAR insn for CAS failure (with an acquire semantic if
 +	needed, otherwise a simple load-load barrier).
    'H'  Print address 52-61bit relocation associated with OP.
    'h'  Print the high-part relocation associated with OP.
    'i'	Print i if the operand is not a register.
@@ -6053,8 +6054,11 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
       break;
     case 'G':
 -      if (loongarch_memmodel_needs_release_fence ((enum memmodel) INTVAL (op)))
 -	fputs ("dbar\t0", file);
 +      if (loongarch_cas_failure_memorder_needs_acquire (
 +	    memmodel_from_int (INTVAL (op))))
 +	fputs ("dbar\t0b10100", file);
 +      else
 +	fputs ("dbar\t0x700", file);
       break;
     case 'h':
 diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
 index efa40f24c..dd1f98946 100644
 --- a/gcc/config/loongarch/sync.md
 +++ b/gcc/config/loongarch/sync.md
@@ -162,19 +162,18 @@
    (clobber (match_scratch:GPR 6 "=&r"))]
   ""
 {
 -  return "%G5\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "bne\\t%0,%z2,2f\\n\\t"
 	 "or%i3\\t%6,$zero,%3\\n\\t"
 	 "sc.<amo>\\t%6,%1\\n\\t"
 -	 "beq\\t$zero,%6,1b\\n\\t"
 +	 "beqz\\t%6,1b\\n\\t"
 	 "b\\t3f\\n\\t"
 	 "2:\\n\\t"
 -	 "dbar\\t0x700\\n\\t"
 +	 "%G5\\n\\t"
 	 "3:\\n\\t";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:SI 0 "register_operand" "")   ;; bool output
@@ -267,8 +266,7 @@
    (clobber (match_scratch:GPR 7 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%2\\n\\t"
 	 "bne\\t%7,%z4,2f\\n\\t"
@@ -278,10 +276,10 @@
 	 "beq\\t$zero,%7,1b\\n\\t"
 	 "b\\t3f\\n\\t"
 	 "2:\\n\\t"
 -	 "dbar\\t0x700\\n\\t"
 +	 "%G6\\n\\t"
 	 "3:\\n\\t";
 }
 -  [(set (attr "length") (const_int 40))])
 +  [(set (attr "length") (const_int 36))])
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:SI 0 "register_operand" "")   ;; bool output
@@ -336,8 +334,7 @@
    (clobber (match_scratch:GPR 8 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "add.w\\t%8,%0,%z5\\n\\t"
@@ -347,7 +344,7 @@
 	 "beq\\t$zero,%7,1b";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_insn "atomic_cas_value_sub_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
@@ -363,8 +360,7 @@
    (clobber (match_scratch:GPR 8 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "sub.w\\t%8,%0,%z5\\n\\t"
@@ -373,7 +369,7 @@
 	 "sc.<amo>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_insn "atomic_cas_value_and_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
@@ -389,8 +385,7 @@
    (clobber (match_scratch:GPR 8 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "and\\t%8,%0,%z5\\n\\t"
@@ -399,7 +394,7 @@
 	 "sc.<amo>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_insn "atomic_cas_value_xor_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
@@ -415,8 +410,7 @@
    (clobber (match_scratch:GPR 8 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "xor\\t%8,%0,%z5\\n\\t"
@@ -426,7 +420,7 @@
 	 "beq\\t$zero,%7,1b";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_insn "atomic_cas_value_or_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
@@ -442,8 +436,7 @@
    (clobber (match_scratch:GPR 8 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "or\\t%8,%0,%z5\\n\\t"
@@ -453,7 +446,7 @@
 	 "beq\\t$zero,%7,1b";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_insn "atomic_cas_value_nand_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")				;; res
@@ -469,8 +462,7 @@
    (clobber (match_scratch:GPR 8 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%3\\n\\t"
 	 "and\\t%8,%0,%z5\\n\\t"
@@ -479,7 +471,7 @@
 	 "sc.<amo>\\t%7,%1\\n\\t"
 	 "beq\\t$zero,%7,1b";
 }
 -  [(set (attr "length") (const_int 32))])
 +  [(set (attr "length") (const_int 28))])
 (define_insn "atomic_cas_value_exchange_7_<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -494,8 +486,7 @@
    (clobber (match_scratch:GPR 7 "=&r"))]
   ""
 {
 -  return "%G6\\n\\t"
 -	 "1:\\n\\t"
 +  return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "and\\t%7,%0,%z3\\n\\t"
 	 "or%i5\\t%7,%7,%5\\n\\t"
 diff --git a/gcc/testsuite/gcc.target/loongarch/cas-acquire.c b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c
 new file mode 100644
 index 000000000..ff7ba866f
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/cas-acquire.c
@@ -0,0 +1,82 @@
 +/* { dg-do run } */
 +/* { dg-require-effective-target c99_runtime } */
 +/* { dg-require-effective-target pthread } */
 +/* { dg-options "-std=c99 -pthread" } */
 +
 +/* https://github.com/llvm/llvm-project/pull/67391#issuecomment-1752403934
 +   reported that this had failed with GCC and 3A6000.  */
 +
 +#include <pthread.h>
 +#include <stdatomic.h>
 +#include <stdbool.h>
 +#include <stdio.h>
 +
 +static unsigned int tags[32];
 +static unsigned int vals[32];
 +
 +static void *
 +writer_entry (void *data)
 +{
 +  atomic_uint *pt = (atomic_uint *)tags;
 +  atomic_uint *pv = (atomic_uint *)vals;
 +
 +  for (unsigned int n = 1; n < 10000; n++)
 +    {
 +      atomic_store_explicit (&pv[n & 31], n, memory_order_release);
 +      atomic_store_explicit (&pt[n & 31], n, memory_order_release);
 +    }
 +
 +  return NULL;
 +}
 +
 +static void *
 +reader_entry (void *data)
 +{
 +  atomic_uint *pt = (atomic_uint *)tags;
 +  atomic_uint *pv = (atomic_uint *)vals;
 +  int i;
 +
 +  for (;;)
 +    {
 +      for (i = 0; i < 32; i++)
 +        {
 +          unsigned int tag = 0;
 +          bool res;
 +
 +          res = atomic_compare_exchange_weak_explicit (
 +              &pt[i], &tag, 0, memory_order_acquire, memory_order_acquire);
 +          if (!res)
 +            {
 +              unsigned int val;
 +
 +              val = atomic_load_explicit (&pv[i], memory_order_relaxed);
 +              if (val < tag)
 +                __builtin_trap ();
 +            }
 +        }
 +    }
 +
 +  return NULL;
 +}
 +
 +int
 +main (int argc, char *argv[])
 +{
 +  pthread_t writer;
 +  pthread_t reader;
 +  int res;
 +
 +  res = pthread_create (&writer, NULL, writer_entry, NULL);
 +  if (res < 0)
 +    __builtin_trap ();
 +
 +  res = pthread_create (&reader, NULL, reader_entry, NULL);
 +  if (res < 0)
 +    __builtin_trap ();
 +
 +  res = pthread_join (writer, NULL);
 +  if (res < 0)
 +    __builtin_trap ();
 +
 +  return 0;
 +}
 -- 
 2.43.0
--- a/0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
+++ b/0028-LoongArch-Fix-scan-assembler-times-of-lasx-lsx-test-.patch
@ -0,0 +1,161 @@
 From 9731abbe19b9fad184dfe728bd9b2cc02b40c543 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Thu, 16 Nov 2023 20:31:09 +0800
 Subject: [PATCH 028/188] LoongArch: Fix scan-assembler-times of lasx/lsx test
 case.
 These tests have failed since they were first added; this patch adjusts the
 scan-assembler-times counts to fix them.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lasx/lasx-vcond-1.c: Adjust assembler times.
 	* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Ditto.
 	* gcc.target/loongarch/vector/lsx/lsx-vcond-1.c: Ditto.
 	* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Ditto.
 ---
 .../loongarch/vector/lasx/lasx-vcond-1.c      | 12 +++----
 .../loongarch/vector/lasx/lasx-vcond-2.c      | 36 +++++++++----------
 .../loongarch/vector/lsx/lsx-vcond-1.c        | 12 +++----
 .../loongarch/vector/lsx/lsx-vcond-2.c        | 36 +++++++++----------
 4 files changed, 48 insertions(+), 48 deletions(-)
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
 index ee9cb1a1f..57064eac9 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-1.c
@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
 /* { dg-final { scan-assembler-times {\txvslt\.b} 4 } } */
 /* { dg-final { scan-assembler-times {\txvslt\.h} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvslt\.w} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvslt\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvslt\.w} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvslt\.d} 8 } } */
 /* { dg-final { scan-assembler-times {\txvsle\.b} 4 } } */
 /* { dg-final { scan-assembler-times {\txvsle\.h} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvsle\.w} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvsle\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvsle\.w} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvsle\.d} 8 } } */
 /* { dg-final { scan-assembler-times {\txvseq\.b} 4 } } */
 /* { dg-final { scan-assembler-times {\txvseq\.h} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvseq\.w} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvseq\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\txvseq\.w} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvseq\.d} 8 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
 index 5f40ed44c..55d5a084c 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
@@ -67,21 +67,21 @@ TEST_CMP (nule)
 TEST_CMP (nuge)
 TEST_CMP (nugt)
 -/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 4 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 8 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 8 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 8 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 8 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.sle\.d} 6 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cor\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cun\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.s} 6 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cueq\.d} 6 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.s} 12 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cule\.d} 12 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.s} 12 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cult\.d} 12 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
 index 138adccfa..8c69f0d9b 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-1.c
@@ -52,13 +52,13 @@ TEST_VAR_ALL (DEF_VCOND_VAR)
 /* { dg-final { scan-assembler-times {\tvslt\.b} 4 } } */
 /* { dg-final { scan-assembler-times {\tvslt\.h} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvslt\.w} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvslt\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvslt\.w} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvslt\.d} 8 } } */
 /* { dg-final { scan-assembler-times {\tvsle\.b} 4 } } */
 /* { dg-final { scan-assembler-times {\tvsle\.h} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvsle\.w} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvsle\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvsle\.w} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvsle\.d} 8 } } */
 /* { dg-final { scan-assembler-times {\tvseq\.b} 4 } } */
 /* { dg-final { scan-assembler-times {\tvseq\.h} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvseq\.w} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvseq\.d} 4 } } */
 +/* { dg-final { scan-assembler-times {\tvseq\.w} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvseq\.d} 8 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
 index e8fe31f8f..2214afd0a 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
@@ -67,21 +67,21 @@ TEST_CMP (nule)
 TEST_CMP (nuge)
 TEST_CMP (nugt)
 -/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 2 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 4 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 8 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 8 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 8 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 8 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.sle\.d} 6 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cor\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cun\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.s} 6 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cueq\.d} 6 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.s} 12 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cule\.d} 12 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.s} 12 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cult\.d} 12 } } */
 -- 
 2.43.0
--- a/0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch
+++ b/0029-LoongArch-Increase-cost-of-vector-aligned-store-load.patch
@ -0,0 +1,45 @@
 From 526e1effd86cfa0b1afae88890ce4f74f7150d88 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Thu, 16 Nov 2023 16:44:36 +0800
 Subject: [PATCH 029/188] LoongArch: Increase cost of vector aligned
 store/load.
 Based on SPEC2017 performance evaluation results, it's better to make them equal
 to the cost of unaligned store/load so as to avoid odd alignment peeling.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc
 	(loongarch_builtin_vectorization_cost): Adjust.
 ---
 gcc/config/loongarch/loongarch.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 8467f03cf..b6f0d61ef 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -3889,11 +3889,9 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
       case scalar_stmt:
       case scalar_load:
       case vector_stmt:
 -      case vector_load:
       case vec_to_scalar:
       case scalar_to_vec:
       case scalar_store:
 -      case vector_store:
 	return 1;
       case vec_promote_demote:
@@ -3901,6 +3899,8 @@ loongarch_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
 	return LASX_SUPPORTED_MODE_P (mode)
 	  && !LSX_SUPPORTED_MODE_P (mode) ? 2 : 1;
 +      case vector_load:
 +      case vector_store:
       case unaligned_load:
       case unaligned_store:
 	return 2;
 -- 
 2.43.0
--- a/0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
+++ b/0030-LoongArch-Implement-C-LT-Z_DEFINED_VALUE_AT_ZERO.patch
@ -0,0 +1,58 @@
 From bd74cb3e1238e842d15bcd4044c9e2f246cc18bc Mon Sep 17 00:00:00 2001
 From: Li Wei <liwei@loongson.cn>
 Date: Fri, 17 Nov 2023 10:38:02 +0800
 Subject: [PATCH 030/188] LoongArch: Implement C[LT]Z_DEFINED_VALUE_AT_ZERO
 LoongArch has defined ctz and clz in the backend, but if we want GCC to
 do the CTZ transformation optimization in the forwprop2 pass, GCC needs to
 know the value of c[lt]z at zero, which may be beneficial for some test
 cases (like spec2017 deepsjeng_r).
 After implementing the macro, we test dynamic instruction count on
 deepsjeng_r:
 - before 1688423249186
 - after  1660311215745 (1.66% reduction)
 gcc/ChangeLog:
 	* config/loongarch/loongarch.h (CLZ_DEFINED_VALUE_AT_ZERO):
 	Implement.
 	(CTZ_DEFINED_VALUE_AT_ZERO): Same.
 gcc/testsuite/ChangeLog:
 	* gcc.dg/pr90838.c: add clz/ctz test support on LoongArch.
 ---
 gcc/config/loongarch/loongarch.h | 5 +++++
 gcc/testsuite/gcc.dg/pr90838.c   | 5 +++++
 2 files changed, 10 insertions(+)
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index 6e8ac293a..19cf6fd33 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -1239,3 +1239,8 @@ struct GTY (()) machine_function
 #define TARGET_EXPLICIT_RELOCS \
   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
 +
 +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
 +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
 +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 diff --git a/gcc/testsuite/gcc.dg/pr90838.c b/gcc/testsuite/gcc.dg/pr90838.c
 index 7502b8463..7aa912525 100644
 --- a/gcc/testsuite/gcc.dg/pr90838.c
 +++ b/gcc/testsuite/gcc.dg/pr90838.c
@@ -82,3 +82,8 @@ int ctz4 (unsigned long x)
 /* { dg-final { scan-assembler-times "ctz\t" 3 { target { rv32 } } } } */
 /* { dg-final { scan-assembler-times "andi\t" 1 { target { rv32 } } } } */
 /* { dg-final { scan-assembler-times "mul\t" 1 { target { rv32 } } } } */
 +
 +/* { dg-final { scan-tree-dump-times {= \.CTZ} 4 "forwprop2" { target { loongarch64*-*-* } } } } */
 +/* { dg-final { scan-assembler-times "ctz.d\t" 1 { target { loongarch64*-*-* } } } } */
 +/* { dg-final { scan-assembler-times "ctz.w\t" 3 { target { loongarch64*-*-* } } } } */
 +/* { dg-final { scan-assembler-times "andi\t" 4 { target { loongarch64*-*-* } } } } */
 -- 
 2.43.0
--- a/0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
+++ b/0031-LoongArch-Handle-vectorized-copysign-x-1-expansion-e.patch
@ -0,0 +1,197 @@
 From 61daf071708947ef8431ac36bc6c6b47339fdd2a Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Tue, 14 Nov 2023 00:17:19 +0800
 Subject: [PATCH 031/188] LoongArch: Handle vectorized copysign (x, -1)
 expansion efficiently
 With LSX or LASX, copysign (x[i], -1) (or any negative constant) can be
 vectorized using [x]vbitseti.{w/d} instructions to directly set the
 signbits.
 Inspired by Tamar Christina's "AArch64: Handle copysign (x, -1) expansion
 efficiently" (r14-5289).
 gcc/ChangeLog:
 	* config/loongarch/lsx.md (copysign<mode>3): Allow operand[2] to
 	be a reg_or_vector_same_val_operand.  If it's a const vector
 	with identical negative elements, expand the copysign with a bitset
 	instruction.  Otherwise, force it into a register.
 	* config/loongarch/lasx.md (copysign<mode>3): Likewise.
 gcc/testsuite/ChangeLog:
 	* g++.target/loongarch/vect-copysign-negconst.C: New test.
 	* g++.target/loongarch/vect-copysign-negconst-run.C: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 22 ++++++++-
 gcc/config/loongarch/lsx.md                   | 22 ++++++++-
 .../loongarch/vect-copysign-negconst-run.C    | 47 +++++++++++++++++++
 .../loongarch/vect-copysign-negconst.C        | 27 +++++++++++
 4 files changed, 116 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
 create mode 100644 gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index f0f2dd08d..2e11f0612 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -3136,11 +3136,31 @@
 	  (match_operand:FLASX 1 "register_operand")))
    (set (match_dup 5)
 	(and:FLASX (match_dup 3)
 -		   (match_operand:FLASX 2 "register_operand")))
 +		   (match_operand:FLASX 2 "reg_or_vector_same_val_operand")))
    (set (match_operand:FLASX 0 "register_operand")
 	(ior:FLASX (match_dup 4) (match_dup 5)))]
   "ISA_HAS_LASX"
 {
 +  /* copysign (x, -1) should instead be expanded as setting the sign
 +     bit.  */
 +  if (!REG_P (operands[2]))
 +    {
 +      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
 +      if (GET_CODE (op2_elt) == CONST_DOUBLE
 +	  && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
 +	{
 +	  rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
 +	  operands[0] = lowpart_subreg (<VIMODE256>mode, operands[0],
 +					<MODE>mode);
 +	  operands[1] = lowpart_subreg (<VIMODE256>mode, operands[1],
 +					<MODE>mode);
 +	  emit_insn (gen_lasx_xvbitseti_<lasxfmt> (operands[0],
 +						   operands[1], n));
 +	  DONE;
 +	}
 +    }
 +
 +  operands[2] = force_reg (<MODE>mode, operands[2]);
   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
   operands[4] = gen_reg_rtx (<MODE>mode);
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 55c7d79a0..8ea41c85b 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -2873,11 +2873,31 @@
 	  (match_operand:FLSX 1 "register_operand")))
    (set (match_dup 5)
 	(and:FLSX (match_dup 3)
 -		  (match_operand:FLSX 2 "register_operand")))
 +		  (match_operand:FLSX 2 "reg_or_vector_same_val_operand")))
    (set (match_operand:FLSX 0 "register_operand")
 	(ior:FLSX (match_dup 4) (match_dup 5)))]
   "ISA_HAS_LSX"
 {
 +  /* copysign (x, -1) should instead be expanded as setting the sign
 +     bit.  */
 +  if (!REG_P (operands[2]))
 +    {
 +      rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
 +      if (GET_CODE (op2_elt) == CONST_DOUBLE
 +	  && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
 +	{
 +	  rtx n = GEN_INT (8 * GET_MODE_SIZE (<UNITMODE>mode) - 1);
 +	  operands[0] = lowpart_subreg (<VIMODE>mode, operands[0],
 +					<MODE>mode);
 +	  operands[1] = lowpart_subreg (<VIMODE>mode, operands[1],
 +					<MODE>mode);
 +	  emit_insn (gen_lsx_vbitseti_<lsxfmt> (operands[0], operands[1],
 +						n));
 +	  DONE;
 +	}
 +    }
 +
 +  operands[2] = force_reg (<MODE>mode, operands[2]);
   operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
   operands[4] = gen_reg_rtx (<MODE>mode);
 diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
 new file mode 100644
 index 000000000..d2d5d15c9
 --- /dev/null
 +++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst-run.C
@@ -0,0 +1,47 @@
 +/* { dg-do run } */
 +/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
 +/* { dg-require-effective-target loongarch_asx_hw } */
 +
 +#include "vect-copysign-negconst.C"
 +
 +double d[] = {1.2, -3.4, -5.6, 7.8};
 +float f[] = {1.2, -3.4, -5.6, 7.8, -9.0, -11.4, 51.4, 1919.810};
 +
 +double _abs(double x) { return __builtin_fabs (x); }
 +float _abs(float x) { return __builtin_fabsf (x); }
 +
 +template <class T>
 +void
 +check (T *arr, T *orig, int len)
 +{
 +  for (int i = 0; i < len; i++)
 +    {
 +      if (arr[i] > 0)
 +	__builtin_trap ();
 +      if (_abs (arr[i]) != _abs (orig[i]))
 +	__builtin_trap ();
 +    }
 +}
 +
 +int
 +main()
 +{
 +  double test_d[4];
 +  float test_f[8];
 +
 +  __builtin_memcpy (test_d, d, sizeof (test_d));
 +  force_negative<2> (test_d);
 +  check (test_d, d, 2);
 +
 +  __builtin_memcpy (test_d, d, sizeof (test_d));
 +  force_negative<4> (test_d);
 +  check (test_d, d, 4);
 +
 +  __builtin_memcpy (test_f, f, sizeof (test_f));
 +  force_negative<4> (test_f);
 +  check (test_f, f, 4);
 +
 +  __builtin_memcpy (test_f, f, sizeof (test_f));
 +  force_negative<8> (test_f);
 +  check (test_f, f, 8);
 +}
 diff --git a/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
 new file mode 100644
 index 000000000..5e8820d2b
 --- /dev/null
 +++ b/gcc/testsuite/g++.target/loongarch/vect-copysign-negconst.C
@@ -0,0 +1,27 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mlasx -mno-strict-align" } */
 +/* { dg-final { scan-assembler "\txvbitseti.*63" } } */
 +/* { dg-final { scan-assembler "\txvbitseti.*31" } } */
 +/* { dg-final { scan-assembler "\tvbitseti.*63" } } */
 +/* { dg-final { scan-assembler "\tvbitseti.*31" } } */
 +
 +template <int N>
 +__attribute__ ((noipa)) void
 +force_negative (float *arr)
 +{
 +  for (int i = 0; i < N; i++)
 +    arr[i] = __builtin_copysignf (arr[i], -2);
 +}
 +
 +template <int N>
 +__attribute__ ((noipa)) void
 +force_negative (double *arr)
 +{
 +  for (int i = 0; i < N; i++)
 +    arr[i] = __builtin_copysign (arr[i], -3);
 +}
 +
 +template void force_negative<4>(float *);
 +template void force_negative<8>(float *);
 +template void force_negative<2>(double *);
 +template void force_negative<4>(double *);
 -- 
 2.43.0
--- a/0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
+++ b/0032-LoongArch-Add-code-generation-support-for-call36-fun.patch
@ -0,0 +1,561 @@
 From 5ab014701ddd9968855026f0e2ae1af2b165bcd7 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Thu, 16 Nov 2023 15:06:11 +0800
 Subject: [PATCH 032/188] LoongArch: Add code generation support for call36
 function calls.
 When compiling with '-mcmodel=medium', the function call is made through
 'pcaddu18i+jirl' if binutils supports call36, otherwise the
 native implementation 'pcalau12i+jirl' is used.
 gcc/ChangeLog:
 	* config.in: Regenerate.
 	* config/loongarch/loongarch-opts.h (HAVE_AS_SUPPORT_CALL36): Define macro.
 	* config/loongarch/loongarch.cc (loongarch_legitimize_call_address):
 	If binutils supports call36, the function call is not split over expand.
 	* config/loongarch/loongarch.md: Add call36 generation code.
 	* config/loongarch/predicates.md: Likewise.
 	* configure: Regenerate.
 	* configure.ac: Check whether binutils supports call36.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/func-call-medium-5.c: Skip the test if the
 	assembler supports call36.
 	* gcc.target/loongarch/func-call-medium-6.c: Likewise.
 	* gcc.target/loongarch/func-call-medium-7.c: Likewise.
 	* gcc.target/loongarch/func-call-medium-8.c: Likewise.
 	* lib/target-supports.exp: Added a function to see if the assembler supports
 	the call36 relocation.
 	* gcc.target/loongarch/func-call-medium-call36-1.c: New test.
 	* gcc.target/loongarch/func-call-medium-call36.c: New test.
 Co-authored-by: Xi Ruoyao <xry111@xry111.site>
 ---
 gcc/config.in                                 |   6 +
 gcc/config/loongarch/loongarch-opts.h         |   4 +
 gcc/config/loongarch/loongarch.cc             |  12 +-
 gcc/config/loongarch/loongarch.md             | 171 +++++++++++++++---
 gcc/config/loongarch/predicates.md            |   7 +-
 gcc/configure                                 |  32 ++++
 gcc/configure.ac                              |   6 +
 .../gcc.target/loongarch/func-call-medium-5.c |   1 +
 .../gcc.target/loongarch/func-call-medium-6.c |   1 +
 .../gcc.target/loongarch/func-call-medium-7.c |   1 +
 .../gcc.target/loongarch/func-call-medium-8.c |   1 +
 .../loongarch/func-call-medium-call36-1.c     |  21 +++
 .../loongarch/func-call-medium-call36.c       |  32 ++++
 gcc/testsuite/lib/target-supports.exp         |   9 +
 14 files changed, 268 insertions(+), 36 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c
 diff --git a/gcc/config.in b/gcc/config.in
 index 04968b53c..033cfb98b 100644
 --- a/gcc/config.in
 +++ b/gcc/config.in
@@ -759,6 +759,12 @@
 #endif
 +/* Define if your assembler supports call36 relocation. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_AS_SUPPORT_CALL36
 +#endif
 +
 +
 /* Define if your assembler and linker support thread-local storage. */
 #ifndef USED_FOR_TARGET
 #undef HAVE_AS_TLS
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index dfbe9dd5c..22ce1a122 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -99,6 +99,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 #define HAVE_AS_EXPLICIT_RELOCS 0
 #endif
 +#ifndef HAVE_AS_SUPPORT_CALL36
 +#define HAVE_AS_SUPPORT_CALL36 0
 +#endif
 +
 #ifndef HAVE_AS_MRELAX_OPTION
 #define HAVE_AS_MRELAX_OPTION 0
 #endif
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index b6f0d61ef..43f0e82ba 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -3002,12 +3002,16 @@ loongarch_legitimize_call_address (rtx addr)
   enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr);
 -  /* Split function call insn 'bl sym' or 'bl %plt(sym)' to :
 -     pcalau12i $rd, %pc_hi20(sym)
 -     jr $rd, %pc_lo12(sym).  */
 +  /* If add the compilation option '-cmodel=medium', and the assembler does
 +     not support call36.  The following sequence of instructions will be
 +     used for the function call:
 +	pcalau12i $rd, %pc_hi20(sym)
 +	jr $rd, %pc_lo12(sym)
 +  */
   if (TARGET_CMODEL_MEDIUM
 -      && TARGET_EXPLICIT_RELOCS
 +      && !HAVE_AS_SUPPORT_CALL36
 +      && (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE)
       && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr))
       && (symbol_type == SYMBOL_PCREL
 	  || (symbol_type == SYMBOL_GOT_DISP && flag_plt)))
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index ed86c95bd..52e40a208 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -3274,7 +3274,13 @@
 					    XEXP (target, 1),
 					    operands[1]));
   else
 -    emit_call_insn (gen_sibcall_internal (target, operands[1]));
 +    {
 +      rtx call = emit_call_insn (gen_sibcall_internal (target, operands[1]));
 +
 +      if (TARGET_CMODEL_MEDIUM && !REG_P (target))
 +	clobber_reg (&CALL_INSN_FUNCTION_USAGE (call),
 +		     gen_rtx_REG (Pmode, T0_REGNUM));
 +    }
   DONE;
 })
@@ -3282,10 +3288,25 @@
   [(call (mem:SI (match_operand 0 "call_insn_operand" "j,c,b"))
 	 (match_operand 1 "" ""))]
   "SIBLING_CALL_P (insn)"
 -  "@
 -   jr\t%0
 -   b\t%0
 -   b\t%%plt(%0)"
 +{
 +  switch (which_alternative)
 +    {
 +    case 0:
 +      return "jr\t%0";
 +    case 1:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0";
 +      else
 +	return "b\t%0";
 +    case 2:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r12,%%call36(%0)\n\tjirl\t$r0,$r12,0";
 +      else
 +	return "b\t%%plt(%0)";
 +    default:
 +      gcc_unreachable ();
 +    }
 +}
   [(set_attr "jirl" "indirect,direct,direct")])
 (define_insn "@sibcall_internal_1<mode>"
@@ -3318,9 +3339,17 @@
 							   operands[2],
 							   arg2));
       else
 -	emit_call_insn (gen_sibcall_value_multiple_internal (arg1, target,
 -							   operands[2],
 -							   arg2));
 +	{
 +	  rtx call
 +	    = emit_call_insn (gen_sibcall_value_multiple_internal (arg1,
 +								   target,
 +								   operands[2],
 +								   arg2));
 +
 +	  if (TARGET_CMODEL_MEDIUM && !REG_P (target))
 +	    clobber_reg (&CALL_INSN_FUNCTION_USAGE (call),
 +			gen_rtx_REG (Pmode, T0_REGNUM));
 +	}
     }
    else
     {
@@ -3334,8 +3363,15 @@
 						  XEXP (target, 1),
 						  operands[2]));
       else
 -	emit_call_insn (gen_sibcall_value_internal (operands[0], target,
 -						  operands[2]));
 +	{
 +	  rtx call = emit_call_insn (gen_sibcall_value_internal (operands[0],
 +								 target,
 +								 operands[2]));
 +
 +	  if (TARGET_CMODEL_MEDIUM && !REG_P (target))
 +	    clobber_reg (&CALL_INSN_FUNCTION_USAGE (call),
 +			gen_rtx_REG (Pmode, T0_REGNUM));
 +	}
     }
   DONE;
 })
@@ -3345,10 +3381,25 @@
 	(call (mem:SI (match_operand 1 "call_insn_operand" "j,c,b"))
 	      (match_operand 2 "" "")))]
   "SIBLING_CALL_P (insn)"
 -  "@
 -   jr\t%1
 -   b\t%1
 -   b\t%%plt(%1)"
 +{
 +  switch (which_alternative)
 +    {
 +    case 0:
 +      return "jr\t%1";
 +    case 1:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
 +      else
 +	return "b\t%1";
 +    case 2:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
 +      else
 +	return "b\t%%plt(%1)";
 +    default:
 +      gcc_unreachable ();
 +    }
 +}
   [(set_attr "jirl" "indirect,direct,direct")])
 (define_insn "@sibcall_value_internal_1<mode>"
@@ -3368,10 +3419,25 @@
 	(call (mem:SI (match_dup 1))
 	      (match_dup 2)))]
   "SIBLING_CALL_P (insn)"
 -  "@
 -   jr\t%1
 -   b\t%1
 -   b\t%%plt(%1)"
 +{
 +  switch (which_alternative)
 +    {
 +    case 0:
 +      return "jr\t%1";
 +    case 1:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
 +      else
 +	return "b\t%1";
 +    case 2:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r12,%%call36(%1)\n\tjirl\t$r0,$r12,0";
 +      else
 +	return "b\t%%plt(%1)";
 +    default:
 +      gcc_unreachable ();
 +    }
 +}
   [(set_attr "jirl" "indirect,direct,direct")])
 (define_insn "@sibcall_value_multiple_internal_1<mode>"
@@ -3411,10 +3477,25 @@
 	 (match_operand 1 "" ""))
    (clobber (reg:SI RETURN_ADDR_REGNUM))]
   ""
 -  "@
 -   jirl\t$r1,%0,0
 -   bl\t%0
 -   bl\t%%plt(%0)"
 +{
 +  switch (which_alternative)
 +    {
 +    case 0:
 +      return "jirl\t$r1,%0,0";
 +    case 1:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0";
 +      else
 +	return "bl\t%0";
 +    case 2:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r1,%%call36(%0)\n\tjirl\t$r1,$r1,0";
 +      else
 +	return "bl\t%%plt(%0)";
 +    default:
 +      gcc_unreachable ();
 +    }
 +}
   [(set_attr "jirl" "indirect,direct,direct")])
 (define_insn "@call_internal_1<mode>"
@@ -3473,10 +3554,25 @@
 	      (match_operand 2 "" "")))
    (clobber (reg:SI RETURN_ADDR_REGNUM))]
   ""
 -  "@
 -   jirl\t$r1,%1,0
 -   bl\t%1
 -   bl\t%%plt(%1)"
 +{
 +  switch (which_alternative)
 +    {
 +    case 0:
 +      return "jirl\t$r1,%1,0";
 +    case 1:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
 +      else
 +	return "bl\t%1";
 +    case 2:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
 +      else
 +	return "bl\t%%plt(%1)";
 +    default:
 +      gcc_unreachable ();
 +    }
 +}
   [(set_attr "jirl" "indirect,direct,direct")])
 (define_insn "@call_value_internal_1<mode>"
@@ -3498,10 +3594,25 @@
 	      (match_dup 2)))
    (clobber (reg:SI RETURN_ADDR_REGNUM))]
   ""
 -  "@
 -   jirl\t$r1,%1,0
 -   bl\t%1
 -   bl\t%%plt(%1)"
 +{
 +  switch (which_alternative)
 +    {
 +    case 0:
 +      return "jirl\t$r1,%1,0";
 +    case 1:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
 +      else
 +	return "bl\t%1";
 +    case 2:
 +      if (TARGET_CMODEL_MEDIUM)
 +	return "pcaddu18i\t$r1,%%call36(%1)\n\tjirl\t$r1,$r1,0";
 +      else
 +	return "bl\t%%plt(%1)";
 +    default:
 +      gcc_unreachable ();
 +    }
 +}
   [(set_attr "jirl" "indirect,direct,direct")])
 (define_insn "@call_value_multiple_internal_1<mode>"
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 1d669f560..2aae87db4 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -443,7 +443,9 @@
     {
     case SYMBOL_PCREL:
       if (TARGET_CMODEL_EXTREME
 -	  || (TARGET_CMODEL_MEDIUM && !TARGET_EXPLICIT_RELOCS))
 +	  || (TARGET_CMODEL_MEDIUM
 +	      && HAVE_AS_SUPPORT_CALL36
 +	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
 	return false;
       else
 	return 1;
@@ -452,7 +454,8 @@
       if (TARGET_CMODEL_EXTREME
 	  || !flag_plt
 	  || (flag_plt && TARGET_CMODEL_MEDIUM
 -	      && !TARGET_EXPLICIT_RELOCS))
 +	      && HAVE_AS_SUPPORT_CALL36
 +	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
 	return false;
       else
 	return 1;
 diff --git a/gcc/configure b/gcc/configure
 index 09bacfec3..5842e7a18 100755
 --- a/gcc/configure
 +++ b/gcc/configure
@@ -28836,6 +28836,38 @@ if test $gcc_cv_as_loongarch_explicit_relocs = yes; then
 $as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h
 +fi
 +
 +    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for call36 relocation support" >&5
 +$as_echo_n "checking assembler for call36 relocation support... " >&6; }
 +if ${gcc_cv_as_loongarch_call36+:} false; then :
 +  $as_echo_n "(cached) " >&6
 +else
 +  gcc_cv_as_loongarch_call36=no
 +  if test x$gcc_cv_as != x; then
 +    $as_echo 'pcaddu18i $r1, %call36(a)
 +       jirl $r1, $r1, 0' > conftest.s
 +    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
 +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
 +  (eval $ac_try) 2>&5
 +  ac_status=$?
 +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
 +  test $ac_status = 0; }; }
 +    then
 +	gcc_cv_as_loongarch_call36=yes
 +    else
 +      echo "configure: failed program was" >&5
 +      cat conftest.s >&5
 +    fi
 +    rm -f conftest.o conftest.s
 +  fi
 +fi
 +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_call36" >&5
 +$as_echo "$gcc_cv_as_loongarch_call36" >&6; }
 +if test $gcc_cv_as_loongarch_call36 = yes; then
 +
 +$as_echo "#define HAVE_AS_SUPPORT_CALL36 1" >>confdefs.h
 +
 fi
     { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for eh_frame pcrel encoding support" >&5
 diff --git a/gcc/configure.ac b/gcc/configure.ac
 index a0999152e..9c3fd3ad6 100644
 --- a/gcc/configure.ac
 +++ b/gcc/configure.ac
@@ -5329,6 +5329,12 @@ x:
       [a:pcalau12i $t0,%pc_hi20(a)],,
       [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1,
 	  [Define if your assembler supports explicit relocation.])])
 +    gcc_GAS_CHECK_FEATURE([call36 relocation support],
 +      gcc_cv_as_loongarch_call36,,
 +      [pcaddu18i $r1, %call36(a)
 +       jirl $r1, $r1, 0],,
 +      [AC_DEFINE(HAVE_AS_SUPPORT_CALL36, 1,
 +	  [Define if your assembler supports call36 relocation.])])
     gcc_GAS_CHECK_FEATURE([eh_frame pcrel encoding support],
       gcc_cv_as_loongarch_eh_frame_pcrel_encoding_support,,
       [.cfi_startproc
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
 index 8a47b5afc..cae880bd8 100644
 --- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-5.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
 +/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
 /* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=medium" } */
 /* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */
 /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
 index 1e75e60e0..33819542d 100644
 --- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-6.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
 +/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
 /* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=medium" } */
 /* { dg-final { scan-assembler "test:.*pcalau12i.*%pc_hi20\\(g\\)\n\tjirl.*pc_lo12\\(g\\)" } } */
 /* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
 index 9e89085ca..969b59d04 100644
 --- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-7.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
 +/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
 /* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=medium" } */
 /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
 /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c
 index fde9c6e0e..786ff395f 100644
 --- a/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-8.c
@@ -1,4 +1,5 @@
 /* { dg-do compile } */
 +/* { dg-skip-if "dg-require-effective-target loongarch_call36_support" { *-*-* } } */
 /* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=medium" } */
 /* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
 /* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%pc_hi20\\(f\\)\n\tjirl.*%pc_lo12\\(f\\)" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c
 new file mode 100644
 index 000000000..872ff32f8
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36-1.c
@@ -0,0 +1,21 @@
 +/* { dg-do compile } */
 +/* { dg-require-effective-target loongarch_call36_support } */
 +/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */
 +/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r1,%call36\\(func\\)" } } */
 +/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r1,%call36\\(func_value\\)" } } */
 +
 +extern void func (void);
 +int
 +test (void)
 +{
 +  func ();
 +}
 +
 +
 +extern int func_value (void);
 +float
 +test_value (void)
 +{
 +  func_value ();
 +}
 +
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c
 new file mode 100644
 index 000000000..98ccd260d
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-medium-call36.c
@@ -0,0 +1,32 @@
 +/* { dg-do compile } */
 +/* { dg-require-effective-target loongarch_call36_support } */
 +/* { dg-options "-mcmodel=medium -mexplicit-relocs -fdump-rtl-final -O2" } */
 +/* { dg-final { scan-rtl-dump-times "\\(clobber \\(reg:DI 12 \\\$r12\\)\\)" 3 "final" } } */
 +/* { dg-final { scan-assembler "test:.*pcaddu18i\t\\\$r12,%call36\\(func\\)" } } */
 +/* { dg-final { scan-assembler "test_value:.*pcaddu18i\t\\\$r12,%call36\\(func_value\\)" } } */
 +/* { dg-final { scan-assembler "test_multi:.*pcaddu18i\t\\\$r12,%call36\\(func_multi\\)" } } */
 +
 +extern void func (void);
 +void
 +test (void)
 +{
 +  func();
 +}
 +
 +
 +extern int func_value (void);
 +int
 +test_value (void)
 +{
 +  func_value ();
 +}
 +
 +struct t {float a; float b;};
 +
 +extern struct t func_multi (void);
 +struct t
 +test_multi (void)
 +{
 +  func_multi ();
 +}
 +
 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
 index bbe145c1c..b8bff1a31 100644
 --- a/gcc/testsuite/lib/target-supports.exp
 +++ b/gcc/testsuite/lib/target-supports.exp
@@ -10573,6 +10573,15 @@ proc check_effective_target_loongarch_asx_hw { } {
     } "-mlasx"]
 }
 +# Check whether LoongArch binutils supports call36 relocation.
 +proc check_effective_target_loongarch_call36_support { } {
 +  return [check_no_compiler_messages loongarch_call36_support object {
 +/* Assembly code */
 +   pcaddu18i $r1,%call36(a)
 +   jirl $r1,$r1,0
 +  } ""]
 +}
 +
 # Return 1 if the target does *not* require strict alignment.
 proc check_effective_target_non_strict_align {} {
 -- 
 2.43.0
--- a/0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
+++ b/0033-LoongArch-Implement-atomic-operations-using-LoongArc.patch
@ -0,0 +1,362 @@
 From 704e67084fcd7f3ea89321e17dfafa7e907c907c Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Fri, 17 Nov 2023 15:42:53 +0800
 Subject: [PATCH 033/188] LoongArch: Implement atomic operations using
 LoongArch1.1 instructions.
 1. short and char type calls for __atomic_add_fetch and __atomic_fetch_add are
   implemented using amadd{_db}.{b/h}.
 2. Use amcas{_db}.{b/h/w/d} to implement __atomic_compare_exchange_n and __atomic_compare_exchange.
 3. The short and char types of the functions __atomic_exchange and __atomic_exchange_n are
   implemented using amswap{_db}.{b/h}.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-def.h: Add comments.
 	* config/loongarch/loongarch-opts.h (ISA_BASE_IS_LA64V110): Define macro.
 	* config/loongarch/loongarch.cc (loongarch_memmodel_needs_rel_acq_fence):
 	Remove redundant code implementations.
 	* config/loongarch/sync.md (d): Added QI, HI support.
 	(atomic_add<mode>): New template.
 	(atomic_exchange<mode>_short): Likewise.
 	(atomic_cas_value_strong<mode>_amcas): Likewise.
 	(atomic_fetch_add<mode>_short): Likewise.
 ---
 gcc/config/loongarch/loongarch-def.h  |   2 +
 gcc/config/loongarch/loongarch-opts.h |   2 +-
 gcc/config/loongarch/loongarch.cc     |   6 +-
 gcc/config/loongarch/sync.md          | 186 ++++++++++++++++++++------
 4 files changed, 147 insertions(+), 49 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index 4757de14b..078d8607d 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -54,7 +54,9 @@ extern "C" {
 /* enum isa_base */
 extern const char* loongarch_isa_base_strings[];
 +/* LoongArch V1.00.  */
 #define ISA_BASE_LA64V100     0
 +/* LoongArch V1.10.  */
 #define ISA_BASE_LA64V110     1
 #define N_ISA_BASE_TYPES      2
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index 22ce1a122..9b3d023ac 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -86,10 +86,10 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 				   || la_target.isa.simd == ISA_EXT_SIMD_LASX)
 #define ISA_HAS_LASX		  (la_target.isa.simd == ISA_EXT_SIMD_LASX)
 -
 /* TARGET_ macros for use in *.md template conditionals */
 #define TARGET_uARCH_LA464	  (la_target.cpu_tune == CPU_LA464)
 #define TARGET_uARCH_LA664	  (la_target.cpu_tune == CPU_LA664)
 +#define ISA_BASE_IS_LA64V110	  (la_target.isa.base == ISA_BASE_LA64V110)
 /* Note: optimize_size may vary across functions,
    while -m[no]-memcpy imposes a global constraint.  */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 43f0e82ba..7bb46a45d 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -5813,16 +5813,12 @@ loongarch_print_operand_punct_valid_p (unsigned char code)
 static bool
 loongarch_memmodel_needs_rel_acq_fence (enum memmodel model)
 {
 -  switch (model)
 +  switch (memmodel_base (model))
     {
       case MEMMODEL_ACQ_REL:
       case MEMMODEL_SEQ_CST:
 -      case MEMMODEL_SYNC_SEQ_CST:
       case MEMMODEL_RELEASE:
 -      case MEMMODEL_SYNC_RELEASE:
       case MEMMODEL_ACQUIRE:
 -      case MEMMODEL_CONSUME:
 -      case MEMMODEL_SYNC_ACQUIRE:
 	return true;
       case MEMMODEL_RELAXED:
 diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
 index dd1f98946..1eabaec04 100644
 --- a/gcc/config/loongarch/sync.md
 +++ b/gcc/config/loongarch/sync.md
@@ -38,7 +38,7 @@
   [(plus "add") (ior "or") (xor "xor") (and "and")])
 ;; This attribute gives the format suffix for atomic memory operations.
 -(define_mode_attr amo [(SI "w") (DI "d")])
 +(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")])
 ;; <amop> expands to the name of the atomic operand that implements a
 ;; particular code.
@@ -123,7 +123,18 @@
 	 UNSPEC_SYNC_OLD_OP))]
   ""
   "am<amop>%A2.<amo>\t$zero,%z1,%0"
 -  [(set (attr "length") (const_int 8))])
 +  [(set (attr "length") (const_int 4))])
 +
 +(define_insn "atomic_add<mode>"
 +  [(set (match_operand:SHORT 0 "memory_operand" "+ZB")
 +	(unspec_volatile:SHORT
 +	  [(plus:SHORT (match_dup 0)
 +		       (match_operand:SHORT 1 "reg_or_0_operand" "rJ"))
 +	   (match_operand:SI 2 "const_int_operand")] ;; model
 +	 UNSPEC_SYNC_OLD_OP))]
 +  "ISA_BASE_IS_LA64V110"
 +  "amadd%A2.<amo>\t$zero,%z1,%0"
 +  [(set (attr "length") (const_int 4))])
 (define_insn "atomic_fetch_<atomic_optab><mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -131,12 +142,12 @@
    (set (match_dup 1)
 	(unspec_volatile:GPR
 	  [(any_atomic:GPR (match_dup 1)
 -		     (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
 +			   (match_operand:GPR 2 "reg_or_0_operand" "rJ"))
 	   (match_operand:SI 3 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
   ""
   "am<amop>%A3.<amo>\t%0,%z2,%1"
 -  [(set (attr "length") (const_int 8))])
 +  [(set (attr "length") (const_int 4))])
 (define_insn "atomic_exchange<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -148,7 +159,19 @@
 	(match_operand:GPR 2 "register_operand" "r"))]
   ""
   "amswap%A3.<amo>\t%0,%z2,%1"
 -  [(set (attr "length") (const_int 8))])
 +  [(set (attr "length") (const_int 4))])
 +
 +(define_insn "atomic_exchange<mode>_short"
 +  [(set (match_operand:SHORT 0 "register_operand" "=&r")
 +	(unspec_volatile:SHORT
 +	  [(match_operand:SHORT 1 "memory_operand" "+ZB")
 +	   (match_operand:SI 3 "const_int_operand")] ;; model
 +	  UNSPEC_SYNC_EXCHANGE))
 +   (set (match_dup 1)
 +	(match_operand:SHORT 2 "register_operand" "r"))]
 +  "ISA_BASE_IS_LA64V110"
 +  "amswap%A3.<amo>\t%0,%z2,%1"
 +  [(set (attr "length") (const_int 4))])
 (define_insn "atomic_cas_value_strong<mode>"
   [(set (match_operand:GPR 0 "register_operand" "=&r")
@@ -156,25 +179,36 @@
    (set (match_dup 1)
 	(unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ")
 			      (match_operand:GPR 3 "reg_or_0_operand" "rJ")
 -			      (match_operand:SI 4 "const_int_operand")  ;; mod_s
 -			      (match_operand:SI 5 "const_int_operand")] ;; mod_f
 +			      (match_operand:SI 4 "const_int_operand")]  ;; mod_s
 	 UNSPEC_COMPARE_AND_SWAP))
 -   (clobber (match_scratch:GPR 6 "=&r"))]
 +   (clobber (match_scratch:GPR 5 "=&r"))]
   ""
 {
   return "1:\\n\\t"
 	 "ll.<amo>\\t%0,%1\\n\\t"
 	 "bne\\t%0,%z2,2f\\n\\t"
 -	 "or%i3\\t%6,$zero,%3\\n\\t"
 -	 "sc.<amo>\\t%6,%1\\n\\t"
 -	 "beqz\\t%6,1b\\n\\t"
 +	 "or%i3\\t%5,$zero,%3\\n\\t"
 +	 "sc.<amo>\\t%5,%1\\n\\t"
 +	 "beqz\\t%5,1b\\n\\t"
 	 "b\\t3f\\n\\t"
 	 "2:\\n\\t"
 -	 "%G5\\n\\t"
 +	 "%G4\\n\\t"
 	 "3:\\n\\t";
 }
   [(set (attr "length") (const_int 28))])
 +(define_insn "atomic_cas_value_strong<mode>_amcas"
 +  [(set (match_operand:QHWD 0 "register_operand" "=&r")
 +	(match_operand:QHWD 1 "memory_operand" "+ZB"))
 +   (set (match_dup 1)
 +	(unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ")
 +			       (match_operand:QHWD 3 "reg_or_0_operand" "rJ")
 +			       (match_operand:SI 4 "const_int_operand")]  ;; mod_s
 +	 UNSPEC_COMPARE_AND_SWAP))]
 +  "ISA_BASE_IS_LA64V110"
 +  "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
 +  [(set (attr "length") (const_int 8))])
 +
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:SI 0 "register_operand" "")   ;; bool output
    (match_operand:GPR 1 "register_operand" "")  ;; val output
@@ -186,9 +220,29 @@
    (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
   ""
 {
 -  emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
 -						operands[3], operands[4],
 -						operands[6], operands[7]));
 +  rtx mod_s, mod_f;
 +
 +  mod_s = operands[6];
 +  mod_f = operands[7];
 +
 +  /* Normally the succ memory model must be stronger than fail, but in the
 +     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
 +     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
 +
 +  if (is_mm_acquire (memmodel_base (INTVAL (mod_f)))
 +      && is_mm_release (memmodel_base (INTVAL (mod_s))))
 +    mod_s = GEN_INT (MEMMODEL_ACQ_REL);
 +
 +  operands[6] = mod_s;
 +
 +  if (ISA_BASE_IS_LA64V110)
 +    emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
 +							 operands[3], operands[4],
 +							 operands[6]));
 +  else
 +    emit_insn (gen_atomic_cas_value_strong<mode> (operands[1], operands[2],
 +						  operands[3], operands[4],
 +						  operands[6]));
   rtx compare = operands[1];
   if (operands[3] != const0_rtx)
@@ -292,31 +346,53 @@
    (match_operand:SI 7 "const_int_operand" "")] ;; mod_f
   ""
 {
 -  union loongarch_gen_fn_ptrs generator;
 -  generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
 -  loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
 -				operands[3], operands[4], operands[7]);
 +  rtx mod_s, mod_f;
 -  rtx compare = operands[1];
 -  if (operands[3] != const0_rtx)
 -    {
 -      machine_mode mode = GET_MODE (operands[3]);
 -      rtx op1 = convert_modes (SImode, mode, operands[1], true);
 -      rtx op3 = convert_modes (SImode, mode, operands[3], true);
 -      rtx difference = gen_rtx_MINUS (SImode, op1, op3);
 -      compare = gen_reg_rtx (SImode);
 -      emit_insn (gen_rtx_SET (compare, difference));
 -    }
 +  mod_s = operands[6];
 +  mod_f = operands[7];
 -  if (word_mode != <MODE>mode)
 +  /* Normally the succ memory model must be stronger than fail, but in the
 +     unlikely event of fail being ACQUIRE and succ being RELEASE we need to
 +     promote succ to ACQ_REL so that we don't lose the acquire semantics.  */
 +
 +  if (is_mm_acquire (memmodel_base (INTVAL (mod_f)))
 +      && is_mm_release (memmodel_base (INTVAL (mod_s))))
 +    mod_s = GEN_INT (MEMMODEL_ACQ_REL);
 +
 +  operands[6] = mod_s;
 +
 +  if (ISA_BASE_IS_LA64V110)
 +    emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
 +						       operands[3], operands[4],
 +						       operands[6]));
 +  else
     {
 -      rtx reg = gen_reg_rtx (word_mode);
 -      emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)));
 -      compare = reg;
 +      union loongarch_gen_fn_ptrs generator;
 +      generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si;
 +      loongarch_expand_atomic_qihi (generator, operands[1], operands[2],
 +				    operands[3], operands[4], operands[6]);
     }
 -  emit_insn (gen_rtx_SET (operands[0],
 -			  gen_rtx_EQ (SImode, compare, const0_rtx)));
 +      rtx compare = operands[1];
 +      if (operands[3] != const0_rtx)
 +	{
 +	  machine_mode mode = GET_MODE (operands[3]);
 +	  rtx op1 = convert_modes (SImode, mode, operands[1], true);
 +	  rtx op3 = convert_modes (SImode, mode, operands[3], true);
 +	  rtx difference = gen_rtx_MINUS (SImode, op1, op3);
 +	  compare = gen_reg_rtx (SImode);
 +	  emit_insn (gen_rtx_SET (compare, difference));
 +	}
 +
 +      if (word_mode != <MODE>mode)
 +	{
 +	  rtx reg = gen_reg_rtx (word_mode);
 +	  emit_insn (gen_rtx_SET (reg, gen_rtx_SIGN_EXTEND (word_mode, compare)));
 +	  compare = reg;
 +	}
 +
 +      emit_insn (gen_rtx_SET (operands[0],
 +			      gen_rtx_EQ (SImode, compare, const0_rtx)));
   DONE;
 })
@@ -505,13 +581,31 @@
 	(match_operand:SHORT 2 "register_operand"))]
   ""
 {
 -  union loongarch_gen_fn_ptrs generator;
 -  generator.fn_7 = gen_atomic_cas_value_exchange_7_si;
 -  loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
 -				const0_rtx, operands[2], operands[3]);
 +  if (ISA_BASE_IS_LA64V110)
 +    emit_insn (gen_atomic_exchange<mode>_short (operands[0], operands[1], operands[2], operands[3]));
 +  else
 +    {
 +      union loongarch_gen_fn_ptrs generator;
 +      generator.fn_7 = gen_atomic_cas_value_exchange_7_si;
 +      loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
 +				    const0_rtx, operands[2], operands[3]);
 +    }
   DONE;
 })
 +(define_insn "atomic_fetch_add<mode>_short"
 +  [(set (match_operand:SHORT 0 "register_operand" "=&r")
 +	(match_operand:SHORT 1 "memory_operand" "+ZB"))
 +   (set (match_dup 1)
 +	(unspec_volatile:SHORT
 +	  [(plus:SHORT (match_dup 1)
 +		     (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
 +	   (match_operand:SI 3 "const_int_operand")] ;; model
 +	 UNSPEC_SYNC_OLD_OP))]
 +  "ISA_BASE_IS_LA64V110"
 +  "amadd%A3.<amo>\t%0,%z2,%1"
 +  [(set (attr "length") (const_int 4))])
 +
 (define_expand "atomic_fetch_add<mode>"
   [(set (match_operand:SHORT 0 "register_operand" "=&r")
 	(match_operand:SHORT 1 "memory_operand" "+ZB"))
@@ -523,10 +617,16 @@
 	 UNSPEC_SYNC_OLD_OP))]
   ""
 {
 -  union loongarch_gen_fn_ptrs generator;
 -  generator.fn_7 = gen_atomic_cas_value_add_7_si;
 -  loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
 -				operands[1], operands[2], operands[3]);
 +  if (ISA_BASE_IS_LA64V110)
 +    emit_insn (gen_atomic_fetch_add<mode>_short (operands[0], operands[1],
 +					     operands[2], operands[3]));
 +  else
 +    {
 +      union loongarch_gen_fn_ptrs generator;
 +      generator.fn_7 = gen_atomic_cas_value_add_7_si;
 +      loongarch_expand_atomic_qihi (generator, operands[0], operands[1],
 +				    operands[1], operands[2], operands[3]);
 +    }
   DONE;
 })
 -- 
 2.43.0
--- a/0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
+++ b/0034-LoongArch-atomic_load-and-atomic_store-are-implement.patch
@ -0,0 +1,140 @@
 From 61a70e6b6b44bf420eae559d998e109b70e5a9b6 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Fri, 17 Nov 2023 16:04:45 +0800
 Subject: [PATCH 034/188] LoongArch: atomic_load and atomic_store are
 implemented using dbar grading.
 Because the la464 memory model design allows loads from the same address to be
 executed out of order, in the following test example the load on line 23 may be
 executed before the load on line 21, resulting in an error.
 So when implementing __atomic_load with MEMMODEL_RELAXED, the load instruction
 is followed by "dbar 0x700".
  1 void *
  2 gomp_ptrlock_get_slow (gomp_ptrlock_t *ptrlock)
  3 {
  4   int *intptr;
  5   uintptr_t oldval = 1;
  6
  7   __atomic_compare_exchange_n (ptrlock, &oldval, 2, false,
  8                                MEMMODEL_RELAXED, MEMMODEL_RELAXED);
  9
 10   /* futex works on ints, not pointers.
 11      But a valid work share pointer will be at least
 12      8 byte aligned, so it is safe to assume the low
 13      32-bits of the pointer won't contain values 1 or 2.  */
 14   __asm volatile ("" : "=r" (intptr) : "0" (ptrlock));
 15 #if __BYTE_ORDER == __BIG_ENDIAN
 16   if (sizeof (*ptrlock) > sizeof (int))
 17     intptr += (sizeof (*ptrlock) / sizeof (int)) - 1;
 18 #endif
 19   do
 20     do_wait (intptr, 2);
 21   while (__atomic_load_n (intptr, MEMMODEL_RELAXED) == 2);
 22   __asm volatile ("" : : : "memory");
 23   return (void *) __atomic_load_n (ptrlock, MEMMODEL_ACQUIRE);
 24 }
 gcc/ChangeLog:
 	* config/loongarch/sync.md (atomic_load<mode>): New template.
 ---
 gcc/config/loongarch/sync.md | 70 +++++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 5 deletions(-)
 diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
 index 1eabaec04..f4673c856 100644
 --- a/gcc/config/loongarch/sync.md
 +++ b/gcc/config/loongarch/sync.md
@@ -30,6 +30,7 @@
   UNSPEC_SYNC_OLD_OP
   UNSPEC_SYNC_EXCHANGE
   UNSPEC_ATOMIC_STORE
 +  UNSPEC_ATOMIC_LOAD
   UNSPEC_MEMORY_BARRIER
 ])
@@ -103,16 +104,75 @@
 ;; Atomic memory operations.
 +(define_insn "atomic_load<mode>"
 +  [(set (match_operand:QHWD 0 "register_operand" "=r")
 +    (unspec_volatile:QHWD
 +      [(match_operand:QHWD 1 "memory_operand" "+m")
 +       (match_operand:SI 2 "const_int_operand")]                        ;; model
 +      UNSPEC_ATOMIC_LOAD))]
 +  ""
 +{
 +  enum memmodel model = memmodel_base (INTVAL (operands[2]));
 +
 +  switch (model)
 +    {
 +    case MEMMODEL_SEQ_CST:
 +      return "dbar\t0x11\\n\\t"
 +	     "ld.<size>\t%0,%1\\n\\t"
 +	     "dbar\t0x14\\n\\t";
 +    case MEMMODEL_ACQUIRE:
 +      return "ld.<size>\t%0,%1\\n\\t"
 +	     "dbar\t0x14\\n\\t";
 +    case MEMMODEL_RELAXED:
 +      return "ld.<size>\t%0,%1\\n\\t"
 +	     "dbar\t0x700\\n\\t";
 +
 +    default:
 +      /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
 +	 __ATOMIC_CONSUME and __ATOMIC_ACQUIRE.
 +	 The expand_builtin_atomic_load function converts all invalid memmodels
 +	 to MEMMODEL_SEQ_CST.
 +
 +	 __atomic builtins doc: "Consume is implemented using the
 +	 stronger acquire memory order because of a deficiency in C++11's
 +	 semantics."  See PR 59448 and get_memmodel in builtins.cc.  */
 +      gcc_unreachable ();
 +    }
 +}
 +  [(set (attr "length") (const_int 12))])
 +
 ;; Implement atomic stores with amoswap.  Fall back to fences for atomic loads.
 (define_insn "atomic_store<mode>"
 -  [(set (match_operand:GPR 0 "memory_operand" "+ZB")
 -    (unspec_volatile:GPR
 -      [(match_operand:GPR 1 "reg_or_0_operand" "rJ")
 +  [(set (match_operand:QHWD 0 "memory_operand" "+m")
 +    (unspec_volatile:QHWD
 +      [(match_operand:QHWD 1 "reg_or_0_operand" "rJ")
        (match_operand:SI 2 "const_int_operand")]      ;; model
       UNSPEC_ATOMIC_STORE))]
   ""
 -  "amswap%A2.<amo>\t$zero,%z1,%0"
 -  [(set (attr "length") (const_int 8))])
 +{
 +  enum memmodel model = memmodel_base (INTVAL (operands[2]));
 +
 +  switch (model)
 +    {
 +    case MEMMODEL_SEQ_CST:
 +      return "dbar\t0x12\\n\\t"
 +	     "st.<size>\t%z1,%0\\n\\t"
 +	     "dbar\t0x18\\n\\t";
 +    case MEMMODEL_RELEASE:
 +      return "dbar\t0x12\\n\\t"
 +	     "st.<size>\t%z1,%0\\n\\t";
 +    case MEMMODEL_RELAXED:
 +      return "st.<size>\t%z1,%0";
 +
 +    default:
 +      /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
 +	 and __ATOMIC_RELEASE.
 +	 The expand_builtin_atomic_store function converts all invalid memmodels
 +	 to MEMMODEL_SEQ_CST.  */
 +      gcc_unreachable ();
 +    }
 +}
 +  [(set (attr "length") (const_int 12))])
 (define_insn "atomic_<atomic_optab><mode>"
   [(set (match_operand:GPR 0 "memory_operand" "+ZB")
 -- 
 2.43.0
--- a/0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
+++ b/0035-LoongArch-genopts-Add-infrastructure-to-generate-cod.patch
@ -0,0 +1,615 @@
 From 535fb5a2d4347801439fbb51fa07cd0317183cee Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Fri, 25 Oct 2024 02:08:03 +0000
 Subject: [PATCH 035/188] LoongArch: genopts: Add infrastructure to generate
 code for  new features in ISA evolution
 LoongArch v1.10 introduced the concept of ISA evolution.  During ISA
 evolution, many independent features can be added and enumerated via
 CPUCFG.
 Add a data file into genopts storing the CPUCFG word, bit, the name
 of the command line option controlling if this feature should be used
 for compilation, and the text description.  Make genstr.sh process these
 info and add the command line options into loongarch.opt and
 loongarch-str.h, and generate a new file loongarch-cpucfg-map.h for
 mapping CPUCFG output to the corresponding option.  When handling
 -march=native, use the information in loongarch-cpucfg-map.h to generate
 the corresponding option mask.  Enable the features implied by -march
 setting unless the user has explicitly disabled the feature.
 The added options (-mdiv32 and -mld-seq-sa) are not really handled yet.
 They'll be used in the following patches.
 gcc/ChangeLog:
        * config/loongarch/genopts/isa-evolution.in: New data file.
        * config/loongarch/genopts/genstr.sh: Translate info in
        isa-evolution.in when generating loongarch-str.h, loongarch.opt,
        and loongarch-cpucfg-map.h.
        * config/loongarch/genopts/loongarch.opt.in (isa_evolution):
        New variable.
        * config/loongarch/t-loongarch: (loongarch-cpucfg-map.h): New
        rule.
        (loongarch-str.h): Depend on isa-evolution.in.
        (loongarch.opt): Depend on isa-evolution.in.
        (loongarch-cpu.o): Depend on loongarch-cpucfg-map.h.
        * config/loongarch/loongarch-str.h: Regenerate.
        * config/loongarch/loongarch-def.h (loongarch_isa):  Add field
        for evolution features.  Add helper function to enable features
        in this field.
        Probe native CPU capability and save the corresponding options
        into preset.
        * config/loongarch/loongarch-cpu.cc (fill_native_cpu_config):
        Probe native CPU capability and save the corresponding options
        into preset.
        (cache_cpucfg): Simplify with C++11-style for loop.
        (cpucfg_useful_idx, N_CPUCFG_WORDS): Move to ...
        * config/loongarch/loongarch.cc
        (loongarch_option_override_internal): Enable the ISA evolution
        feature options implied by -march and not explicitly disabled.
        (loongarch_asm_code_end): New function, print ISA information as
        comments in the assembly if -fverbose-asm.  It makes it easier to
        debug things like -march=native.
        (TARGET_ASM_CODE_END): Define.
        * config/loongarch/loongarch.opt: Regenerate.
        * config/loongarch/loongarch-cpucfg-map.h: Generate.
        (cpucfg_useful_idx, N_CPUCFG_WORDS) ... here.
 ---
 gcc/config/loongarch/genopts/genstr.sh        | 92 ++++++++++++++++++-
 gcc/config/loongarch/genopts/isa-evolution.in |  2 +
 gcc/config/loongarch/genopts/loongarch.opt.in |  7 ++
 gcc/config/loongarch/loongarch-cpu.cc         | 46 +++++-----
 gcc/config/loongarch/loongarch-cpucfg-map.h   | 48 ++++++++++
 gcc/config/loongarch/loongarch-def.h          |  7 ++
 gcc/config/loongarch/loongarch-str.h          |  6 +-
 gcc/config/loongarch/loongarch.cc             | 31 +++++++
 gcc/config/loongarch/loongarch.opt            | 20 +++-
 gcc/config/loongarch/t-loongarch              | 21 ++++-
 10 files changed, 244 insertions(+), 36 deletions(-)
 create mode 100644 gcc/config/loongarch/genopts/isa-evolution.in
 create mode 100644 gcc/config/loongarch/loongarch-cpucfg-map.h
 diff --git a/gcc/config/loongarch/genopts/genstr.sh b/gcc/config/loongarch/genopts/genstr.sh
 index 972ef125f..bcc616e98 100755
 --- a/gcc/config/loongarch/genopts/genstr.sh
 +++ b/gcc/config/loongarch/genopts/genstr.sh
@@ -25,8 +25,8 @@ cd "$(dirname "$0")"
 # Generate a header containing definitions from the string table.
 gen_defines() {
     cat <<EOF
 -/* Generated automatically by "genstr" from "loongarch-strings".
 -   Please do not edit this file directly.
 +/* Generated automatically by "genstr" from "loongarch-strings" and
 +   "isa-evolution.in".  Please do not edit this file directly.
    Copyright (C) 2021-2022 Free Software Foundation, Inc.
    Contributed by Loongson Ltd.
@@ -56,6 +56,15 @@ EOF
 	loongarch-strings
     echo
 +
 +   # Generate the strings from isa-evolution.in.
 +   awk '{
 +     a=$3
 +     gsub(/-/, "_", a)
 +     print("#define OPTSTR_"toupper(a)"\t\""$3"\"")
 +   }' isa-evolution.in
 +
 +    echo
     echo "#endif /* LOONGARCH_STR_H */"
 }
@@ -77,11 +86,12 @@ gen_options() {
 	# print a header
 	cat << EOF
 ; Generated by "genstr" from the template "loongarch.opt.in"
 -; and definitions from "loongarch-strings".
 +; and definitions from "loongarch-strings" and "isa-evolution.in".
 ;
 ; Please do not edit this file directly.
 ; It will be automatically updated during a gcc build
 -; if you change "loongarch.opt.in" or "loongarch-strings".
 +; if you change "loongarch.opt.in", "loongarch-strings", or
 +; "isa-evolution.in".
 ;
 EOF
@@ -91,13 +101,85 @@ EOF
 		eval "echo \"$line\""
 	    done
     }
 +
 +    # Generate the strings from isa-evolution.in.
 +    awk '{
 +      print("")
 +      print("m"$3)
 +      gsub(/-/, "_", $3)
 +      print("Target Mask(ISA_"toupper($3)") Var(isa_evolution)")
 +      $1=""; $2=""; $3=""
 +      sub(/^ */, "", $0)
 +      print($0)
 +    }' isa-evolution.in
 +}
 +
 +gen_cpucfg_map() {
 +    cat <<EOF
 +/* Generated automatically by "genstr" from "isa-evolution.in".
 +   Please do not edit this file directly.
 +
 +   Copyright (C) 2023 Free Software Foundation, Inc.
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify
 +it under the terms of the GNU General Public License as published by
 +the Free Software Foundation; either version 3, or (at your option)
 +any later version.
 +
 +GCC is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +<http://www.gnu.org/licenses/>.  */
 +
 +#ifndef LOONGARCH_CPUCFG_MAP_H
 +#define LOONGARCH_CPUCFG_MAP_H
 +
 +#include "options.h"
 +
 +static constexpr struct {
 +  int cpucfg_word;
 +  unsigned int cpucfg_bit;
 +  HOST_WIDE_INT isa_evolution_bit;
 +} cpucfg_map[] = {
 +EOF
 +
 +    # Generate the strings from isa-evolution.in.
 +    awk '{
 +      gsub(/-/, "_", $3)
 +      print("  { "$1", 1u << "$2", OPTION_MASK_ISA_"toupper($3)" },")
 +    }' isa-evolution.in
 +
 +    echo "};"
 +    echo
 +    echo "static constexpr int cpucfg_useful_idx[] = {"
 +
 +    awk 'BEGIN { print("  0,\n  1,\n  2,\n  16,\n  17,\n  18,\n  19,") }
 +    {if ($1+0 > max+0) max=$1; print("  "$1",")}' \
 +   isa-evolution.in | sort -n | uniq
 +
 +    echo "};"
 +    echo ""
 +
 +    awk 'BEGIN { max=19 }
 +    { if ($1+0 > max+0) max=$1 }
 +    END { print "static constexpr int N_CPUCFG_WORDS = "1+max";" }' \
 +   isa-evolution.in
 +
 +    echo "#endif /* LOONGARCH_CPUCFG_MAP_H */"
 }
 main() {
     case "$1" in
 +    cpucfg-map) gen_cpucfg_map;;
 	header) gen_defines;;
 	opt) gen_options;;
 -	*) echo "Unknown Command: \"$1\". Available: header, opt"; exit 1;;
 +    *) echo "Unknown Command: \"$1\". Available: cpucfg-map, header, opt"; exit 1;;
     esac
 }
 diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
 new file mode 100644
 index 000000000..e58f0d6a1
 --- /dev/null
 +++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -0,0 +1,2 @@
 +2	26	div32		Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
 +3	23	ld-seq-sa	Do not need load-load barriers (dbar 0x700).
 diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
 index bd3cfaf60..a49de07c9 100644
 --- a/gcc/config/loongarch/genopts/loongarch.opt.in
 +++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -247,3 +247,10 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ
 Indicate how many non memory access vector instructions can be issued per
 cycle, it's used in unroll factor determination for autovectorizer.  The
 default value is 4.
 +
 +; Features added during ISA evolution.  This concept is different from ISA
 +; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the
 +; explanation.  These features may be implemented and enumerated with
 +; CPUCFG independantly, so we use bit flags to specify them.
 +Variable
 +HOST_WIDE_INT isa_evolution = 0
 diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
 index cbe52d7ed..e1cd85d02 100644
 --- a/gcc/config/loongarch/loongarch-cpu.cc
 +++ b/gcc/config/loongarch/loongarch-cpu.cc
@@ -29,12 +29,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "loongarch-def.h"
 #include "loongarch-opts.h"
 #include "loongarch-cpu.h"
 +#include "loongarch-cpucfg-map.h"
 #include "loongarch-str.h"
 /* Native CPU detection with "cpucfg" */
 -#define N_CPUCFG_WORDS 0x15
 static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
 -static const int cpucfg_useful_idx[] = {0, 1, 2, 16, 17, 18, 19};
 static uint32_t
 read_cpucfg_word (int wordno)
@@ -56,11 +55,8 @@ read_cpucfg_word (int wordno)
 void
 cache_cpucfg (void)
 {
 -  for (unsigned int i = 0; i < sizeof (cpucfg_useful_idx) / sizeof (int); i++)
 -    {
 -      cpucfg_cache[cpucfg_useful_idx[i]]
 -	= read_cpucfg_word (cpucfg_useful_idx[i]);
 -    }
 +  for (int idx: cpucfg_useful_idx)
 +    cpucfg_cache[idx] = read_cpucfg_word (idx);
 }
 uint32_t
@@ -125,11 +121,12 @@ fill_native_cpu_config (struct loongarch_target *tgt)
       int tmp;
       tgt->cpu_arch = native_cpu_type;
 +      auto &preset = loongarch_cpu_default_isa[tgt->cpu_arch];
 +
       /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].base
 	 With: base architecture (ARCH)
 	 At:   cpucfg_words[1][1:0] */
 -      #define PRESET_ARCH (loongarch_cpu_default_isa[tgt->cpu_arch].base)
       switch (cpucfg_cache[1] & 0x3)
 	{
 	  case 0x02:
@@ -144,19 +141,18 @@ fill_native_cpu_config (struct loongarch_target *tgt)
 	}
       /* Check consistency with PRID presets.  */
 -      if (native_cpu_type != CPU_NATIVE && tmp != PRESET_ARCH)
 +      if (native_cpu_type != CPU_NATIVE && tmp != preset.base)
 	warning (0, "base architecture %qs differs from PRID preset %qs",
 		 loongarch_isa_base_strings[tmp],
 -		 loongarch_isa_base_strings[PRESET_ARCH]);
 +		 loongarch_isa_base_strings[preset.base]);
       /* Use the native value anyways.  */
 -      PRESET_ARCH = tmp;
 +      preset.base = tmp;
       /* Fill: loongarch_cpu_default_isa[tgt->cpu_arch].fpu
 	 With: FPU type (FP, FP_SP, FP_DP)
 	 At:   cpucfg_words[2][2:0] */
 -      #define PRESET_FPU (loongarch_cpu_default_isa[tgt->cpu_arch].fpu)
       switch (cpucfg_cache[2] & 0x7)
 	{
 	  case 0x07:
@@ -179,20 +175,19 @@ fill_native_cpu_config (struct loongarch_target *tgt)
 	}
       /* Check consistency with PRID presets.  */
 -      if (native_cpu_type != CPU_NATIVE && tmp != PRESET_FPU)
 +      if (native_cpu_type != CPU_NATIVE && tmp != preset.fpu)
 	warning (0, "floating-point unit %qs differs from PRID preset %qs",
 		 loongarch_isa_ext_strings[tmp],
 -		 loongarch_isa_ext_strings[PRESET_FPU]);
 +		 loongarch_isa_ext_strings[preset.fpu]);
       /* Use the native value anyways.  */
 -      PRESET_FPU = tmp;
 +      preset.fpu = tmp;
       /* Fill: loongarch_cpu_default_isa[CPU_NATIVE].simd
 	 With: SIMD extension type (LSX, LASX)
 	 At:   cpucfg_words[2][7:6] */
 -      #define PRESET_SIMD (loongarch_cpu_default_isa[tgt->cpu_arch].simd)
       switch (cpucfg_cache[2] & 0xc0)
 	{
 	  case 0xc0:
@@ -219,14 +214,19 @@ fill_native_cpu_config (struct loongarch_target *tgt)
       /* Check consistency with PRID presets.  */
       /*
 -      if (native_cpu_type != CPU_NATIVE && tmp != PRESET_SIMD)
 +      if (native_cpu_type != CPU_NATIVE && tmp != preset.simd)
 	warning (0, "SIMD extension %qs differs from PRID preset %qs",
 		 loongarch_isa_ext_strings[tmp],
 -		 loongarch_isa_ext_strings[PRESET_SIMD]);
 +		 loongarch_isa_ext_strings[preset.simd]);
       */
       /* Use the native value anyways.  */
 -      PRESET_SIMD = tmp;
 +      preset.simd = tmp;
 +
 +      /* Features added during ISA evolution.  */
 +      for (const auto &entry: cpucfg_map)
 +	if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
 +	  preset.evolution |= entry.isa_evolution_bit;
     }
   if (tune_native_p)
@@ -237,7 +237,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
 	 With: cache size info
 	 At:   cpucfg_words[16:20][31:0] */
 -      #define PRESET_CACHE (loongarch_cpu_cache[tgt->cpu_tune])
 +      auto &preset_cache = loongarch_cpu_cache[tgt->cpu_tune];
       struct loongarch_cache native_cache;
       int l1d_present = 0, l1u_present = 0;
       int l2d_present = 0;
@@ -268,8 +268,8 @@ fill_native_cpu_config (struct loongarch_target *tgt)
 	>> 10;					  /* in kibibytes */
       /* Use the native value anyways.  */
 -      PRESET_CACHE.l1d_line_size = native_cache.l1d_line_size;
 -      PRESET_CACHE.l1d_size = native_cache.l1d_size;
 -      PRESET_CACHE.l2d_size = native_cache.l2d_size;
 +      preset_cache.l1d_line_size = native_cache.l1d_line_size;
 +      preset_cache.l1d_size = native_cache.l1d_size;
 +      preset_cache.l2d_size = native_cache.l2d_size;
     }
 }
 diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
 new file mode 100644
 index 000000000..0c078c397
 --- /dev/null
 +++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
@@ -0,0 +1,48 @@
 +/* Generated automatically by "genstr" from "isa-evolution.in".
 +   Please do not edit this file directly.
 +
 +   Copyright (C) 2023 Free Software Foundation, Inc.
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify
 +it under the terms of the GNU General Public License as published by
 +the Free Software Foundation; either version 3, or (at your option)
 +any later version.
 +
 +GCC is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +<http://www.gnu.org/licenses/>.  */
 +
 +#ifndef LOONGARCH_CPUCFG_MAP_H
 +#define LOONGARCH_CPUCFG_MAP_H
 +
 +#include "options.h"
 +
 +static constexpr struct {
 +  int cpucfg_word;
 +  unsigned int cpucfg_bit;
 +  HOST_WIDE_INT isa_evolution_bit;
 +} cpucfg_map[] = {
 +  { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
 +  { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
 +};
 +
 +static constexpr int cpucfg_useful_idx[] = {
 +  0,
 +  1,
 +  2,
 +  3,
 +  16,
 +  17,
 +  18,
 +  19,
 +};
 +
 +static constexpr int N_CPUCFG_WORDS = 20;
 +#endif /* LOONGARCH_CPUCFG_MAP_H */
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index 078d8607d..cb99caebe 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -46,6 +46,7 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef LOONGARCH_DEF_H
 #define LOONGARCH_DEF_H
 +#include <stdint.h>
 #include "loongarch-tune.h"
 #ifdef __cplusplus
@@ -121,6 +122,12 @@ struct loongarch_isa
   int base;	    /* ISA_BASE_ */
   int fpu;	    /* ISA_EXT_FPU_ */
   int simd;	    /* ISA_EXT_SIMD_ */
 +
 +  /* ISA evolution features implied by -march=, for -march=native probed
 +     via CPUCFG.  The features implied by base may be not included here.
 +
 +     Using int64_t instead of HOST_WIDE_INT for C compatibility.  */
 +  int64_t evolution;
 };
 struct loongarch_abi
 diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
 index 037e9e583..cd9dbb41b 100644
 --- a/gcc/config/loongarch/loongarch-str.h
 +++ b/gcc/config/loongarch/loongarch-str.h
@@ -1,5 +1,5 @@
 -/* Generated automatically by "genstr" from "loongarch-strings".
 -   Please do not edit this file directly.
 +/* Generated automatically by "genstr" from "loongarch-strings" and
 +   "isa-evolution.in".  Please do not edit this file directly.
    Copyright (C) 2021-2022 Free Software Foundation, Inc.
    Contributed by Loongson Ltd.
@@ -69,4 +69,6 @@ along with GCC; see the file COPYING3.  If not see
 #define STR_EXPLICIT_RELOCS_NONE "none"
 #define STR_EXPLICIT_RELOCS_ALWAYS "always"
 +#define OPTSTR_DIV32   "div32"
 +#define OPTSTR_LD_SEQ_SA   "ld-seq-sa"
 #endif /* LOONGARCH_STR_H */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 7bb46a45d..8bd46da62 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -7451,6 +7451,10 @@ loongarch_option_override_internal (struct gcc_options *opts,
   if (loongarch_branch_cost == 0)
     loongarch_branch_cost = loongarch_cost->branch_cost;
 +  /* If the user hasn't disabled a feature added during ISA evolution,
 +     use the processor's default.  */
 +  isa_evolution |= (la_target.isa.evolution &
 +		    ~global_options_set.x_isa_evolution);
   /* Enable sw prefetching at -O3 and higher.  */
   if (opts->x_flag_prefetch_loop_arrays < 0
@@ -11427,6 +11431,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
 						      is_packed);
 }
 +/* If -fverbose-asm, dump some info for debugging.  */
 +static void
 +loongarch_asm_code_end (void)
 +{
 +#define DUMP_FEATURE(PRED) \
 +  fprintf (asm_out_file, "%s %s: %s\n", ASM_COMMENT_START, #PRED, \
 +	   (PRED) ? "enabled" : "disabled")
 +
 +  if (flag_verbose_asm)
 +    {
 +      fprintf (asm_out_file, "\n%s CPU: %s\n", ASM_COMMENT_START,
 +	       loongarch_cpu_strings [la_target.cpu_arch]);
 +      fprintf (asm_out_file, "%s Tune: %s\n", ASM_COMMENT_START,
 +	       loongarch_cpu_strings [la_target.cpu_tune]);
 +      fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
 +	       loongarch_isa_base_strings [la_target.isa.base]);
 +      DUMP_FEATURE (TARGET_DIV32);
 +      DUMP_FEATURE (TARGET_LD_SEQ_SA);
 +    }
 +
 +  fputs ("\n\n", asm_out_file);
 +#undef DUMP_FEATURE
 +}
 +
 /* Initialize the GCC target structure.  */
 #undef TARGET_ASM_ALIGNED_HI_OP
 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -11446,6 +11474,9 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
 #undef TARGET_ASM_FUNCTION_RODATA_SECTION
 #define TARGET_ASM_FUNCTION_RODATA_SECTION loongarch_function_rodata_section
 +#undef TARGET_ASM_CODE_END
 +#define TARGET_ASM_CODE_END loongarch_asm_code_end
 +
 #undef TARGET_SCHED_INIT
 #define TARGET_SCHED_INIT loongarch_sched_init
 #undef TARGET_SCHED_REORDER
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index d936954b8..5251f705d 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -1,9 +1,10 @@
 ; Generated by "genstr" from the template "loongarch.opt.in"
 -; and definitions from "loongarch-strings".
 +; and definitions from "loongarch-strings" and "isa-evolution.in".
 ;
 ; Please do not edit this file directly.
 ; It will be automatically updated during a gcc build
 -; if you change "loongarch.opt.in" or "loongarch-strings".
 +; if you change "loongarch.opt.in", "loongarch-strings", or
 +; "isa-evolution.in".
 ;
 ; Copyright (C) 2021-2022 Free Software Foundation, Inc.
 ;
@@ -254,3 +255,18 @@ Target Undocumented Joined UInteger Var(loongarch_vect_issue_info) Init(4) Integ
 Indicate how many non memory access vector instructions can be issued per
 cycle, it's used in unroll factor determination for autovectorizer.  The
 default value is 4.
 +
 +; Features added during ISA evolution.  This concept is different from ISA
 +; extension, read Section 1.5 of LoongArch v1.10 Volume 1 for the
 +; explanation.  These features may be implemented and enumerated with
 +; CPUCFG independantly, so we use bit flags to specify them.
 +Variable
 +HOST_WIDE_INT isa_evolution = 0
 +
 +mdiv32
 +Target Mask(ISA_DIV32) Var(isa_evolution)
 +Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
 +
 +mld-seq-sa
 +Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution)
 +Do not need load-load barriers (dbar 0x700).
 diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
 index 12734c37b..57b1176bc 100644
 --- a/gcc/config/loongarch/t-loongarch
 +++ b/gcc/config/loongarch/t-loongarch
@@ -18,8 +18,9 @@
 GTM_H += loongarch-multilib.h
 -OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h \
 -		   $(srcdir)/config/loongarch/loongarch-tune.h
 +OPTIONS_H_EXTRA += $(srcdir)/config/loongarch/loongarch-def.h	\
 +		   $(srcdir)/config/loongarch/loongarch-tune.h	\
 +		   $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
 # Canonical target triplet from config.gcc
 LA_MULTIARCH_TRIPLET = $(patsubst LA_MULTIARCH_TRIPLET=%,%,$\
@@ -31,7 +32,8 @@ LA_STR_H = $(srcdir)/config/loongarch/loongarch-str.h
 # String definition header
 $(LA_STR_H): s-loongarch-str ; @true
 s-loongarch-str: $(srcdir)/config/loongarch/genopts/genstr.sh \
 -	$(srcdir)/config/loongarch/genopts/loongarch-strings
 +	$(srcdir)/config/loongarch/genopts/loongarch-strings  \
 +	$(srcdir)/config/loongarch/genopts/isa-evolution.in
 	$(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh header \
     $(srcdir)/config/loongarch/genopts/loongarch-strings > \
     tmp-loongarch-str.h
@@ -58,7 +60,8 @@ loongarch-driver.o : $(srcdir)/config/loongarch/loongarch-driver.cc $(LA_STR_H)
 loongarch-opts.o: $(srcdir)/config/loongarch/loongarch-opts.cc $(LA_STR_H)
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
 -loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H)
 +loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \
 +		 $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
 loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H)
@@ -67,6 +70,7 @@ loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H)
 $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
 s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
 	$(srcdir)/config/loongarch/genopts/loongarch.opt.in \
 +	$(srcdir)/config/loongarch/genopts/isa-evolution.in \
 	$(srcdir)/config/loongarch/genopts/loongarch-strings $(LA_STR_H)
 	$(SHELL) $(srcdir)/config/loongarch/genopts/genstr.sh opt \
     $(srcdir)/config/loongarch/genopts/loongarch.opt.in \
@@ -74,3 +78,12 @@ s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
 	$(SHELL) $(srcdir)/../move-if-change tmp-loongarch.opt \
     $(srcdir)/config/loongarch/loongarch.opt
 	$(STAMP) s-loongarch-opt
 +
 +$(srcdir)/config/loongarch/loongarch-cpucfg-map.h: s-loongarch-cpucfg-map
 +	@true
 +s-loongarch-cpucfg-map: $(srcdir)/config/loongarch/genopts/genstr.sh \
 +	$(srcdir)/config/loongarch/genopts/isa-evolution.in
 +	$(SHELL) $< cpucfg-map > tmp-cpucfg.h
 +	$(SHELL) $(srcdir)/../move-if-change tmp-cpucfg.h \
 +	    $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
 +	$(STAMP) $@
 -- 
 2.43.0
--- a/0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
+++ b/0036-LoongArch-Add-evolution-features-of-base-ISA-revisio.patch
@ -0,0 +1,148 @@
 From 24648180418affbaf044a58ae0b5f79a0cf71155 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 18 Nov 2023 03:19:07 +0800
 Subject: [PATCH 036/188] LoongArch: Add evolution features of base ISA
 revisions
 	* config/loongarch/loongarch-def.h:
 	(loongarch_isa_base_features): Declare.  Define it in ...
 	* config/loongarch/loongarch-cpu.cc
 	(loongarch_isa_base_features): ... here.
 	(fill_native_cpu_config): If we know the base ISA of the CPU
 	model from PRID, use it instead of la64 (v1.0).  Check if all
 	expected features of this base ISA are available, emit a warning
 	if not.
 	* config/loongarch/loongarch-opts.cc (config_target_isa): Enable
 	the features implied by the base ISA if not -march=native.
 ---
 gcc/config/loongarch/loongarch-cpu.cc  | 62 ++++++++++++++++++--------
 gcc/config/loongarch/loongarch-def.h   |  5 +++
 gcc/config/loongarch/loongarch-opts.cc |  3 ++
 3 files changed, 52 insertions(+), 18 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
 index e1cd85d02..76d66fa55 100644
 --- a/gcc/config/loongarch/loongarch-cpu.cc
 +++ b/gcc/config/loongarch/loongarch-cpu.cc
@@ -32,6 +32,19 @@ along with GCC; see the file COPYING3.  If not see
 #include "loongarch-cpucfg-map.h"
 #include "loongarch-str.h"
 +/* loongarch_isa_base_features defined here instead of loongarch-def.c
 +   because we need to use options.h.  Pay attention on the order of elements
 +   in the initializer becaue ISO C++ does not allow C99 designated
 +   initializers!  */
 +
 +#define ISA_BASE_LA64V110_FEATURES \
 +  (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
 +
 +int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
 +  /* [ISA_BASE_LA64V100] = */ 0,
 +  /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES,
 +};
 +
 /* Native CPU detection with "cpucfg" */
 static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
@@ -127,24 +140,22 @@ fill_native_cpu_config (struct loongarch_target *tgt)
 	 With: base architecture (ARCH)
 	 At:   cpucfg_words[1][1:0] */
 -      switch (cpucfg_cache[1] & 0x3)
 -	{
 -	  case 0x02:
 -	    tmp = ISA_BASE_LA64V100;
 -	    break;
 -
 -	  default:
 -	    fatal_error (UNKNOWN_LOCATION,
 -			 "unknown native base architecture %<0x%x%>, "
 -			 "%qs failed", (unsigned int) (cpucfg_cache[1] & 0x3),
 -			 "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
 -	}
 -
 -      /* Check consistency with PRID presets.  */
 -      if (native_cpu_type != CPU_NATIVE && tmp != preset.base)
 -	warning (0, "base architecture %qs differs from PRID preset %qs",
 -		 loongarch_isa_base_strings[tmp],
 -		 loongarch_isa_base_strings[preset.base]);
 +      if (native_cpu_type != CPU_NATIVE)
 +	tmp = loongarch_cpu_default_isa[native_cpu_type].base;
 +      else
 +	switch (cpucfg_cache[1] & 0x3)
 +	  {
 +	    case 0x02:
 +	      tmp = ISA_BASE_LA64V100;
 +	      break;
 +
 +	    default:
 +	      fatal_error (UNKNOWN_LOCATION,
 +			   "unknown native base architecture %<0x%x%>, "
 +			   "%qs failed",
 +			   (unsigned int) (cpucfg_cache[1] & 0x3),
 +			   "-m" OPTSTR_ARCH "=" STR_CPU_NATIVE);
 +	  }
       /* Use the native value anyways.  */
       preset.base = tmp;
@@ -227,6 +238,21 @@ fill_native_cpu_config (struct loongarch_target *tgt)
       for (const auto &entry: cpucfg_map)
 	if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
 	  preset.evolution |= entry.isa_evolution_bit;
 +
 +      if (native_cpu_type != CPU_NATIVE)
 +	{
 +	  /* Check if the local CPU really supports the features of the base
 +	     ISA of probed native_cpu_type.  If any feature is not detected,
 +	     either GCC or the hardware is buggy.  */
 +	  auto base_isa_feature = loongarch_isa_base_features[preset.base];
 +	  if ((preset.evolution & base_isa_feature) != base_isa_feature)
 +	    warning (0,
 +		     "detected base architecture %qs, but some of its "
 +		     "features are not detected; the detected base "
 +		     "architecture may be unreliable, only detected "
 +		     "features will be enabled",
 +		     loongarch_isa_base_strings[preset.base]);
 +	}
     }
   if (tune_native_p)
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index cb99caebe..ca0a324dd 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -55,12 +55,17 @@ extern "C" {
 /* enum isa_base */
 extern const char* loongarch_isa_base_strings[];
 +
 /* LoongArch V1.00.  */
 #define ISA_BASE_LA64V100     0
 /* LoongArch V1.10.  */
 #define ISA_BASE_LA64V110     1
 #define N_ISA_BASE_TYPES      2
 +/* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
 +   we cannot use the C++ header options.h in loongarch-def.c.  */
 +extern int64_t loongarch_isa_base_features[];
 +
 /* enum isa_ext_* */
 extern const char* loongarch_isa_ext_strings[];
 #define ISA_EXT_NONE	      0
 diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
 index f10a9d3ff..390720479 100644
 --- a/gcc/config/loongarch/loongarch-opts.cc
 +++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -284,6 +284,9 @@ config_target_isa:
   /* Get default ISA from "-march" or its default value.  */
   t.isa = loongarch_cpu_default_isa[t.cpu_arch];
 +  if (t.cpu_arch != CPU_NATIVE)
 +    t.isa.evolution |= loongarch_isa_base_features[t.isa.base];
 +
   /* Apply incremental changes.  */
   /* "-march=native" overrides the default FPU type.  */
 -- 
 2.43.0
--- a/0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
+++ b/0037-LoongArch-Take-the-advantage-of-mdiv32-if-it-s-enabl.patch
@ -0,0 +1,156 @@
 From 6b483504c4fbb2a05a17d67e8f51b72149f1bbf9 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Thu, 16 Nov 2023 09:21:47 +0800
 Subject: [PATCH 037/188] LoongArch: Take the advantage of -mdiv32 if it's
 enabled
 With -mdiv32, we can assume div.w[u] and mod.w[u] work on the low 32 bits
 of a 64-bit GPR even if it's not sign-extended.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (DIV): New mode iterator.
 	(<optab:ANY_DIV><mode:GPR>3): Don't expand if TARGET_DIV32.
 	(<optab:ANY_DIV>di3_fake): Disable if TARGET_DIV32.
 	(*<optab:ANY_DIV><mode:GPR>3): Allow SImode if TARGET_DIV32.
 	(<optab:ANY_DIV>si3_extended): New insn if TARGET_DIV32.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/div-div32.c: New test.
 	* gcc.target/loongarch/div-no-div32.c: New test.
 ---
 gcc/config/loongarch/loongarch.md             | 31 ++++++++++++++++---
 .../gcc.target/loongarch/div-div32.c          | 31 +++++++++++++++++++
 .../gcc.target/loongarch/div-no-div32.c       | 11 +++++++
 3 files changed, 68 insertions(+), 5 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/div-div32.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/div-no-div32.c
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 52e40a208..c4e7af107 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -408,6 +408,10 @@
 ;; st.w.
 (define_mode_iterator ST_ANY [QHWD ANYF])
 +;; A mode for anything legal as a input of a div or mod instruction.
 +(define_mode_iterator DIV [(DI "TARGET_64BIT")
 +			   (SI "!TARGET_64BIT || TARGET_DIV32")])
 +
 ;; In GPR templates, a string like "mul.<d>" will expand to "mul.w" in the
 ;; 32-bit version and "mul.d" in the 64-bit version.
 (define_mode_attr d [(SI "w") (DI "d")])
@@ -914,7 +918,7 @@
 		     (match_operand:GPR 2 "register_operand")))]
   ""
 {
 - if (GET_MODE (operands[0]) == SImode && TARGET_64BIT)
 + if (GET_MODE (operands[0]) == SImode && TARGET_64BIT && !TARGET_DIV32)
   {
     rtx reg1 = gen_reg_rtx (DImode);
     rtx reg2 = gen_reg_rtx (DImode);
@@ -934,9 +938,9 @@
 })
 (define_insn "*<optab><mode>3"
 -  [(set (match_operand:X 0 "register_operand" "=r,&r,&r")
 -	(any_div:X (match_operand:X 1 "register_operand" "r,r,0")
 -		   (match_operand:X 2 "register_operand" "r,r,r")))]
 +  [(set (match_operand:DIV 0 "register_operand" "=r,&r,&r")
 +	(any_div:DIV (match_operand:DIV 1 "register_operand" "r,r,0")
 +		     (match_operand:DIV 2 "register_operand" "r,r,r")))]
   ""
 {
   return loongarch_output_division ("<insn>.<d><u>\t%0,%1,%2", operands);
@@ -949,6 +953,23 @@
 	(const_string "yes")
 	(const_string "no")))])
 +(define_insn "<optab>si3_extended"
 +  [(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
 +	(sign_extend
 +	  (any_div:SI (match_operand:SI 1 "register_operand" "r,r,0")
 +		      (match_operand:SI 2 "register_operand" "r,r,r"))))]
 +  "TARGET_64BIT && TARGET_DIV32"
 +{
 +  return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
 +}
 +  [(set_attr "type" "idiv")
 +   (set_attr "mode" "SI")
 +   (set (attr "enabled")
 +      (if_then_else
 +	(match_test "!!which_alternative == loongarch_check_zero_div_p()")
 +	(const_string "yes")
 +	(const_string "no")))])
 +
 (define_insn "<optab>di3_fake"
   [(set (match_operand:DI 0 "register_operand" "=r,&r,&r")
 	(sign_extend:DI
@@ -957,7 +978,7 @@
 	     (any_div:DI (match_operand:DI 1 "register_operand" "r,r,0")
 			 (match_operand:DI 2 "register_operand" "r,r,r")) 0)]
 	  UNSPEC_FAKE_ANY_DIV)))]
 -  "TARGET_64BIT"
 +  "TARGET_64BIT && !TARGET_DIV32"
 {
   return loongarch_output_division ("<insn>.w<u>\t%0,%1,%2", operands);
 }
 diff --git a/gcc/testsuite/gcc.target/loongarch/div-div32.c b/gcc/testsuite/gcc.target/loongarch/div-div32.c
 new file mode 100644
 index 000000000..8b1f686ec
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/div-div32.c
@@ -0,0 +1,31 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mdiv32" } */
 +/* { dg-final { scan-assembler "div\.w" } } */
 +/* { dg-final { scan-assembler "div\.wu" } } */
 +/* { dg-final { scan-assembler "mod\.w" } } */
 +/* { dg-final { scan-assembler "mod\.wu" } } */
 +/* { dg-final { scan-assembler-not "slli\.w.*,0" } } */
 +
 +int
 +divw (long a, long b)
 +{
 +  return (int)a / (int)b;
 +}
 +
 +unsigned int
 +divwu (long a, long b)
 +{
 +  return (unsigned int)a / (unsigned int)b;
 +}
 +
 +int
 +modw (long a, long b)
 +{
 +  return (int)a % (int)b;
 +}
 +
 +unsigned int
 +modwu (long a, long b)
 +{
 +  return (unsigned int)a % (unsigned int)b;
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/div-no-div32.c b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c
 new file mode 100644
 index 000000000..f0f697ba5
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/div-no-div32.c
@@ -0,0 +1,11 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d" } */
 +/* { dg-final { scan-assembler "div\.w" } } */
 +/* { dg-final { scan-assembler "div\.wu" } } */
 +/* { dg-final { scan-assembler "mod\.w" } } */
 +/* { dg-final { scan-assembler "mod\.wu" } } */
 +
 +/* -mno-div32 should be implied by -march=loongarch64.  */
 +/* { dg-final { scan-assembler-times "slli\.w\[^\n\]*0" 8 } } */
 +
 +#include "div-div32.c"
 -- 
 2.43.0
--- a/0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
+++ b/0038-LoongArch-Don-t-emit-dbar-0x700-if-mld-seq-sa.patch
@ -0,0 +1,61 @@
 From 42368d6ab1200c157ff473c37889b56b596040e2 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Thu, 16 Nov 2023 09:30:14 +0800
 Subject: [PATCH 038/188] LoongArch: Don't emit dbar 0x700 if -mld-seq-sa
 This option (CPUCFG word 0x3 bit 23) means "the hardware guarantees that
 two loads on the same address won't be reordered with each other".  Thus
 we can omit the "load-load" barrier dbar 0x700.
 This is only a micro-optimization because dbar 0x700 is already treated
 as nop if the hardware supports LD_SEQ_SA.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_print_operand): Don't
 	print dbar 0x700 if TARGET_LD_SEQ_SA.
 	* config/loongarch/sync.md (atomic_load<mode>): Likewise.
 ---
 gcc/config/loongarch/loongarch.cc | 2 +-
 gcc/config/loongarch/sync.md      | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 8bd46da62..c86b787c4 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -6057,7 +6057,7 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
       if (loongarch_cas_failure_memorder_needs_acquire (
 	    memmodel_from_int (INTVAL (op))))
 	fputs ("dbar\t0b10100", file);
 -      else
 +      else if (!TARGET_LD_SEQ_SA)
 	fputs ("dbar\t0x700", file);
       break;
 diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
 index f4673c856..65443c899 100644
 --- a/gcc/config/loongarch/sync.md
 +++ b/gcc/config/loongarch/sync.md
@@ -119,13 +119,14 @@
     case MEMMODEL_SEQ_CST:
       return "dbar\t0x11\\n\\t"
 	     "ld.<size>\t%0,%1\\n\\t"
 -	     "dbar\t0x14\\n\\t";
 +	     "dbar\t0x14";
     case MEMMODEL_ACQUIRE:
       return "ld.<size>\t%0,%1\\n\\t"
 -	     "dbar\t0x14\\n\\t";
 +	     "dbar\t0x14";
     case MEMMODEL_RELAXED:
 -      return "ld.<size>\t%0,%1\\n\\t"
 -	     "dbar\t0x700\\n\\t";
 +      return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t"
 +			      : "ld.<size>\t%0,%1\\n\\t"
 +				"dbar\t0x700";
     default:
       /* The valid memory order variants are __ATOMIC_RELAXED, __ATOMIC_SEQ_CST,
 -- 
 2.43.0
--- a/0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
+++ b/0039-LoongArch-Add-fine-grained-control-for-LAM_BH-and-LA.patch
@ -0,0 +1,208 @@
 From 416bdd180a6c0dab4736a6da26de245cb0487c0e Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Fri, 25 Oct 2024 02:13:53 +0000
 Subject: [PATCH 039/188] LoongArch: Add fine-grained control for LAM_BH and
 LAMCAS
 gcc/ChangeLog:
        * config/loongarch/genopts/isa-evolution.in: (lam-bh, lamcas):
        Add.
        * config/loongarch/loongarch-str.h: Regenerate.
        * config/loongarch/loongarch.opt: Regenerate.
        * config/loongarch/loongarch-cpucfg-map.h: Regenerate.
        * config/loongarch/loongarch-cpu.cc
        (ISA_BASE_LA64V110_FEATURES): Include OPTION_MASK_ISA_LAM_BH
        and OPTION_MASK_ISA_LAMCAS.
        * config/loongarch/sync.md (atomic_add<mode:SHORT>): Use
        TARGET_LAM_BH instead of ISA_BASE_IS_LA64V110.  Remove empty
        lines from assembly output.
        (atomic_exchange<mode>_short): Likewise.
        (atomic_exchange<mode:SHORT>): Likewise.
        (atomic_fetch_add<mode>_short): Likewise.
        (atomic_fetch_add<mode:SHORT>): Likewise.
        (atomic_cas_value_strong<mode>_amcas): Use TARGET_LAMCAS instead
        of ISA_BASE_IS_LA64V110.
        (atomic_compare_and_swap<mode>): Likewise.
        (atomic_compare_and_swap<mode:GPR>): Likewise.
        (atomic_compare_and_swap<mode:SHORT>): Likewise.
        * config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump
        status of -mlam-bh and -mlamcas if -fverbose-asm.
 ---
 gcc/config/loongarch/genopts/isa-evolution.in |  2 ++
 gcc/config/loongarch/loongarch-cpu.cc         |  3 ++-
 gcc/config/loongarch/loongarch-cpucfg-map.h   |  2 ++
 gcc/config/loongarch/loongarch-str.h          |  2 ++
 gcc/config/loongarch/loongarch.cc             |  2 ++
 gcc/config/loongarch/loongarch.opt            |  8 ++++++++
 gcc/config/loongarch/sync.md                  | 18 +++++++++---------
 7 files changed, 27 insertions(+), 10 deletions(-)
 diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
 index e58f0d6a1..a6bc3f87f 100644
 --- a/gcc/config/loongarch/genopts/isa-evolution.in
 +++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -1,2 +1,4 @@
 2	26	div32		Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
 +2	27	lam-bh		Support am{swap/add}[_db].{b/h} instructions.
 +2	28	lamcas		Support amcas[_db].{b/h/w/d} instructions.
 3	23	ld-seq-sa	Do not need load-load barriers (dbar 0x700).
 diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
 index 76d66fa55..bbce82c9c 100644
 --- a/gcc/config/loongarch/loongarch-cpu.cc
 +++ b/gcc/config/loongarch/loongarch-cpu.cc
@@ -38,7 +38,8 @@ along with GCC; see the file COPYING3.  If not see
    initializers!  */
 #define ISA_BASE_LA64V110_FEATURES \
 -  (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA)
 +  (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \
 +   | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)
 int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
   /* [ISA_BASE_LA64V100] = */ 0,
 diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
 index 0c078c397..02ff16712 100644
 --- a/gcc/config/loongarch/loongarch-cpucfg-map.h
 +++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
@@ -30,6 +30,8 @@ static constexpr struct {
   HOST_WIDE_INT isa_evolution_bit;
 } cpucfg_map[] = {
   { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
 +  { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
 +  { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
   { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA },
 };
 diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
 index cd9dbb41b..0fee9abe5 100644
 --- a/gcc/config/loongarch/loongarch-str.h
 +++ b/gcc/config/loongarch/loongarch-str.h
@@ -70,5 +70,7 @@ along with GCC; see the file COPYING3.  If not see
 #define STR_EXPLICIT_RELOCS_ALWAYS "always"
 #define OPTSTR_DIV32   "div32"
 +#define OPTSTR_LAM_BH  "lam-bh"
 +#define OPTSTR_LAMCAS  "lamcas"
 #define OPTSTR_LD_SEQ_SA   "ld-seq-sa"
 #endif /* LOONGARCH_STR_H */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index c86b787c4..33d23a731 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -11448,6 +11448,8 @@ loongarch_asm_code_end (void)
       fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
 	       loongarch_isa_base_strings [la_target.isa.base]);
       DUMP_FEATURE (TARGET_DIV32);
 +      DUMP_FEATURE (TARGET_LAM_BH);
 +      DUMP_FEATURE (TARGET_LAMCAS);
       DUMP_FEATURE (TARGET_LD_SEQ_SA);
     }
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index 5251f705d..ea0d5bb4e 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -267,6 +267,14 @@ mdiv32
 Target Mask(ISA_DIV32) Var(isa_evolution)
 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
 +mlam-bh
 +Target Mask(ISA_LAM_BH) Var(isa_evolution)
 +Support am{swap/add}[_db].{b/h} instructions.
 +
 +mlamcas
 +Target Mask(ISA_LAMCAS) Var(isa_evolution)
 +Support amcas[_db].{b/h/w/d} instructions.
 +
 mld-seq-sa
 Target Mask(ISA_LD_SEQ_SA) Var(isa_evolution)
 Do not need load-load barriers (dbar 0x700).
 diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md
 index 65443c899..a678e7131 100644
 --- a/gcc/config/loongarch/sync.md
 +++ b/gcc/config/loongarch/sync.md
@@ -124,7 +124,7 @@
       return "ld.<size>\t%0,%1\\n\\t"
 	     "dbar\t0x14";
     case MEMMODEL_RELAXED:
 -      return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1\\n\\t"
 +      return TARGET_LD_SEQ_SA ? "ld.<size>\t%0,%1"
 			      : "ld.<size>\t%0,%1\\n\\t"
 				"dbar\t0x700";
@@ -193,7 +193,7 @@
 		       (match_operand:SHORT 1 "reg_or_0_operand" "rJ"))
 	   (match_operand:SI 2 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
 -  "ISA_BASE_IS_LA64V110"
 +  "TARGET_LAM_BH"
   "amadd%A2.<amo>\t$zero,%z1,%0"
   [(set (attr "length") (const_int 4))])
@@ -230,7 +230,7 @@
 	  UNSPEC_SYNC_EXCHANGE))
    (set (match_dup 1)
 	(match_operand:SHORT 2 "register_operand" "r"))]
 -  "ISA_BASE_IS_LA64V110"
 +  "TARGET_LAM_BH"
   "amswap%A3.<amo>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
@@ -266,7 +266,7 @@
 			       (match_operand:QHWD 3 "reg_or_0_operand" "rJ")
 			       (match_operand:SI 4 "const_int_operand")]  ;; mod_s
 	 UNSPEC_COMPARE_AND_SWAP))]
 -  "ISA_BASE_IS_LA64V110"
 +  "TARGET_LAMCAS"
   "ori\t%0,%z2,0\n\tamcas%A4.<amo>\t%0,%z3,%1"
   [(set (attr "length") (const_int 8))])
@@ -296,7 +296,7 @@
   operands[6] = mod_s;
 -  if (ISA_BASE_IS_LA64V110)
 +  if (TARGET_LAMCAS)
     emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
 							 operands[3], operands[4],
 							 operands[6]));
@@ -422,7 +422,7 @@
   operands[6] = mod_s;
 -  if (ISA_BASE_IS_LA64V110)
 +  if (TARGET_LAMCAS)
     emit_insn (gen_atomic_cas_value_strong<mode>_amcas (operands[1], operands[2],
 						       operands[3], operands[4],
 						       operands[6]));
@@ -642,7 +642,7 @@
 	(match_operand:SHORT 2 "register_operand"))]
   ""
 {
 -  if (ISA_BASE_IS_LA64V110)
 +  if (TARGET_LAM_BH)
     emit_insn (gen_atomic_exchange<mode>_short (operands[0], operands[1], operands[2], operands[3]));
   else
     {
@@ -663,7 +663,7 @@
 		     (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))
 	   (match_operand:SI 3 "const_int_operand")] ;; model
 	 UNSPEC_SYNC_OLD_OP))]
 -  "ISA_BASE_IS_LA64V110"
 +  "TARGET_LAM_BH"
   "amadd%A3.<amo>\t%0,%z2,%1"
   [(set (attr "length") (const_int 4))])
@@ -678,7 +678,7 @@
 	 UNSPEC_SYNC_OLD_OP))]
   ""
 {
 -  if (ISA_BASE_IS_LA64V110)
 +  if (TARGET_LAM_BH)
     emit_insn (gen_atomic_fetch_add<mode>_short (operands[0], operands[1],
 					     operands[2], operands[3]));
   else
 -- 
 2.43.0
--- a/0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch
+++ b/0040-LoongArch-Fix-mexplict-relocs-none-mcmodel-medium-pr.patch
@ -0,0 +1,50 @@
 From 8ca46859ad70fb9473f6dbb1d3069e68ed43ef36 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 19 Nov 2023 01:41:12 +0800
 Subject: [PATCH 040/188] LoongArch: Fix "-mexplicit-relocs=none
 -mcmodel=medium" producing %call36 when the assembler does not support it
 Even if !HAVE_AS_SUPPORT_CALL36, const_call_insn_operand should still
 return false when -mexplicit-relocs=none -mcmodel=medium to make
 loongarch_legitimize_call_address emit la.local or la.global.
 gcc/ChangeLog:
 	* config/loongarch/predicates.md (const_call_insn_operand):
 	Remove buggy "HAVE_AS_SUPPORT_CALL36" conditions.  Change "1" to
 	"true" to make the coding style consistent.
 ---
 gcc/config/loongarch/predicates.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 2aae87db4..30a0dee9f 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -444,21 +444,19 @@
     case SYMBOL_PCREL:
       if (TARGET_CMODEL_EXTREME
 	  || (TARGET_CMODEL_MEDIUM
 -	      && HAVE_AS_SUPPORT_CALL36
 	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
 	return false;
       else
 -	return 1;
 +	return true;
     case SYMBOL_GOT_DISP:
       if (TARGET_CMODEL_EXTREME
 	  || !flag_plt
 	  || (flag_plt && TARGET_CMODEL_MEDIUM
 -	      && HAVE_AS_SUPPORT_CALL36
 	      && (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)))
 	return false;
       else
 -	return 1;
 +	return true;
     default:
       return false;
 -- 
 2.43.0
--- a/0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
+++ b/0041-LoongArch-Modify-MUSL_DYNAMIC_LINKER.patch
@ -0,0 +1,43 @@
 From 4c24f920e52c0dddf4bbbc391d2e5d2524754b4a Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Sat, 18 Nov 2023 11:04:42 +0800
 Subject: [PATCH 041/188] LoongArch: Modify MUSL_DYNAMIC_LINKER.
 Use no suffix at all in the musl dynamic linker name for hard
 float ABI. Use -sf and -sp suffixes in musl dynamic linker name
 for soft float and single precision ABIs. The following table
 outlines the musl interpreter names for the LoongArch64 ABI names.
 musl interpreter            | LoongArch64 ABI
 --------------------------- | -----------------
 ld-musl-loongarch64.so.1    | loongarch64-lp64d
 ld-musl-loongarch64-sp.so.1 | loongarch64-lp64f
 ld-musl-loongarch64-sf.so.1 | loongarch64-lp64s
 gcc/ChangeLog:
 	* config/loongarch/gnu-user.h (MUSL_ABI_SPEC): Modify suffix.
 ---
 gcc/config/loongarch/gnu-user.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
 diff --git a/gcc/config/loongarch/gnu-user.h b/gcc/config/loongarch/gnu-user.h
 index 60ef75601..9fc49dc8f 100644
 --- a/gcc/config/loongarch/gnu-user.h
 +++ b/gcc/config/loongarch/gnu-user.h
@@ -34,9 +34,9 @@ along with GCC; see the file COPYING3.  If not see
   "/lib" ABI_GRLEN_SPEC "/ld-linux-loongarch-" ABI_SPEC ".so.1"
 #define MUSL_ABI_SPEC \
 -  "%{mabi=lp64d:-lp64d}" \
 -  "%{mabi=lp64f:-lp64f}" \
 -  "%{mabi=lp64s:-lp64s}"
 +  "%{mabi=lp64d:}" \
 +  "%{mabi=lp64f:-sp}" \
 +  "%{mabi=lp64s:-sf}"
 #undef MUSL_DYNAMIC_LINKER
 #define MUSL_DYNAMIC_LINKER \
 -- 
 2.43.0
--- a/0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch
+++ b/0042-LoongArch-Fix-libgcc-build-failure-when-libc-is-not-.patch
@ -0,0 +1,85 @@
 From 0f65e5ebe60d9ad5141115661ed71c321156cd95 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Tue, 21 Nov 2023 09:09:25 +0800
 Subject: [PATCH 042/188] LoongArch: Fix libgcc build failure when libc is not
 available
 To use int64_t we included <stdint.h> in loongarch-def.h.
 Unfortunately, loongarch-def.h is also used by libgcc etc., causing a
 build failure when building a "stage1" cross compiler at which the
 target libc is not built yet.
 As int64_t is used for a C-compatible replacement of HOST_WIDE_INT, it's
 not directly or indirectly referred to by the target libraries.  So
 guard everything requiring stdint.h with #if so that it does not block
 target libraries.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-def.h (stdint.h): Guard with #if to
 	exclude it for target libraries.
 	(loongarch_isa_base_features): Likewise.
 	(loongarch_isa): Likewise.
 	(loongarch_abi): Likewise.
 	(loongarch_target): Likewise.
 	(loongarch_cpu_default_isa): Likewise.
 ---
 gcc/config/loongarch/loongarch-def.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index ca0a324dd..ef848f606 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -46,7 +46,10 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef LOONGARCH_DEF_H
 #define LOONGARCH_DEF_H
 +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 #include <stdint.h>
 +#endif
 +
 #include "loongarch-tune.h"
 #ifdef __cplusplus
@@ -62,9 +65,11 @@ extern const char* loongarch_isa_base_strings[];
 #define ISA_BASE_LA64V110     1
 #define N_ISA_BASE_TYPES      2
 +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 /* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
    we cannot use the C++ header options.h in loongarch-def.c.  */
 extern int64_t loongarch_isa_base_features[];
 +#endif
 /* enum isa_ext_* */
 extern const char* loongarch_isa_ext_strings[];
@@ -121,6 +126,7 @@ extern const char* loongarch_cmodel_strings[];
 #define M_OPT_ABSENT(opt_enum)  ((opt_enum) == M_OPT_UNSET)
 +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 /* Internal representation of the target.  */
 struct loongarch_isa
 {
@@ -150,6 +156,9 @@ struct loongarch_target
   int cmodel;	    /* CMODEL_ */
 };
 +extern struct loongarch_isa loongarch_cpu_default_isa[];
 +#endif
 +
 /* CPU properties.  */
 /* index */
 #define CPU_NATIVE	  0
@@ -162,7 +171,6 @@ struct loongarch_target
 /* parallel tables.  */
 extern const char* loongarch_cpu_strings[];
 -extern struct loongarch_isa loongarch_cpu_default_isa[];
 extern int loongarch_cpu_issue_rate[];
 extern int loongarch_cpu_multipass_dfa_lookahead[];
 -- 
 2.43.0
--- a/0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
+++ b/0043-LoongArch-Optimize-LSX-vector-shuffle-on-floating-po.patch
@ -0,0 +1,148 @@
 From cdea7c114fa48012705d65134276619b5679fa35 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 19 Nov 2023 06:12:22 +0800
 Subject: [PATCH 043/188] LoongArch: Optimize LSX vector shuffle on
 floating-point vector
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
 The vec_perm expander was wrongly defined.  GCC internal says:
 Operand 3 is the “selector”.  It is an integral mode vector of the same
 width and number of elements as mode M.
 But we made operand 3 in the same mode as the shuffled vectors, so it
 would be a FP mode vector if the shuffled vectors are FP mode.
 With this mistake, the generic code manages to work around it, but ends
 up creating some very nasty code for a simple __builtin_shuffle (a, b,
 c) where a and b are V4SF, c is V4SI:
    la.local    $r12,.LANCHOR0
    la.local    $r13,.LANCHOR1
    vld $vr1,$r12,48
    vslli.w $vr1,$vr1,2
    vld $vr2,$r12,16
    vld $vr0,$r13,0
    vld $vr3,$r13,16
    vshuf.b $vr0,$vr1,$vr1,$vr0
    vld $vr1,$r12,32
    vadd.b  $vr0,$vr0,$vr3
    vandi.b $vr0,$vr0,31
    vshuf.b $vr0,$vr1,$vr2,$vr0
    vst $vr0,$r12,0
    jr  $r1
 This is obviously stupid.  Fix the expander definition and adjust
 loongarch_expand_vec_perm to handle it correctly.
 gcc/ChangeLog:
 	* config/loongarch/lsx.md (vec_perm<mode:LSX>): Make the
 	selector VIMODE.
 	* config/loongarch/loongarch.cc (loongarch_expand_vec_perm):
 	Use the mode of the selector (instead of the shuffled vector)
 	for truncating it.  Operate on subregs in the selector mode if
 	the shuffled vector has a different mode (i. e. it's a
 	floating-point vector).
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-shuf-fp.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc              | 18 ++++++++++--------
 gcc/config/loongarch/lsx.md                    |  2 +-
 .../gcc.target/loongarch/vect-shuf-fp.c        | 16 ++++++++++++++++
 3 files changed, 27 insertions(+), 9 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 33d23a731..d95ac68e8 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -8603,8 +8603,9 @@ void
 loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
 {
   machine_mode vmode = GET_MODE (target);
 +  machine_mode vimode = GET_MODE (sel);
   auto nelt = GET_MODE_NUNITS (vmode);
 -  auto round_reg = gen_reg_rtx (vmode);
 +  auto round_reg = gen_reg_rtx (vimode);
   rtx round_data[MAX_VECT_LEN];
   for (int i = 0; i < nelt; i += 1)
@@ -8612,9 +8613,16 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
       round_data[i] = GEN_INT (0x1f);
     }
 -  rtx round_data_rtx = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, round_data));
 +  rtx round_data_rtx = gen_rtx_CONST_VECTOR (vimode, gen_rtvec_v (nelt, round_data));
   emit_move_insn (round_reg, round_data_rtx);
 +  if (vmode != vimode)
 +    {
 +      target = lowpart_subreg (vimode, target, vmode);
 +      op0 = lowpart_subreg (vimode, op0, vmode);
 +      op1 = lowpart_subreg (vimode, op1, vmode);
 +    }
 +
   switch (vmode)
     {
     case E_V16QImode:
@@ -8622,17 +8630,11 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
       emit_insn (gen_lsx_vshuf_b (target, op1, op0, sel));
       break;
     case E_V2DFmode:
 -      emit_insn (gen_andv2di3 (sel, sel, round_reg));
 -      emit_insn (gen_lsx_vshuf_d_f (target, sel, op1, op0));
 -      break;
     case E_V2DImode:
       emit_insn (gen_andv2di3 (sel, sel, round_reg));
       emit_insn (gen_lsx_vshuf_d (target, sel, op1, op0));
       break;
     case E_V4SFmode:
 -      emit_insn (gen_andv4si3 (sel, sel, round_reg));
 -      emit_insn (gen_lsx_vshuf_w_f (target, sel, op1, op0));
 -      break;
     case E_V4SImode:
       emit_insn (gen_andv4si3 (sel, sel, round_reg));
       emit_insn (gen_lsx_vshuf_w (target, sel, op1, op0));
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 8ea41c85b..5e8d8d74b 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -837,7 +837,7 @@
  [(match_operand:LSX 0 "register_operand")
   (match_operand:LSX 1 "register_operand")
   (match_operand:LSX 2 "register_operand")
 -  (match_operand:LSX 3 "register_operand")]
 +  (match_operand:<VIMODE> 3 "register_operand")]
   "ISA_HAS_LSX"
 {
   loongarch_expand_vec_perm (operands[0], operands[1],
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
 new file mode 100644
 index 000000000..7acc2113a
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-shuf-fp.c
@@ -0,0 +1,16 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mlasx -O3" } */
 +/* { dg-final { scan-assembler "vshuf\.w" } } */
 +
 +#define V __attribute__ ((vector_size (16)))
 +
 +int a V;
 +float b V;
 +float c V;
 +float d V;
 +
 +void
 +test (void)
 +{
 +  d = __builtin_shuffle (b, c, a);
 +}
 -- 
 2.43.0
--- a/0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
+++ b/0044-LoongArch-Optimize-the-loading-of-immediate-numbers-.patch
@ -0,0 +1,112 @@
 From aaf58efe8414a4eaceb6721d9c242df710d1762c Mon Sep 17 00:00:00 2001
 From: Guo Jie <guojie@loongson.cn>
 Date: Thu, 23 Nov 2023 11:04:17 +0800
 Subject: [PATCH 044/188] LoongArch: Optimize the loading of immediate numbers
 with the same high and low 32-bit values
 For the following immediate load operation in gcc/testsuite/gcc.target/loongarch/imm-load1.c:
 	long long r = 0x0101010101010101;
 Before this patch:
 	lu12i.w	    $r15,16842752>>12
 	ori	    $r15,$r15,257
 	lu32i.d	    $r15,0x1010100000000>>32
 	lu52i.d	    $r15,$r15,0x100000000000000>>52
 After this patch:
 	lu12i.w     $r15,16842752>>12
 	ori         $r15,$r15,257
 	bstrins.d   $r15,$r15,63,32
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc
 	(enum loongarch_load_imm_method): Add new method.
 	(loongarch_build_integer): Add relevant implementations for
 	new method.
 	(loongarch_move_integer): Ditto.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/imm-load1.c: Change old check.
 ---
 gcc/config/loongarch/loongarch.cc             | 22 ++++++++++++++++++-
 .../gcc.target/loongarch/imm-load1.c          |  3 ++-
 2 files changed, 23 insertions(+), 2 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index d95ac68e8..048d3802b 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -142,12 +142,16 @@ struct loongarch_address_info
    METHOD_LU52I:
      Load 52-63 bit of the immediate number.
 +
 +   METHOD_MIRROR:
 +     Copy 0-31 bit of the immediate number to 32-63bit.
 */
 enum loongarch_load_imm_method
 {
   METHOD_NORMAL,
   METHOD_LU32I,
 -  METHOD_LU52I
 +  METHOD_LU52I,
 +  METHOD_MIRROR
 };
 struct loongarch_integer_op
@@ -1553,11 +1557,23 @@ loongarch_build_integer (struct loongarch_integer_op *codes,
       int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31;
       int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51;
 +
 +      uint32_t hival = (uint32_t) (value >> 32);
 +      uint32_t loval = (uint32_t) value;
 +
       /* Determine whether the upper 32 bits are sign-extended from the lower
 	 32 bits. If it is, the instructions to load the high order can be
 	 ommitted.  */
       if (lu32i[sign31] && lu52i[sign31])
 	return cost;
 +      /* If the lower 32 bits are the same as the upper 32 bits, just copy
 +	 the lower 32 bits to the upper 32 bits.  */
 +      else if (loval == hival)
 +	{
 +	  codes[cost].method = METHOD_MIRROR;
 +	  codes[cost].curr_value = value;
 +	  return cost + 1;
 +	}
       /* Determine whether bits 32-51 are sign-extended from the lower 32
 	 bits. If so, directly load 52-63 bits.  */
       else if (lu32i[sign31])
@@ -3230,6 +3246,10 @@ loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value)
 			   gen_rtx_AND (DImode, x, GEN_INT (0xfffffffffffff)),
 			   GEN_INT (codes[i].value));
 	  break;
 +	case METHOD_MIRROR:
 +	  gcc_assert (mode == DImode);
 +	  emit_insn (gen_insvdi (x, GEN_INT (32), GEN_INT (32), x));
 +	  break;
 	default:
 	  gcc_unreachable ();
 	}
 diff --git a/gcc/testsuite/gcc.target/loongarch/imm-load1.c b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
 index 2ff029712..f64cc2956 100644
 --- a/gcc/testsuite/gcc.target/loongarch/imm-load1.c
 +++ b/gcc/testsuite/gcc.target/loongarch/imm-load1.c
@@ -1,6 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mabi=lp64d -O2" } */
 -/* { dg-final { scan-assembler "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
 +/* { dg-final { scan-assembler-not "test:.*lu52i\.d.*\n\taddi\.w.*\n\.L2:" } } */
 +/* { dg-final { scan-assembler "test:.*lu12i\.w.*\n\tbstrins\.d.*\n\.L2:" } } */
 extern long long b[10];
 -- 
 2.43.0
--- a/0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch
+++ b/0045-LoongArch-Fix-runtime-error-in-a-gcc-build-with-with.patch
@ -0,0 +1,30 @@
 From fa28ce4ac91691595e14838be49c9dd42b153b7f Mon Sep 17 00:00:00 2001
 From: Guo Jie <guojie@loongson.cn>
 Date: Thu, 23 Nov 2023 11:05:56 +0800
 Subject: [PATCH 045/188] LoongArch: Fix runtime error in a gcc build with
 --with-build-config=bootstrap-ubsan
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_split_plus_constant):
 	Avoid left shift of negative value -0x8000.
 ---
 gcc/config/loongarch/loongarch.cc | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 048d3802b..ecceca22d 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -4265,7 +4265,7 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
   else if (loongarch_addu16i_imm12_operand_p (v, mode))
     a = (v & ~HWIT_UC_0xFFF) + ((v & 0x800) << 1);
   else if (mode == DImode && DUAL_ADDU16I_OPERAND (v))
 -    a = (v > 0 ? 0x7fff : -0x8000) << 16;
 +    a = (v > 0 ? 0x7fff0000 : ~0x7fffffff);
   else
     gcc_unreachable ();
 -- 
 2.43.0
--- a/0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
+++ b/0046-LoongArch-Fix-usage-of-LSX-and-LASX-frint-ftint-inst.patch
--- a/0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
+++ b/0047-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
@ -0,0 +1,268 @@
 From 4c13256ea34b4169ceb3f9c7826843b754c6a6e0 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 19 Nov 2023 16:28:59 +0800
 Subject: [PATCH 047/188] LoongArch: Use standard pattern name and RTX code for
 LSX/LASX muh instructions
 Remove unnecessary UNSPECs and make the muh instructions useful with
 GNU vectors or auto vectorization.
 gcc/ChangeLog:
 	* config/loongarch/simd.md (muh): New code attribute mapping
 	any_extend to smul_highpart or umul_highpart.
 	(<su>mul<mode>3_highpart): New define_insn.
 	* config/loongarch/lsx.md (UNSPEC_LSX_VMUH_S): Remove.
 	(UNSPEC_LSX_VMUH_U): Remove.
 	(lsx_vmuh_s_<lsxfmt>): Remove.
 	(lsx_vmuh_u_<lsxfmt>): Remove.
 	* config/loongarch/lasx.md (UNSPEC_LASX_XVMUH_S): Remove.
 	(UNSPEC_LASX_XVMUH_U): Remove.
 	(lasx_xvmuh_s_<lasxfmt>): Remove.
 	(lasx_xvmuh_u_<lasxfmt>): Remove.
 	* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vmuh_b):
 	Redefine to standard pattern name.
 	(CODE_FOR_lsx_vmuh_h): Likewise.
 	(CODE_FOR_lsx_vmuh_w): Likewise.
 	(CODE_FOR_lsx_vmuh_d): Likewise.
 	(CODE_FOR_lsx_vmuh_bu): Likewise.
 	(CODE_FOR_lsx_vmuh_hu): Likewise.
 	(CODE_FOR_lsx_vmuh_wu): Likewise.
 	(CODE_FOR_lsx_vmuh_du): Likewise.
 	(CODE_FOR_lasx_xvmuh_b): Likewise.
 	(CODE_FOR_lasx_xvmuh_h): Likewise.
 	(CODE_FOR_lasx_xvmuh_w): Likewise.
 	(CODE_FOR_lasx_xvmuh_d): Likewise.
 	(CODE_FOR_lasx_xvmuh_bu): Likewise.
 	(CODE_FOR_lasx_xvmuh_hu): Likewise.
 	(CODE_FOR_lasx_xvmuh_wu): Likewise.
 	(CODE_FOR_lasx_xvmuh_du): Likewise.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-muh.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 22 ------------
 gcc/config/loongarch/loongarch-builtins.cc    | 32 ++++++++---------
 gcc/config/loongarch/lsx.md                   | 22 ------------
 gcc/config/loongarch/simd.md                  | 16 +++++++++
 gcc/testsuite/gcc.target/loongarch/vect-muh.c | 36 +++++++++++++++++++
 5 files changed, 68 insertions(+), 60 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-muh.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index d4a56c307..023a023b4 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -68,8 +68,6 @@
   UNSPEC_LASX_BRANCH
   UNSPEC_LASX_BRANCH_V
 -  UNSPEC_LASX_XVMUH_S
 -  UNSPEC_LASX_XVMUH_U
   UNSPEC_LASX_MXVEXTW_U
   UNSPEC_LASX_XVSLLWIL_S
   UNSPEC_LASX_XVSLLWIL_U
@@ -2823,26 +2821,6 @@
   [(set_attr "type" "simd_logic")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lasx_xvmuh_s_<lasxfmt>"
 -  [(set (match_operand:ILASX 0 "register_operand" "=f")
 -	(unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
 -		       (match_operand:ILASX 2 "register_operand" "f")]
 -		      UNSPEC_LASX_XVMUH_S))]
 -  "ISA_HAS_LASX"
 -  "xvmuh.<lasxfmt>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "<MODE>")])
 -
 -(define_insn "lasx_xvmuh_u_<lasxfmt_u>"
 -  [(set (match_operand:ILASX 0 "register_operand" "=f")
 -	(unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
 -		       (match_operand:ILASX 2 "register_operand" "f")]
 -		      UNSPEC_LASX_XVMUH_U))]
 -  "ISA_HAS_LASX"
 -  "xvmuh.<lasxfmt_u>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "<MODE>")])
 -
 (define_insn "lasx_xvsllwil_s_<dlasxfmt>_<lasxfmt>"
   [(set (match_operand:<VDMODE256> 0 "register_operand" "=f")
 	(unspec:<VDMODE256> [(match_operand:ILASX_WHB 1 "register_operand" "f")
 diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
 index fb458feac..41ea357cf 100644
 --- a/gcc/config/loongarch/loongarch-builtins.cc
 +++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -319,6 +319,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
 #define CODE_FOR_lsx_vmod_hu CODE_FOR_umodv8hi3
 #define CODE_FOR_lsx_vmod_wu CODE_FOR_umodv4si3
 #define CODE_FOR_lsx_vmod_du CODE_FOR_umodv2di3
 +#define CODE_FOR_lsx_vmuh_b CODE_FOR_smulv16qi3_highpart
 +#define CODE_FOR_lsx_vmuh_h CODE_FOR_smulv8hi3_highpart
 +#define CODE_FOR_lsx_vmuh_w CODE_FOR_smulv4si3_highpart
 +#define CODE_FOR_lsx_vmuh_d CODE_FOR_smulv2di3_highpart
 +#define CODE_FOR_lsx_vmuh_bu CODE_FOR_umulv16qi3_highpart
 +#define CODE_FOR_lsx_vmuh_hu CODE_FOR_umulv8hi3_highpart
 +#define CODE_FOR_lsx_vmuh_wu CODE_FOR_umulv4si3_highpart
 +#define CODE_FOR_lsx_vmuh_du CODE_FOR_umulv2di3_highpart
 #define CODE_FOR_lsx_vmul_b CODE_FOR_mulv16qi3
 #define CODE_FOR_lsx_vmul_h CODE_FOR_mulv8hi3
 #define CODE_FOR_lsx_vmul_w CODE_FOR_mulv4si3
@@ -439,14 +447,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
 #define CODE_FOR_lsx_vfnmsub_s CODE_FOR_vfnmsubv4sf4_nmsub4
 #define CODE_FOR_lsx_vfnmsub_d CODE_FOR_vfnmsubv2df4_nmsub4
 -#define CODE_FOR_lsx_vmuh_b CODE_FOR_lsx_vmuh_s_b
 -#define CODE_FOR_lsx_vmuh_h CODE_FOR_lsx_vmuh_s_h
 -#define CODE_FOR_lsx_vmuh_w CODE_FOR_lsx_vmuh_s_w
 -#define CODE_FOR_lsx_vmuh_d CODE_FOR_lsx_vmuh_s_d
 -#define CODE_FOR_lsx_vmuh_bu CODE_FOR_lsx_vmuh_u_bu
 -#define CODE_FOR_lsx_vmuh_hu CODE_FOR_lsx_vmuh_u_hu
 -#define CODE_FOR_lsx_vmuh_wu CODE_FOR_lsx_vmuh_u_wu
 -#define CODE_FOR_lsx_vmuh_du CODE_FOR_lsx_vmuh_u_du
 #define CODE_FOR_lsx_vsllwil_h_b CODE_FOR_lsx_vsllwil_s_h_b
 #define CODE_FOR_lsx_vsllwil_w_h CODE_FOR_lsx_vsllwil_s_w_h
 #define CODE_FOR_lsx_vsllwil_d_w CODE_FOR_lsx_vsllwil_s_d_w
@@ -588,6 +588,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
 #define CODE_FOR_lasx_xvmul_h CODE_FOR_mulv16hi3
 #define CODE_FOR_lasx_xvmul_w CODE_FOR_mulv8si3
 #define CODE_FOR_lasx_xvmul_d CODE_FOR_mulv4di3
 +#define CODE_FOR_lasx_xvmuh_b CODE_FOR_smulv32qi3_highpart
 +#define CODE_FOR_lasx_xvmuh_h CODE_FOR_smulv16hi3_highpart
 +#define CODE_FOR_lasx_xvmuh_w CODE_FOR_smulv8si3_highpart
 +#define CODE_FOR_lasx_xvmuh_d CODE_FOR_smulv4di3_highpart
 +#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_umulv32qi3_highpart
 +#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_umulv16hi3_highpart
 +#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_umulv8si3_highpart
 +#define CODE_FOR_lasx_xvmuh_du CODE_FOR_umulv4di3_highpart
 #define CODE_FOR_lasx_xvclz_b CODE_FOR_clzv32qi2
 #define CODE_FOR_lasx_xvclz_h CODE_FOR_clzv16hi2
 #define CODE_FOR_lasx_xvclz_w CODE_FOR_clzv8si2
@@ -697,14 +705,6 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
 #define CODE_FOR_lasx_xvavgr_hu CODE_FOR_lasx_xvavgr_u_hu
 #define CODE_FOR_lasx_xvavgr_wu CODE_FOR_lasx_xvavgr_u_wu
 #define CODE_FOR_lasx_xvavgr_du CODE_FOR_lasx_xvavgr_u_du
 -#define CODE_FOR_lasx_xvmuh_b CODE_FOR_lasx_xvmuh_s_b
 -#define CODE_FOR_lasx_xvmuh_h CODE_FOR_lasx_xvmuh_s_h
 -#define CODE_FOR_lasx_xvmuh_w CODE_FOR_lasx_xvmuh_s_w
 -#define CODE_FOR_lasx_xvmuh_d CODE_FOR_lasx_xvmuh_s_d
 -#define CODE_FOR_lasx_xvmuh_bu CODE_FOR_lasx_xvmuh_u_bu
 -#define CODE_FOR_lasx_xvmuh_hu CODE_FOR_lasx_xvmuh_u_hu
 -#define CODE_FOR_lasx_xvmuh_wu CODE_FOR_lasx_xvmuh_u_wu
 -#define CODE_FOR_lasx_xvmuh_du CODE_FOR_lasx_xvmuh_u_du
 #define CODE_FOR_lasx_xvssran_b_h CODE_FOR_lasx_xvssran_s_b_h
 #define CODE_FOR_lasx_xvssran_h_w CODE_FOR_lasx_xvssran_s_h_w
 #define CODE_FOR_lasx_xvssran_w_d CODE_FOR_lasx_xvssran_s_w_d
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index c1c3719e3..537afaf96 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -64,8 +64,6 @@
   UNSPEC_LSX_VSRLR
   UNSPEC_LSX_VSRLRI
   UNSPEC_LSX_VSHUF
 -  UNSPEC_LSX_VMUH_S
 -  UNSPEC_LSX_VMUH_U
   UNSPEC_LSX_VEXTW_S
   UNSPEC_LSX_VEXTW_U
   UNSPEC_LSX_VSLLWIL_S
@@ -2506,26 +2504,6 @@
   [(set_attr "type" "simd_logic")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lsx_vmuh_s_<lsxfmt>"
 -  [(set (match_operand:ILSX 0 "register_operand" "=f")
 -	(unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
 -		      (match_operand:ILSX 2 "register_operand" "f")]
 -		     UNSPEC_LSX_VMUH_S))]
 -  "ISA_HAS_LSX"
 -  "vmuh.<lsxfmt>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "<MODE>")])
 -
 -(define_insn "lsx_vmuh_u_<lsxfmt_u>"
 -  [(set (match_operand:ILSX 0 "register_operand" "=f")
 -	(unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
 -		      (match_operand:ILSX 2 "register_operand" "f")]
 -		     UNSPEC_LSX_VMUH_U))]
 -  "ISA_HAS_LSX"
 -  "vmuh.<lsxfmt_u>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "<MODE>")])
 -
 (define_insn "lsx_vextw_s_d"
   [(set (match_operand:V2DI 0 "register_operand" "=f")
 	(unspec:V2DI [(match_operand:V4SI 1 "register_operand" "f")]
 diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
 index 27d1ffecd..a0e8db3c0 100644
 --- a/gcc/config/loongarch/simd.md
 +++ b/gcc/config/loongarch/simd.md
@@ -206,6 +206,22 @@
   [(set_attr "type" "simd_fcvt")
    (set_attr "mode" "<MODE>")])
 +;; <x>vmuh.{b/h/w/d}
 +
 +(define_code_attr muh
 +  [(sign_extend "smul_highpart")
 +   (zero_extend "umul_highpart")])
 +
 +(define_insn "<su>mul<mode>3_highpart"
 +  [(set (match_operand:IVEC 0 "register_operand" "=f")
 +	(<muh>:IVEC (match_operand:IVEC 1 "register_operand" "f")
 +		    (match_operand:IVEC 2 "register_operand" "f")))
 +   (any_extend (const_int 0))]
 +  ""
 +  "<x>vmuh.<simdfmt><u>\t%<wu>0,%<wu>1,%<wu>2"
 +  [(set_attr "type" "simd_int_arith")
 +   (set_attr "mode" "<MODE>")])
 +
 ; The LoongArch SX Instructions.
 (include "lsx.md")
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-muh.c b/gcc/testsuite/gcc.target/loongarch/vect-muh.c
 new file mode 100644
 index 000000000..a788840b2
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-muh.c
@@ -0,0 +1,36 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mlasx -O3" } */
 +/* { dg-final { scan-assembler "\tvmuh\.w\t" } } */
 +/* { dg-final { scan-assembler "\tvmuh\.wu\t" } } */
 +/* { dg-final { scan-assembler "\txvmuh\.w\t" } } */
 +/* { dg-final { scan-assembler "\txvmuh\.wu\t" } } */
 +
 +int a[8], b[8], c[8];
 +
 +void
 +test1 (void)
 +{
 +  for (int i = 0; i < 4; i++)
 +    c[i] = ((long)a[i] * (long)b[i]) >> 32;
 +}
 +
 +void
 +test2 (void)
 +{
 +  for (int i = 0; i < 4; i++)
 +    c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32;
 +}
 +
 +void
 +test3 (void)
 +{
 +  for (int i = 0; i < 8; i++)
 +    c[i] = ((long)a[i] * (long)b[i]) >> 32;
 +}
 +
 +void
 +test4 (void)
 +{
 +  for (int i = 0; i < 8; i++)
 +    c[i] = ((long)(unsigned)a[i] * (long)(unsigned)b[i]) >> 32;
 +}
 -- 
 2.43.0
--- a/0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
+++ b/0048-LoongArch-Use-standard-pattern-name-and-RTX-code-for.patch
@ -0,0 +1,285 @@
 From 9dde2178e64893e4c46b1c375a658f8ab6d34fdd Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 19 Nov 2023 17:28:06 +0800
 Subject: [PATCH 048/188] LoongArch: Use standard pattern name and RTX code for
 LSX/LASX rotate shift
 Remove unnecessary UNSPECs and make the [x]vrotr[i] instructions useful
 with GNU vectors and auto vectorization.
 gcc/ChangeLog:
 	* config/loongarch/lsx.md (bitimm): Move to ...
 	(UNSPEC_LSX_VROTR): Remove.
 	(lsx_vrotr_<lsxfmt>): Remove.
 	(lsx_vrotri_<lsxfmt>): Remove.
 	* config/loongarch/lasx.md (UNSPEC_LASX_XVROTR): Remove.
 	(lasx_xvrotr_<lasxfmt>): Remove.
 	(lasx_xvrotri_<lasxfmt>): Remove.
 	* config/loongarch/simd.md (bitimm): ... here.  Expand it to
 	cover LASX modes.
 	(vrotr<mode>3): New define_insn.
 	(rotr<mode>3): New define_insn.
 	* config/loongarch/loongarch-builtins.cc:
 	(CODE_FOR_lsx_vrotr_b): Use standard pattern name.
 	(CODE_FOR_lsx_vrotr_h): Likewise.
 	(CODE_FOR_lsx_vrotr_w): Likewise.
 	(CODE_FOR_lsx_vrotr_d): Likewise.
 	(CODE_FOR_lasx_xvrotr_b): Likewise.
 	(CODE_FOR_lasx_xvrotr_h): Likewise.
 	(CODE_FOR_lasx_xvrotr_w): Likewise.
 	(CODE_FOR_lasx_xvrotr_d): Likewise.
 	(CODE_FOR_lsx_vrotri_b): Define to standard pattern name.
 	(CODE_FOR_lsx_vrotri_h): Likewise.
 	(CODE_FOR_lsx_vrotri_w): Likewise.
 	(CODE_FOR_lsx_vrotri_d): Likewise.
 	(CODE_FOR_lasx_xvrotri_b): Likewise.
 	(CODE_FOR_lasx_xvrotri_h): Likewise.
 	(CODE_FOR_lasx_xvrotri_w): Likewise.
 	(CODE_FOR_lasx_xvrotri_d): Likewise.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-rotr.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 22 ------------
 gcc/config/loongarch/loongarch-builtins.cc    | 16 +++++++++
 gcc/config/loongarch/lsx.md                   | 28 ---------------
 gcc/config/loongarch/simd.md                  | 29 +++++++++++++++
 .../gcc.target/loongarch/vect-rotr.c          | 36 +++++++++++++++++++
 5 files changed, 81 insertions(+), 50 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-rotr.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 023a023b4..116b30c07 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -138,7 +138,6 @@
   UNSPEC_LASX_XVHSUBW_Q_D
   UNSPEC_LASX_XVHADDW_QU_DU
   UNSPEC_LASX_XVHSUBW_QU_DU
 -  UNSPEC_LASX_XVROTR
   UNSPEC_LASX_XVADD_Q
   UNSPEC_LASX_XVSUB_Q
   UNSPEC_LASX_XVREPLVE
@@ -4232,18 +4231,6 @@
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "V4DI")])
 -;;XVROTR.B   XVROTR.H   XVROTR.W   XVROTR.D
 -;;TODO-478
 -(define_insn "lasx_xvrotr_<lasxfmt>"
 -  [(set (match_operand:ILASX 0 "register_operand" "=f")
 -	(unspec:ILASX [(match_operand:ILASX 1 "register_operand" "f")
 -		       (match_operand:ILASX 2 "register_operand" "f")]
 -		      UNSPEC_LASX_XVROTR))]
 -  "ISA_HAS_LASX"
 -  "xvrotr.<lasxfmt>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "<MODE>")])
 -
 ;;XVADD.Q
 ;;TODO2
 (define_insn "lasx_xvadd_q"
@@ -4426,15 +4413,6 @@
   [(set_attr "type" "simd_fcvt")
    (set_attr "mode" "V4DI")])
 -(define_insn "lasx_xvrotri_<lasxfmt>"
 -  [(set (match_operand:ILASX 0 "register_operand" "=f")
 -	(rotatert:ILASX (match_operand:ILASX 1 "register_operand" "f")
 -		       (match_operand 2 "const_<bitimm256>_operand" "")))]
 -  "ISA_HAS_LASX"
 -  "xvrotri.<lasxfmt>\t%u0,%u1,%2"
 -  [(set_attr "type" "simd_shf")
 -   (set_attr "mode" "<MODE>")])
 -
 (define_insn "lasx_xvextl_q_d"
   [(set (match_operand:V4DI 0 "register_operand" "=f")
 	(unspec:V4DI [(match_operand:V4DI 1 "register_operand" "f")]
 diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
 index 41ea357cf..f4523c8bf 100644
 --- a/gcc/config/loongarch/loongarch-builtins.cc
 +++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -369,6 +369,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
 #define CODE_FOR_lsx_vsrli_h CODE_FOR_vlshrv8hi3
 #define CODE_FOR_lsx_vsrli_w CODE_FOR_vlshrv4si3
 #define CODE_FOR_lsx_vsrli_d CODE_FOR_vlshrv2di3
 +#define CODE_FOR_lsx_vrotr_b CODE_FOR_vrotrv16qi3
 +#define CODE_FOR_lsx_vrotr_h CODE_FOR_vrotrv8hi3
 +#define CODE_FOR_lsx_vrotr_w CODE_FOR_vrotrv4si3
 +#define CODE_FOR_lsx_vrotr_d CODE_FOR_vrotrv2di3
 +#define CODE_FOR_lsx_vrotri_b CODE_FOR_rotrv16qi3
 +#define CODE_FOR_lsx_vrotri_h CODE_FOR_rotrv8hi3
 +#define CODE_FOR_lsx_vrotri_w CODE_FOR_rotrv4si3
 +#define CODE_FOR_lsx_vrotri_d CODE_FOR_rotrv2di3
 #define CODE_FOR_lsx_vsub_b CODE_FOR_subv16qi3
 #define CODE_FOR_lsx_vsub_h CODE_FOR_subv8hi3
 #define CODE_FOR_lsx_vsub_w CODE_FOR_subv4si3
@@ -634,6 +642,14 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
 #define CODE_FOR_lasx_xvsrli_h CODE_FOR_vlshrv16hi3
 #define CODE_FOR_lasx_xvsrli_w CODE_FOR_vlshrv8si3
 #define CODE_FOR_lasx_xvsrli_d CODE_FOR_vlshrv4di3
 +#define CODE_FOR_lasx_xvrotr_b CODE_FOR_vrotrv32qi3
 +#define CODE_FOR_lasx_xvrotr_h CODE_FOR_vrotrv16hi3
 +#define CODE_FOR_lasx_xvrotr_w CODE_FOR_vrotrv8si3
 +#define CODE_FOR_lasx_xvrotr_d CODE_FOR_vrotrv4di3
 +#define CODE_FOR_lasx_xvrotri_b CODE_FOR_rotrv32qi3
 +#define CODE_FOR_lasx_xvrotri_h CODE_FOR_rotrv16hi3
 +#define CODE_FOR_lasx_xvrotri_w CODE_FOR_rotrv8si3
 +#define CODE_FOR_lasx_xvrotri_d CODE_FOR_rotrv4di3
 #define CODE_FOR_lasx_xvsub_b CODE_FOR_subv32qi3
 #define CODE_FOR_lasx_xvsub_h CODE_FOR_subv16hi3
 #define CODE_FOR_lasx_xvsub_w CODE_FOR_subv8si3
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 537afaf96..232399934 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -141,7 +141,6 @@
   UNSPEC_LSX_VMADDWOD
   UNSPEC_LSX_VMADDWOD2
   UNSPEC_LSX_VMADDWOD3
 -  UNSPEC_LSX_VROTR
   UNSPEC_LSX_VADD_Q
   UNSPEC_LSX_VSUB_Q
   UNSPEC_LSX_VEXTH_Q_D
@@ -363,14 +362,6 @@
    (V8HI "exp_8")
    (V16QI "exp_16")])
 -;; This attribute is used to form an immediate operand constraint using
 -;; "const_<bitimm>_operand".
 -(define_mode_attr bitimm
 -  [(V16QI "uimm3")
 -   (V8HI  "uimm4")
 -   (V4SI  "uimm5")
 -   (V2DI  "uimm6")])
 -
 (define_expand "vec_init<mode><unitmode>"
   [(match_operand:LSX 0 "register_operand")
    (match_operand:LSX 1 "")]
@@ -4152,16 +4143,6 @@
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "V2DI")])
 -(define_insn "lsx_vrotr_<lsxfmt>"
 -  [(set (match_operand:ILSX 0 "register_operand" "=f")
 -	(unspec:ILSX [(match_operand:ILSX 1 "register_operand" "f")
 -		      (match_operand:ILSX 2 "register_operand" "f")]
 -		     UNSPEC_LSX_VROTR))]
 -  "ISA_HAS_LSX"
 -  "vrotr.<lsxfmt>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_int_arith")
 -   (set_attr "mode" "<MODE>")])
 -
 (define_insn "lsx_vadd_q"
   [(set (match_operand:V2DI 0 "register_operand" "=f")
 	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")
@@ -4255,15 +4236,6 @@
   [(set_attr "type" "simd_fcvt")
    (set_attr "mode" "V2DI")])
 -(define_insn "lsx_vrotri_<lsxfmt>"
 -  [(set (match_operand:ILSX 0 "register_operand" "=f")
 -	(rotatert:ILSX (match_operand:ILSX 1 "register_operand" "f")
 -		      (match_operand 2 "const_<bitimm>_operand" "")))]
 -  "ISA_HAS_LSX"
 -  "vrotri.<lsxfmt>\t%w0,%w1,%2"
 -  [(set_attr "type" "simd_shf")
 -   (set_attr "mode" "<MODE>")])
 -
 (define_insn "lsx_vextl_q_d"
   [(set (match_operand:V2DI 0 "register_operand" "=f")
 	(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "f")]
 diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
 index a0e8db3c0..4ecf7a55e 100644
 --- a/gcc/config/loongarch/simd.md
 +++ b/gcc/config/loongarch/simd.md
@@ -91,6 +91,13 @@
 			   (V8HI "16") (V16HI "16")
 			   (V16QI "8") (V32QI "8")])
 +;; This attribute is used to form an immediate operand constraint using
 +;; "const_<bitimm>_operand".
 +(define_mode_attr bitimm [(V16QI "uimm3") (V32QI "uimm3")
 +			  (V8HI  "uimm4") (V16HI "uimm4")
 +			  (V4SI  "uimm5") (V8SI "uimm5")
 +			  (V2DI  "uimm6") (V4DI "uimm6")])
 +
 ;; =======================================================================
 ;; For many LASX instructions, the only difference of it from the LSX
 ;; counterpart is the length of vector operands.  Describe these LSX/LASX
@@ -222,6 +229,28 @@
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 +;; <x>vrotr.{b/h/w/d}
 +
 +(define_insn "vrotr<mode>3"
 +  [(set (match_operand:IVEC 0 "register_operand" "=f")
 +	(rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
 +		       (match_operand:IVEC 2 "register_operand" "f")))]
 +  ""
 +  "<x>vrotr.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
 +  [(set_attr "type" "simd_int_arith")
 +   (set_attr "mode" "<MODE>")])
 +
 +;; <x>vrotri.{b/h/w/d}
 +
 +(define_insn "rotr<mode>3"
 +  [(set (match_operand:IVEC 0 "register_operand" "=f")
 +	(rotatert:IVEC (match_operand:IVEC 1 "register_operand" "f")
 +		       (match_operand:SI 2 "const_<bitimm>_operand")))]
 +  ""
 +  "<x>vrotri.<simdfmt>\t%<wu>0,%<wu>1,%2";
 +  [(set_attr "type" "simd_int_arith")
 +   (set_attr "mode" "<MODE>")])
 +
 ; The LoongArch SX Instructions.
 (include "lsx.md")
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-rotr.c b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c
 new file mode 100644
 index 000000000..733c36334
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-rotr.c
@@ -0,0 +1,36 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx" } */
 +/* { dg-final { scan-assembler "\tvrotr\.w\t" } } */
 +/* { dg-final { scan-assembler "\txvrotr\.w\t" } } */
 +/* { dg-final { scan-assembler "\tvrotri\.w\t\[^\n\]*7\n" } } */
 +/* { dg-final { scan-assembler "\txvrotri\.w\t\[^\n\]*7\n" } } */
 +
 +unsigned int a[8], b[8];
 +
 +void
 +test1 (void)
 +{
 +  for (int i = 0; i < 4; i++)
 +    a[i] = a[i] >> b[i] | a[i] << (32 - b[i]);
 +}
 +
 +void
 +test2 (void)
 +{
 +  for (int i = 0; i < 8; i++)
 +    a[i] = a[i] >> b[i] | a[i] << (32 - b[i]);
 +}
 +
 +void
 +test3 (void)
 +{
 +  for (int i = 0; i < 4; i++)
 +    a[i] = a[i] >> 7 | a[i] << 25;
 +}
 +
 +void
 +test4 (void)
 +{
 +  for (int i = 0; i < 8; i++)
 +    a[i] = a[i] >> 7 | a[i] << 25;
 +}
 -- 
 2.43.0
--- a/0049-LoongArch-Remove-lrint_allow_inexact.patch
+++ b/0049-LoongArch-Remove-lrint_allow_inexact.patch
@ -0,0 +1,42 @@
 From c898e4a85c04a72f08db9ba2a454130f15f6f280 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 20 Nov 2023 01:34:26 +0800
 Subject: [PATCH 049/188] LoongArch: Remove lrint_allow_inexact
 No functional change, just a cleanup.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (lrint_allow_inexact): Remove.
 	(<lrint_pattern><ANYF:mode><ANYFI:mode>2): Check if <LRINT>
 	== UNSPEC_FTINT instead of <lrint_allow_inexact>.
 ---
 gcc/config/loongarch/loongarch.md | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index d1c766cbf..11577f407 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -585,9 +585,6 @@
 (define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
 				    (UNSPEC_FTINTRM "rm")
 				    (UNSPEC_FTINTRP "rp")])
 -(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
 -				      (UNSPEC_FTINTRM "0")
 -				      (UNSPEC_FTINTRP "0")])
 ;; Iterator and attributes for bytepick.d
 (define_int_iterator bytepick_w_ashift_amount [8 16 24])
@@ -2384,7 +2381,7 @@
 	(unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
 		      LRINT))]
   "TARGET_HARD_FLOAT &&
 -   (<lrint_allow_inexact>
 +   (<LRINT> == UNSPEC_FTINT
     || flag_fp_int_builtin_inexact
     || !flag_trapping_math)"
   "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
 -- 
 2.43.0
--- a/0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
+++ b/0050-LoongArch-Use-LSX-for-scalar-FP-rounding-with-explic.patch
@ -0,0 +1,150 @@
 From 05fafb78b301ce9a545e0dad896b19339f716eaf Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Mon, 20 Nov 2023 03:51:56 +0800
 Subject: [PATCH 050/188] LoongArch: Use LSX for scalar FP rounding with
 explicit rounding mode
 In LoongArch FP base ISA there is only the frint.{s/d} instruction which
 reads the global rounding mode.  Utilize LSX for explicit rounding mode
 even if the operand is scalar.  This seems to waste CPU power, but is
 still much faster than calling the library function.
 gcc/ChangeLog:
 	* config/loongarch/simd.md (LSX_SCALAR_FRINT): New int iterator.
 	(VLSX_FOR_FMODE): New mode attribute.
 	(<simd_frint_pattern><mode>2): New expander for scalar modes,
 	expanding to vreplvei.{w/d} + vfrint{rp/rz/rm/rne}.{s/d}.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-frint-scalar.c: New test.
 	* gcc.target/loongarch/vect-frint-scalar-no-inexact.c: New test.
 ---
 gcc/config/loongarch/simd.md                  | 28 ++++++++++++
 .../loongarch/vect-frint-scalar-no-inexact.c  | 23 ++++++++++
 .../gcc.target/loongarch/vect-frint-scalar.c  | 43 +++++++++++++++++++
 3 files changed, 94 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
 diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
 index 4ecf7a55e..843b1a41f 100644
 --- a/gcc/config/loongarch/simd.md
 +++ b/gcc/config/loongarch/simd.md
@@ -169,6 +169,34 @@
 		     UNSPEC_SIMD_FRINTRZ))]
   "")
 +;; Use LSX for scalar ceil/floor/trunc/roundeven when -mlsx and -ffp-int-
 +;; builtin-inexact.  The base FP instruction set lacks these operations.
 +;; Yes we are wasting 50% or even 75% of the CPU horsepower, but it's still
 +;; much faster than calling a libc function: on LA464 and LA664 there is a
 +;; 3x ~ 5x speed up.
 +;;
 +;; Note that a vreplvei instruction is needed or we'll also operate on the
 +;; junk in high bits of the vector register and produce random FP exceptions.
 +
 +(define_int_iterator LSX_SCALAR_FRINT
 +  [UNSPEC_SIMD_FRINTRP
 +   UNSPEC_SIMD_FRINTRZ
 +   UNSPEC_SIMD_FRINTRM
 +   UNSPEC_SIMD_FRINTRNE])
 +
 +(define_mode_attr VLSX_FOR_FMODE [(DF "V2DF") (SF "V4SF")])
 +
 +(define_expand "<simd_frint_pattern><mode>2"
 +  [(set (match_dup 2)
 +     (vec_duplicate:<VLSX_FOR_FMODE>
 +       (match_operand:ANYF 1 "register_operand")))
 +   (set (match_dup 2)
 +	(unspec:<VLSX_FOR_FMODE> [(match_dup 2)] LSX_SCALAR_FRINT))
 +   (set (match_operand:ANYF 0 "register_operand")
 +	(vec_select:ANYF (match_dup 2) (parallel [(const_int 0)])))]
 +  "ISA_HAS_LSX && (flag_fp_int_builtin_inexact || !flag_trapping_math)"
 +  "operands[2] = gen_reg_rtx (<VLSX_FOR_FMODE>mode);")
 +
 ;; <x>vftint.{/rp/rz/rm}
 (define_insn
   "<simd_isa>_<x>vftint<simd_frint_rounding>_<simdifmt_for_f>_<simdfmt>"
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
 new file mode 100644
 index 000000000..002e3b92d
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar-no-inexact.c
@@ -0,0 +1,23 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -fno-fp-int-builtin-inexact" } */
 +
 +#include "vect-frint-scalar.c"
 +
 +/* cannot use LSX for these with -fno-fp-int-builtin-inexact,
 +   call library function.  */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(ceil\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(ceilf\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(floor\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(floorf\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(trunc\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(truncf\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(roundeven\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(roundevenf\\)" } } */
 +
 +/* nearbyint has not been allowed to raise FE_INEXACT for decades */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
 +
 +/* rint should just use basic FP operation */
 +/* { dg-final { scan-assembler "\tfrint\.s" } } */
 +/* { dg-final { scan-assembler "\tfrint\.d" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
 new file mode 100644
 index 000000000..c7cb40be7
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-frint-scalar.c
@@ -0,0 +1,43 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx" } */
 +
 +#define test(func, suffix) \
 +__typeof__ (1.##suffix) \
 +_##func##suffix (__typeof__ (1.##suffix) x) \
 +{ \
 +  return __builtin_##func##suffix (x); \
 +}
 +
 +test (ceil, f)
 +test (ceil, )
 +test (floor, f)
 +test (floor, )
 +test (trunc, f)
 +test (trunc, )
 +test (roundeven, f)
 +test (roundeven, )
 +test (nearbyint, f)
 +test (nearbyint, )
 +test (rint, f)
 +test (rint, )
 +
 +/* { dg-final { scan-assembler "\tvfrintrp\.s" } } */
 +/* { dg-final { scan-assembler "\tvfrintrm\.s" } } */
 +/* { dg-final { scan-assembler "\tvfrintrz\.s" } } */
 +/* { dg-final { scan-assembler "\tvfrintrne\.s" } } */
 +/* { dg-final { scan-assembler "\tvfrintrp\.d" } } */
 +/* { dg-final { scan-assembler "\tvfrintrm\.d" } } */
 +/* { dg-final { scan-assembler "\tvfrintrz\.d" } } */
 +/* { dg-final { scan-assembler "\tvfrintrne\.d" } } */
 +
 +/* must do vreplvei first */
 +/* { dg-final { scan-assembler-times "\tvreplvei\.w\t\\\$vr0,\\\$vr0,0" 4 } } */
 +/* { dg-final { scan-assembler-times "\tvreplvei\.d\t\\\$vr0,\\\$vr0,0" 4 } } */
 +
 +/* nearbyint has not been allowed to raise FE_INEXACT for decades */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyint\\)" } } */
 +/* { dg-final { scan-assembler "\tb\t%plt\\(nearbyintf\\)" } } */
 +
 +/* rint should just use basic FP operation */
 +/* { dg-final { scan-assembler "\tfrint\.s" } } */
 +/* { dg-final { scan-assembler "\tfrint\.d" } } */
 -- 
 2.43.0
--- a/0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch
+++ b/0051-LoongArch-Remove-duplicate-definition-of-CLZ_DEFINED.patch
@ -0,0 +1,49 @@
 From 21bb4f07db53df717d02e9115dcdb7b5475ede2a Mon Sep 17 00:00:00 2001
 From: Li Wei <liwei@loongson.cn>
 Date: Tue, 28 Nov 2023 15:56:35 +0800
 Subject: [PATCH 051/188] LoongArch: Remove duplicate definition of
 CLZ_DEFINED_VALUE_AT_ZERO.
 In the r14-5547 commit, C[LT]Z_DEFINED_VALUE_AT_ZERO were defined at
 the same time, but in fact, CLZ_DEFINED_VALUE_AT_ZERO has already been
 defined, so remove the duplicate definition.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.h (CTZ_DEFINED_VALUE_AT_ZERO): Add
 	description.
 	(CLZ_DEFINED_VALUE_AT_ZERO): Remove duplicate definition.
 ---
 gcc/config/loongarch/loongarch.h | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h
 index 19cf6fd33..8b28be0e4 100644
 --- a/gcc/config/loongarch/loongarch.h
 +++ b/gcc/config/loongarch/loongarch.h
@@ -288,10 +288,12 @@ along with GCC; see the file COPYING3.  If not see
 /* Define if loading short immediate values into registers sign extends.  */
 #define SHORT_IMMEDIATES_SIGN_EXTEND 1
 -/* The clz.{w/d} instructions have the natural values at 0.  */
 +/* The clz.{w/d}, ctz.{w/d} instructions have the natural values at 0.  */
 #define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
   ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
 +  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 /* Standard register usage.  */
@@ -1239,8 +1241,3 @@ struct GTY (()) machine_function
 #define TARGET_EXPLICIT_RELOCS \
   (la_opt_explicit_relocs == EXPLICIT_RELOCS_ALWAYS)
 -
 -#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
 -  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 -#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \
 -  ((VALUE) = GET_MODE_UNIT_BITSIZE (MODE), 2)
 -- 
 2.43.0
--- a/0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
+++ b/0052-LoongArch-Added-vectorized-hardware-inspection-for-t.patch
--- a/0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
+++ b/0053-LoongArch-Accelerate-optimization-of-scalar-signed-u.patch
@ -0,0 +1,148 @@
 From 87230032bc7fbcec1e3927b2b4a6aeba78040cc6 Mon Sep 17 00:00:00 2001
 From: Li Wei <liwei@loongson.cn>
 Date: Tue, 28 Nov 2023 15:38:37 +0800
 Subject: [PATCH 053/188] LoongArch: Accelerate optimization of scalar
 signed/unsigned popcount.
 In LoongArch, the vector popcount has corresponding instructions, while
 the scalar does not. Currently, the scalar popcount is calculated
 through a loop, and the value of a non-power of two needs to be iterated
 several times, so the vector popcount instruction is considered for
 optimization.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (cntmap): New mode attribute mapping
 	a scalar mode to its corresponding vector mode.
 	(popcount<mode>2): New.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/popcnt.c: New test.
 	* gcc.target/loongarch/popcount.c: New test.
 ---
 gcc/config/loongarch/loongarch.md             | 27 +++++++++++-
 gcc/testsuite/gcc.target/loongarch/popcnt.c   | 41 +++++++++++++++++++
 gcc/testsuite/gcc.target/loongarch/popcount.c | 17 ++++++++
 3 files changed, 83 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/popcnt.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/popcount.c
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 11577f407..cfd7a8ec6 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -1512,7 +1512,30 @@
    (set_attr "cnv_mode"	"D2S")
    (set_attr "mode" "SF")])
 -
 +;; In vector registers, popcount can be implemented directly through
 +;; the vector instruction [X]VPCNT.  For GP registers, we can implement
 +;; it through the following method.  Compared with loop implementation
 +;; of popcount, the following method has better performance.
 +
 +;; This attribute is used to map a scalar mode to the corresponding
 +;; vector mode.
 +(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
 +
 +(define_expand "popcount<mode>2"
 +  [(set (match_operand:GPR 0 "register_operand")
 +	(popcount:GPR (match_operand:GPR 1 "register_operand")))]
 +  "ISA_HAS_LSX"
 +{
 +  rtx in = operands[1];
 +  rtx out = operands[0];
 +  rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
 +				    gen_reg_rtx (V2DImode);
 +  emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
 +  emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
 +  emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
 +  DONE;
 +})
 +
 ;;
 ;;  ....................
 ;;
@@ -3879,7 +3902,7 @@
 		   (any_extend:SI (match_dup 3)))])]
   "")
 -
 +
 (define_mode_iterator QHSD [QI HI SI DI])
 diff --git a/gcc/testsuite/gcc.target/loongarch/popcnt.c b/gcc/testsuite/gcc.target/loongarch/popcnt.c
 new file mode 100644
 index 000000000..a10fca420
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/popcnt.c
@@ -0,0 +1,41 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx" } */
 +/* { dg-final { scan-assembler-not {popcount} } } */
 +/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */
 +/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */
 +
 +int
 +foo (int x)
 +{
 +  return __builtin_popcount (x);
 +}
 +
 +long
 +foo1 (long x)
 +{
 +  return __builtin_popcountl (x);
 +}
 +
 +long long
 +foo2 (long long x)
 +{
 +  return __builtin_popcountll (x);
 +}
 +
 +int
 +foo3 (int *p)
 +{
 +  return __builtin_popcount (*p);
 +}
 +
 +unsigned
 +foo4 (int x)
 +{
 +  return __builtin_popcount (x);
 +}
 +
 +unsigned long
 +foo5 (int x)
 +{
 +  return __builtin_popcount (x);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/popcount.c b/gcc/testsuite/gcc.target/loongarch/popcount.c
 new file mode 100644
 index 000000000..390ff0676
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/popcount.c
@@ -0,0 +1,17 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */
 +/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */
 +
 +int
 +PopCount (long b)
 +{
 +  int c = 0;
 +
 +  while (b)
 +    {
 +      b &= b - 1;
 +      c++;
 +    }
 +
 +  return c;
 +}
 -- 
 2.43.0
--- a/0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
+++ b/0054-LoongArch-Optimize-vector-constant-extract-even-odd-.patch
@ -0,0 +1,163 @@
 From 19282fbb0dab42c3553326a1ed01ad9a599622dd Mon Sep 17 00:00:00 2001
 From: Li Wei <liwei@loongson.cn>
 Date: Tue, 28 Nov 2023 15:39:00 +0800
 Subject: [PATCH 054/188] LoongArch: Optimize vector constant
 extract-{even/odd} permutation.
 For vector constant extract-{even/odd} permutation replace the default
 [x]vshuf instruction combination with [x]vilv{l/h} instructions, which
 reduces the instruction count and improves performance.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_is_odd_extraction):
 	Supplementary function prototype.
 	(loongarch_is_even_extraction): Adjust.
 	(loongarch_try_expand_lsx_vshuf_const): Adjust.
 	(loongarch_is_extraction_permutation): Adjust.
 	(loongarch_expand_vec_perm_const_2): Adjust.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/lasx-extract-even_odd-opt.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             | 33 +++++++++++-
 .../loongarch/lasx-extract-even_odd-opt.c     | 54 +++++++++++++++++++
 2 files changed, 85 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index ecceca22d..3ef7e3605 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -8668,6 +8668,12 @@ loongarch_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
     }
 }
 +static bool
 +loongarch_is_odd_extraction (struct expand_vec_perm_d *);
 +
 +static bool
 +loongarch_is_even_extraction (struct expand_vec_perm_d *);
 +
 static bool
 loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
 {
@@ -8690,6 +8696,24 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
       if (d->testing_p)
 	return true;
 +      /* If the selector matches an extract-even or extract-odd permutation,
 +       * using vselect is much better than vshuf.  */
 +      if (loongarch_is_odd_extraction (d)
 +	  || loongarch_is_even_extraction (d))
 +	{
 +	  if (loongarch_expand_vselect_vconcat (d->target, d->op0, d->op1,
 +						d->perm, d->nelt))
 +	    return true;
 +
 +	  unsigned char perm2[MAX_VECT_LEN];
 +	  for (i = 0; i < d->nelt; ++i)
 +	    perm2[i] = (d->perm[i] + d->nelt) & (2 * d->nelt - 1);
 +
 +	  if (loongarch_expand_vselect_vconcat (d->target, d->op1, d->op0,
 +						perm2, d->nelt))
 +	    return true;
 +	}
 +
       for (i = 0; i < d->nelt; i += 1)
 	{
 	  rperm[i] = GEN_INT (d->perm[i]);
@@ -8874,7 +8898,7 @@ loongarch_is_even_extraction (struct expand_vec_perm_d *d)
 	  result = false;
 	  break;
 	}
 -      buf += 1;
 +      buf += 2;
     }
   return result;
@@ -8896,7 +8920,7 @@ loongarch_is_extraction_permutation (struct expand_vec_perm_d *d)
 	  result = false;
 	  break;
 	}
 -      buf += 2;
 +      buf += 1;
     }
   return result;
@@ -9373,6 +9397,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	 Selector after: { 1, 3, 1, 3 }.
 	 Even extraction selector sample: E_V4DImode, { 0, 2, 4, 6 }
 	 Selector after: { 0, 2, 0, 2 }.  */
 +
 +      /* Better implementation of extract-even and extract-odd permutations.  */
 +      if (loongarch_expand_vec_perm_even_odd (d))
 +	return true;
 +
       for (i = 0; i < d->nelt / 2; i += 1)
 	{
 	  idx = d->perm[i];
 diff --git a/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
 new file mode 100644
 index 000000000..515f0c862
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/lasx-extract-even_odd-opt.c
@@ -0,0 +1,54 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 +/* { dg-final { scan-assembler "xvilvl.d" } } */
 +/* { dg-final { scan-assembler "xvilvh.d" } } */
 +
 +#define CMUL(a, b, c)                                                         \
 +  {                                                                           \
 +    (c).ai = (a).ai * (b).ai - (a).bi * (b).bi;                               \
 +    (c).bi = (a).ai * (b).bi + (a).bi * (b).ai;                               \
 +    (c).ci = (a).ci * (b).ci - (a).di * (b).di;                               \
 +    (c).di = (a).ci * (b).di + (a).di * (b).ci;                               \
 +  }
 +#define CSUM(a, b)                                                            \
 +  {                                                                           \
 +    (a).ai += (b).ai;                                                         \
 +    (a).bi += (b).bi;                                                         \
 +    (a).ci += (b).ci;                                                         \
 +    (a).di += (b).di;                                                         \
 +  }
 +
 +typedef struct
 +{
 +  double ai;
 +  double bi;
 +  double ci;
 +  double di;
 +} complex;
 +
 +typedef struct
 +{
 +  complex e[6][6];
 +} matrix;
 +
 +typedef struct
 +{
 +  complex c[6];
 +} vector;
 +
 +void
 +mult_adj_mat_vec (matrix *a, vector *b, vector *c)
 +{
 +  register int i, j;
 +  register complex x, y;
 +  for (i = 0; i < 6; i++)
 +    {
 +      x.ai = x.bi = x.ci = x.di = 0.0;
 +      for (j = 0; j < 6; j++)
 +        {
 +          CMUL (a->e[j][i], b->c[j], y);
 +          CSUM (x, y);
 +        }
 +      c->c[i] = x;
 +    }
 +}
 -- 
 2.43.0
--- a/0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
+++ b/0055-LoongArch-Add-intrinsic-function-descriptions-for-LS.patch
--- a/0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
+++ b/0056-LoongArch-Switch-loongarch-def-from-C-to-C-to-make-i.patch
@ -0,0 +1,925 @@
 From 6c85d03940f87770a7e8b7195ffe45f99afef411 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Fri, 1 Dec 2023 10:09:33 +0800
 Subject: [PATCH 056/188] LoongArch: Switch loongarch-def from C to C++ to make
 it possible.
 We'll use HOST_WIDE_INT in LoongArch static properties in following patches.
 To keep the same readability as C99 designated initializers, create a
 std::array like data structure with position setter function, and add
 field setter functions for structs used in loongarch-def.cc.
 Remove unneeded guards #if
 !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 in loongarch-def.h and loongarch-opts.h.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-def.h: Remove extern "C".
 	(loongarch_isa_base_strings): Declare as loongarch_def_array
 	instead of plain array.
 	(loongarch_isa_ext_strings): Likewise.
 	(loongarch_abi_base_strings): Likewise.
 	(loongarch_abi_ext_strings): Likewise.
 	(loongarch_cmodel_strings): Likewise.
 	(loongarch_cpu_strings): Likewise.
 	(loongarch_cpu_default_isa): Likewise.
 	(loongarch_cpu_issue_rate): Likewise.
 	(loongarch_cpu_multipass_dfa_lookahead): Likewise.
 	(loongarch_cpu_cache): Likewise.
 	(loongarch_cpu_align): Likewise.
 	(loongarch_cpu_rtx_cost_data): Likewise.
 	(loongarch_isa): Add a constructor and field setter functions.
 	* config/loongarch/loongarch-opts.h (loongarch-defs.h): Do not
 	include for target libraries.
 	* config/loongarch/loongarch-opts.cc: Comment code that doesn't
 	run and causes compilation errors.
 	* config/loongarch/loongarch-tune.h (LOONGARCH_TUNE_H): Likewise.
 	(struct loongarch_rtx_cost_data): Likewise.
 	(struct loongarch_cache): Likewise.
 	(struct loongarch_align): Likewise.
 	* config/loongarch/t-loongarch: Compile loongarch-def.cc with the
 	C++ compiler.
 	* config/loongarch/loongarch-def-array.h: New file for a
 	std::array like data structure with position setter function.
 	* config/loongarch/loongarch-def.c: Rename to ...
 	* config/loongarch/loongarch-def.cc: ... here.
 	(loongarch_cpu_strings): Define as loongarch_def_array instead
 	of plain array.
 	(loongarch_cpu_default_isa): Likewise.
 	(loongarch_cpu_cache): Likewise.
 	(loongarch_cpu_align): Likewise.
 	(loongarch_cpu_rtx_cost_data): Likewise.
 	(loongarch_cpu_issue_rate): Likewise.
 	(loongarch_cpu_multipass_dfa_lookahead): Likewise.
 	(loongarch_isa_base_strings): Likewise.
 	(loongarch_isa_ext_strings): Likewise.
 	(loongarch_abi_base_strings): Likewise.
 	(loongarch_abi_ext_strings): Likewise.
 	(loongarch_cmodel_strings): Likewise.
 	(abi_minimal_isa): Likewise.
 	(loongarch_rtx_cost_optimize_size): Use field setter functions
 	instead of designated initializers.
 	(loongarch_rtx_cost_data): Implement default constructor.
 ---
 gcc/config/loongarch/loongarch-def-array.h |  40 ++++
 gcc/config/loongarch/loongarch-def.c       | 227 ---------------------
 gcc/config/loongarch/loongarch-def.cc      | 187 +++++++++++++++++
 gcc/config/loongarch/loongarch-def.h       |  55 ++---
 gcc/config/loongarch/loongarch-opts.cc     |   7 +
 gcc/config/loongarch/loongarch-opts.h      |   5 +-
 gcc/config/loongarch/loongarch-tune.h      | 123 ++++++++++-
 gcc/config/loongarch/t-loongarch           |   4 +-
 8 files changed, 390 insertions(+), 258 deletions(-)
 create mode 100644 gcc/config/loongarch/loongarch-def-array.h
 delete mode 100644 gcc/config/loongarch/loongarch-def.c
 create mode 100644 gcc/config/loongarch/loongarch-def.cc
 diff --git a/gcc/config/loongarch/loongarch-def-array.h b/gcc/config/loongarch/loongarch-def-array.h
 new file mode 100644
 index 000000000..bdb3e9c6a
 --- /dev/null
 +++ b/gcc/config/loongarch/loongarch-def-array.h
@@ -0,0 +1,40 @@
 +/* A std::array like data structure for LoongArch static properties.
 +   Copyright (C) 2023 Free Software Foundation, Inc.
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify
 +it under the terms of the GNU General Public License as published by
 +the Free Software Foundation; either version 3, or (at your option)
 +any later version.
 +
 +GCC is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +<http://www.gnu.org/licenses/>.  */
 +
 +#ifndef _LOONGARCH_DEF_ARRAY_H
 +#define _LOONGARCH_DEF_ARRAY_H 1
 +
 +template <class T, int N>
 +class loongarch_def_array {
 +private:
 +  T arr[N];
 +public:
 +  loongarch_def_array () : arr{} {}
 +
 +  T &operator[] (int n) { return arr[n]; }
 +  const T &operator[] (int n) const { return arr[n]; }
 +
 +  loongarch_def_array set (int idx, T &&value)
 +  {
 +    (*this)[idx] = value;
 +    return *this;
 +  }
 +};
 +
 +#endif
 diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
 deleted file mode 100644
 index fe4474e77..000000000
 --- a/gcc/config/loongarch/loongarch-def.c
 +++ /dev/null
@@ -1,227 +0,0 @@
 -/* LoongArch static properties.
 -   Copyright (C) 2021-2022 Free Software Foundation, Inc.
 -   Contributed by Loongson Ltd.
 -
 -This file is part of GCC.
 -
 -GCC is free software; you can redistribute it and/or modify
 -it under the terms of the GNU General Public License as published by
 -the Free Software Foundation; either version 3, or (at your option)
 -any later version.
 -
 -GCC is distributed in the hope that it will be useful,
 -but WITHOUT ANY WARRANTY; without even the implied warranty of
 -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 -GNU General Public License for more details.
 -
 -You should have received a copy of the GNU General Public License
 -along with GCC; see the file COPYING3.  If not see
 -<http://www.gnu.org/licenses/>.  */
 -
 -#include "loongarch-def.h"
 -#include "loongarch-str.h"
 -
 -/* CPU property tables.  */
 -const char*
 -loongarch_cpu_strings[N_TUNE_TYPES] = {
 -  [CPU_NATIVE]		  = STR_CPU_NATIVE,
 -  [CPU_ABI_DEFAULT]	  = STR_CPU_ABI_DEFAULT,
 -  [CPU_LOONGARCH64]	  = STR_CPU_LOONGARCH64,
 -  [CPU_LA464]		  = STR_CPU_LA464,
 -  [CPU_LA664]		  = STR_CPU_LA664,
 -};
 -
 -struct loongarch_isa
 -loongarch_cpu_default_isa[N_ARCH_TYPES] = {
 -  [CPU_LOONGARCH64] = {
 -      .base = ISA_BASE_LA64V100,
 -      .fpu = ISA_EXT_FPU64,
 -      .simd = 0,
 -  },
 -  [CPU_LA464] = {
 -      .base = ISA_BASE_LA64V100,
 -      .fpu = ISA_EXT_FPU64,
 -      .simd = ISA_EXT_SIMD_LASX,
 -  },
 -  [CPU_LA664] = {
 -      .base = ISA_BASE_LA64V110,
 -      .fpu = ISA_EXT_FPU64,
 -      .simd = ISA_EXT_SIMD_LASX,
 -  },
 -};
 -
 -struct loongarch_cache
 -loongarch_cpu_cache[N_TUNE_TYPES] = {
 -  [CPU_LOONGARCH64] = {
 -      .l1d_line_size = 64,
 -      .l1d_size = 64,
 -      .l2d_size = 256,
 -      .simultaneous_prefetches = 4,
 -  },
 -  [CPU_LA464] = {
 -      .l1d_line_size = 64,
 -      .l1d_size = 64,
 -      .l2d_size = 256,
 -      .simultaneous_prefetches = 4,
 -  },
 -  [CPU_LA664] = {
 -      .l1d_line_size = 64,
 -      .l1d_size = 64,
 -      .l2d_size = 256,
 -      .simultaneous_prefetches = 4,
 -  },
 -};
 -
 -struct loongarch_align
 -loongarch_cpu_align[N_TUNE_TYPES] = {
 -  [CPU_LOONGARCH64] = {
 -    .function = "32",
 -    .label = "16",
 -  },
 -  [CPU_LA464] = {
 -    .function = "32",
 -    .label = "16",
 -  },
 -  [CPU_LA664] = {
 -    .function = "32",
 -    .label = "16",
 -  },
 -};
 -
 -
 -/* Default RTX cost initializer.  */
 -#define COSTS_N_INSNS(N) ((N) * 4)
 -#define DEFAULT_COSTS				\
 -    .fp_add		= COSTS_N_INSNS (1),	\
 -    .fp_mult_sf		= COSTS_N_INSNS (2),	\
 -    .fp_mult_df		= COSTS_N_INSNS (4),	\
 -    .fp_div_sf		= COSTS_N_INSNS (6),	\
 -    .fp_div_df		= COSTS_N_INSNS (8),	\
 -    .int_mult_si	= COSTS_N_INSNS (1),	\
 -    .int_mult_di	= COSTS_N_INSNS (1),	\
 -    .int_div_si		= COSTS_N_INSNS (4),	\
 -    .int_div_di		= COSTS_N_INSNS (6),	\
 -    .branch_cost	= 6,			\
 -    .memory_latency	= 4
 -
 -/* The following properties cannot be looked up directly using "cpucfg".
 - So it is necessary to provide a default value for "unknown native"
 - tune targets (i.e. -mtune=native while PRID does not correspond to
 - any known "-mtune" type).  */
 -
 -struct loongarch_rtx_cost_data
 -loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
 -  [CPU_NATIVE] = {
 -      DEFAULT_COSTS
 -  },
 -  [CPU_LOONGARCH64] = {
 -      DEFAULT_COSTS
 -  },
 -  [CPU_LA464] = {
 -      DEFAULT_COSTS
 -  },
 -  [CPU_LA664] = {
 -      DEFAULT_COSTS
 -  },
 -};
 -
 -/* RTX costs to use when optimizing for size.  */
 -const struct loongarch_rtx_cost_data
 -loongarch_rtx_cost_optimize_size = {
 -    .fp_add	      = 4,
 -    .fp_mult_sf	      = 4,
 -    .fp_mult_df	      = 4,
 -    .fp_div_sf	      = 4,
 -    .fp_div_df	      = 4,
 -    .int_mult_si      = 4,
 -    .int_mult_di      = 4,
 -    .int_div_si	      = 4,
 -    .int_div_di	      = 4,
 -    .branch_cost      = 6,
 -    .memory_latency   = 4,
 -};
 -
 -int
 -loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
 -  [CPU_NATIVE]	      = 4,
 -  [CPU_LOONGARCH64]   = 4,
 -  [CPU_LA464]	      = 4,
 -  [CPU_LA664]	      = 6,
 -};
 -
 -int
 -loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
 -  [CPU_NATIVE]	      = 4,
 -  [CPU_LOONGARCH64]   = 4,
 -  [CPU_LA464]	      = 4,
 -  [CPU_LA664]	      = 6,
 -};
 -
 -/* Wiring string definitions from loongarch-str.h to global arrays
 -   with standard index values from loongarch-opts.h, so we can
 -   print config-related messages and do ABI self-spec filtering
 -   from the driver in a self-consistent manner.  */
 -
 -const char*
 -loongarch_isa_base_strings[N_ISA_BASE_TYPES] = {
 -  [ISA_BASE_LA64V100] = STR_ISA_BASE_LA64V100,
 -  [ISA_BASE_LA64V110] = STR_ISA_BASE_LA64V110,
 -};
 -
 -const char*
 -loongarch_isa_ext_strings[N_ISA_EXT_TYPES] = {
 -  [ISA_EXT_NONE] = STR_NONE,
 -  [ISA_EXT_FPU32] = STR_ISA_EXT_FPU32,
 -  [ISA_EXT_FPU64] = STR_ISA_EXT_FPU64,
 -  [ISA_EXT_SIMD_LSX] = STR_ISA_EXT_LSX,
 -  [ISA_EXT_SIMD_LASX] = STR_ISA_EXT_LASX,
 -};
 -
 -const char*
 -loongarch_abi_base_strings[N_ABI_BASE_TYPES] = {
 -  [ABI_BASE_LP64D] = STR_ABI_BASE_LP64D,
 -  [ABI_BASE_LP64F] = STR_ABI_BASE_LP64F,
 -  [ABI_BASE_LP64S] = STR_ABI_BASE_LP64S,
 -};
 -
 -const char*
 -loongarch_abi_ext_strings[N_ABI_EXT_TYPES] = {
 -  [ABI_EXT_BASE] = STR_ABI_EXT_BASE,
 -};
 -
 -const char*
 -loongarch_cmodel_strings[] = {
 -  [CMODEL_NORMAL]	  = STR_CMODEL_NORMAL,
 -  [CMODEL_TINY]		  = STR_CMODEL_TINY,
 -  [CMODEL_TINY_STATIC]	  = STR_CMODEL_TS,
 -  [CMODEL_MEDIUM]	  = STR_CMODEL_MEDIUM,
 -  [CMODEL_LARGE]	  = STR_CMODEL_LARGE,
 -  [CMODEL_EXTREME]	  = STR_CMODEL_EXTREME,
 -};
 -
 -
 -/* ABI-related definitions.  */
 -const struct loongarch_isa
 -abi_minimal_isa[N_ABI_BASE_TYPES][N_ABI_EXT_TYPES] = {
 -  [ABI_BASE_LP64D] = {
 -      [ABI_EXT_BASE] = {
 -	  .base = ISA_BASE_LA64V100,
 -	  .fpu = ISA_EXT_FPU64,
 -	  .simd = 0
 -      },
 -  },
 -  [ABI_BASE_LP64F] = {
 -      [ABI_EXT_BASE] = {
 -	  .base = ISA_BASE_LA64V100,
 -	  .fpu = ISA_EXT_FPU32,
 -	  .simd = 0
 -      },
 -  },
 -  [ABI_BASE_LP64S] = {
 -      [ABI_EXT_BASE] = {
 -	  .base = ISA_BASE_LA64V100,
 -	  .fpu = ISA_EXT_NONE,
 -	  .simd = 0
 -      },
 -  },
 -};
 diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
 new file mode 100644
 index 000000000..6990c86c2
 --- /dev/null
 +++ b/gcc/config/loongarch/loongarch-def.cc
@@ -0,0 +1,187 @@
 +/* LoongArch static properties.
 +   Copyright (C) 2021-2023 Free Software Foundation, Inc.
 +   Contributed by Loongson Ltd.
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify
 +it under the terms of the GNU General Public License as published by
 +the Free Software Foundation; either version 3, or (at your option)
 +any later version.
 +
 +GCC is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +<http://www.gnu.org/licenses/>.  */
 +
 +#include "loongarch-def.h"
 +#include "loongarch-str.h"
 +
 +template <class T, int N>
 +using array = loongarch_def_array<T, N>;
 +
 +template <class T>
 +using array_tune = array<T, N_TUNE_TYPES>;
 +
 +template <class T>
 +using array_arch = array<T, N_ARCH_TYPES>;
 +
 +/* CPU property tables.  */
 +array_tune<const char *> loongarch_cpu_strings = array_tune<const char *> ()
 +  .set (CPU_NATIVE, STR_CPU_NATIVE)
 +  .set (CPU_ABI_DEFAULT, STR_CPU_ABI_DEFAULT)
 +  .set (CPU_LOONGARCH64, STR_CPU_LOONGARCH64)
 +  .set (CPU_LA464, STR_CPU_LA464)
 +  .set (CPU_LA664, STR_CPU_LA664);
 +
 +array_arch<loongarch_isa> loongarch_cpu_default_isa =
 +  array_arch<loongarch_isa> ()
 +    .set (CPU_LOONGARCH64,
 +	  loongarch_isa ()
 +	    .base_ (ISA_BASE_LA64V100)
 +	    .fpu_ (ISA_EXT_FPU64))
 +    .set (CPU_LA464,
 +	  loongarch_isa ()
 +	    .base_ (ISA_BASE_LA64V100)
 +	    .fpu_ (ISA_EXT_FPU64)
 +	    .simd_ (ISA_EXT_SIMD_LASX))
 +    .set (CPU_LA664,
 +	  loongarch_isa ()
 +	    .base_ (ISA_BASE_LA64V110)
 +	    .fpu_ (ISA_EXT_FPU64)
 +	    .simd_ (ISA_EXT_SIMD_LASX));
 +
 +static inline loongarch_cache la464_cache ()
 +{
 +  return loongarch_cache ()
 +    .l1d_line_size_ (64)
 +    .l1d_size_ (64)
 +    .l2d_size_ (256)
 +    .simultaneous_prefetches_ (4);
 +}
 +
 +array_tune<loongarch_cache> loongarch_cpu_cache =
 +  array_tune<loongarch_cache> ()
 +    .set (CPU_LOONGARCH64, la464_cache ())
 +    .set (CPU_LA464, la464_cache ())
 +    .set (CPU_LA664, la464_cache ());
 +
 +static inline loongarch_align la464_align ()
 +{
 +  return loongarch_align ().function_ ("32").label_ ("16");
 +}
 +
 +array_tune<loongarch_align> loongarch_cpu_align =
 +  array_tune<loongarch_align> ()
 +    .set (CPU_LOONGARCH64, la464_align ())
 +    .set (CPU_LA464, la464_align ())
 +    .set (CPU_LA664, la464_align ());
 +
 +#define COSTS_N_INSNS(N) ((N) * 4)
 +
 +/* Default RTX cost initializer.  */
 +loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
 +  : fp_add (COSTS_N_INSNS (1)),
 +    fp_mult_sf (COSTS_N_INSNS (2)),
 +    fp_mult_df (COSTS_N_INSNS (4)),
 +    fp_div_sf (COSTS_N_INSNS (6)),
 +    fp_div_df (COSTS_N_INSNS (8)),
 +    int_mult_si (COSTS_N_INSNS (1)),
 +    int_mult_di (COSTS_N_INSNS (1)),
 +    int_div_si (COSTS_N_INSNS (4)),
 +    int_div_di (COSTS_N_INSNS (6)),
 +    branch_cost (6),
 +    memory_latency (4) {}
 +
 +/* The following properties cannot be looked up directly using "cpucfg".
 + So it is necessary to provide a default value for "unknown native"
 + tune targets (i.e. -mtune=native while PRID does not correspond to
 + any known "-mtune" type).  Currently all numbers are default.  */
 +array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
 +  array_tune<loongarch_rtx_cost_data> ();
 +
 +/* RTX costs to use when optimizing for size.  */
 +const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
 +  loongarch_rtx_cost_data ()
 +    .fp_add_ (4)
 +    .fp_mult_sf_ (4)
 +    .fp_mult_df_ (4)
 +    .fp_div_sf_ (4)
 +    .fp_div_df_ (4)
 +    .int_mult_si_ (4)
 +    .int_mult_di_ (4)
 +    .int_div_si_ (4)
 +    .int_div_di_ (4);
 +
 +array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
 +  .set (CPU_NATIVE, 4)
 +  .set (CPU_LOONGARCH64, 4)
 +  .set (CPU_LA464, 4)
 +  .set (CPU_LA664, 6);
 +
 +array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> ()
 +  .set (CPU_NATIVE, 4)
 +  .set (CPU_LOONGARCH64, 4)
 +  .set (CPU_LA464, 4)
 +  .set (CPU_LA664, 6);
 +
 +/* Wiring string definitions from loongarch-str.h to global arrays
 +   with standard index values from loongarch-opts.h, so we can
 +   print config-related messages and do ABI self-spec filtering
 +   from the driver in a self-consistent manner.  */
 +
 +array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings =
 +  array<const char *, N_ISA_BASE_TYPES> ()
 +    .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100)
 +    .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110);
 +
 +array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings =
 +  array<const char *, N_ISA_EXT_TYPES> ()
 +    .set (ISA_EXT_NONE, STR_NONE)
 +    .set (ISA_EXT_FPU32, STR_ISA_EXT_FPU32)
 +    .set (ISA_EXT_FPU64, STR_ISA_EXT_FPU64)
 +    .set (ISA_EXT_SIMD_LSX, STR_ISA_EXT_LSX)
 +    .set (ISA_EXT_SIMD_LASX, STR_ISA_EXT_LASX);
 +
 +array<const char *, N_ABI_BASE_TYPES> loongarch_abi_base_strings =
 +  array<const char *, N_ABI_BASE_TYPES> ()
 +    .set (ABI_BASE_LP64D, STR_ABI_BASE_LP64D)
 +    .set (ABI_BASE_LP64F, STR_ABI_BASE_LP64F)
 +    .set (ABI_BASE_LP64S, STR_ABI_BASE_LP64S);
 +
 +array<const char *, N_ABI_EXT_TYPES> loongarch_abi_ext_strings =
 +  array<const char *, N_ABI_EXT_TYPES> ()
 +    .set (ABI_EXT_BASE, STR_ABI_EXT_BASE);
 +
 +array<const char *, N_CMODEL_TYPES> loongarch_cmodel_strings =
 +  array<const char *, N_CMODEL_TYPES> ()
 +    .set (CMODEL_NORMAL,		STR_CMODEL_NORMAL)
 +    .set (CMODEL_TINY,		STR_CMODEL_TINY)
 +    .set (CMODEL_TINY_STATIC,	STR_CMODEL_TS)
 +    .set (CMODEL_MEDIUM,		STR_CMODEL_MEDIUM)
 +    .set (CMODEL_LARGE,		STR_CMODEL_LARGE)
 +    .set (CMODEL_EXTREME,		STR_CMODEL_EXTREME);
 +
 +array<array<loongarch_isa, N_ABI_EXT_TYPES>, N_ABI_BASE_TYPES>
 +  abi_minimal_isa = array<array<loongarch_isa, N_ABI_EXT_TYPES>,
 +			  N_ABI_BASE_TYPES> ()
 +    .set (ABI_BASE_LP64D,
 +	  array<loongarch_isa, N_ABI_EXT_TYPES> ()
 +	    .set (ABI_EXT_BASE,
 +		  loongarch_isa ()
 +		    .base_ (ISA_BASE_LA64V100)
 +		    .fpu_ (ISA_EXT_FPU64)))
 +    .set (ABI_BASE_LP64F,
 +	  array<loongarch_isa, N_ABI_EXT_TYPES> ()
 +	    .set (ABI_EXT_BASE,
 +		  loongarch_isa ()
 +		    .base_ (ISA_BASE_LA64V100)
 +		    .fpu_ (ISA_EXT_FPU32)))
 +    .set (ABI_BASE_LP64S,
 +	  array<loongarch_isa, N_ABI_EXT_TYPES> ()
 +	    .set (ABI_EXT_BASE,
 +		  loongarch_isa ().base_ (ISA_BASE_LA64V100)));
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index ef848f606..5ac70dfdd 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -50,20 +50,18 @@ along with GCC; see the file COPYING3.  If not see
 #include <stdint.h>
 #endif
 +#include "loongarch-def-array.h"
 #include "loongarch-tune.h"
 -#ifdef __cplusplus
 -extern "C" {
 -#endif
 -
 /* enum isa_base */
 -extern const char* loongarch_isa_base_strings[];
 /* LoongArch V1.00.  */
 #define ISA_BASE_LA64V100     0
 /* LoongArch V1.10.  */
 #define ISA_BASE_LA64V110     1
 #define N_ISA_BASE_TYPES      2
 +extern loongarch_def_array<const char *, N_ISA_BASE_TYPES>
 +  loongarch_isa_base_strings;
 #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 /* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
@@ -72,7 +70,6 @@ extern int64_t loongarch_isa_base_features[];
 #endif
 /* enum isa_ext_* */
 -extern const char* loongarch_isa_ext_strings[];
 #define ISA_EXT_NONE	      0
 #define ISA_EXT_FPU32	      1
 #define ISA_EXT_FPU64	      2
@@ -80,13 +77,16 @@ extern const char* loongarch_isa_ext_strings[];
 #define ISA_EXT_SIMD_LSX      3
 #define ISA_EXT_SIMD_LASX     4
 #define N_ISA_EXT_TYPES	      5
 +extern loongarch_def_array<const char *, N_ISA_EXT_TYPES>
 +  loongarch_isa_ext_strings;
 /* enum abi_base */
 -extern const char* loongarch_abi_base_strings[];
 #define ABI_BASE_LP64D	      0
 #define ABI_BASE_LP64F	      1
 #define ABI_BASE_LP64S	      2
 #define N_ABI_BASE_TYPES      3
 +extern loongarch_def_array<const char *, N_ABI_BASE_TYPES>
 +  loongarch_abi_base_strings;
 #define TO_LP64_ABI_BASE(C) (C)
@@ -99,12 +99,12 @@ extern const char* loongarch_abi_base_strings[];
 /* enum abi_ext */
 -extern const char* loongarch_abi_ext_strings[];
 #define ABI_EXT_BASE	      0
 #define N_ABI_EXT_TYPES	      1
 +extern loongarch_def_array<const char *, N_ABI_EXT_TYPES>
 +  loongarch_abi_ext_strings;
 /* enum cmodel */
 -extern const char* loongarch_cmodel_strings[];
 #define CMODEL_NORMAL	      0
 #define CMODEL_TINY	      1
 #define CMODEL_TINY_STATIC    2
@@ -112,6 +112,8 @@ extern const char* loongarch_cmodel_strings[];
 #define CMODEL_LARGE	      4
 #define CMODEL_EXTREME	      5
 #define N_CMODEL_TYPES	      6
 +extern loongarch_def_array<const char *, N_CMODEL_TYPES>
 +  loongarch_cmodel_strings;
 /* enum explicit_relocs */
 #define EXPLICIT_RELOCS_AUTO	0
@@ -126,7 +128,6 @@ extern const char* loongarch_cmodel_strings[];
 #define M_OPT_ABSENT(opt_enum)  ((opt_enum) == M_OPT_UNSET)
 -#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 /* Internal representation of the target.  */
 struct loongarch_isa
 {
@@ -139,6 +140,13 @@ struct loongarch_isa
      Using int64_t instead of HOST_WIDE_INT for C compatibility.  */
   int64_t evolution;
 +
 +  loongarch_isa () : base (0), fpu (0), simd (0), evolution (0) {}
 +  loongarch_isa base_ (int _base) { base = _base; return *this; }
 +  loongarch_isa fpu_ (int _fpu) { fpu = _fpu; return *this; }
 +  loongarch_isa simd_ (int _simd) { simd = _simd; return *this; }
 +  loongarch_isa evolution_ (int64_t _evolution)
 +    { evolution = _evolution; return *this; }
 };
 struct loongarch_abi
@@ -156,9 +164,6 @@ struct loongarch_target
   int cmodel;	    /* CMODEL_ */
 };
 -extern struct loongarch_isa loongarch_cpu_default_isa[];
 -#endif
 -
 /* CPU properties.  */
 /* index */
 #define CPU_NATIVE	  0
@@ -170,15 +175,19 @@ extern struct loongarch_isa loongarch_cpu_default_isa[];
 #define N_TUNE_TYPES	  5
 /* parallel tables.  */
 -extern const char* loongarch_cpu_strings[];
 -extern int loongarch_cpu_issue_rate[];
 -extern int loongarch_cpu_multipass_dfa_lookahead[];
 +extern loongarch_def_array<const char *, N_ARCH_TYPES>
 +  loongarch_cpu_strings;
 +extern loongarch_def_array<loongarch_isa, N_ARCH_TYPES>
 +  loongarch_cpu_default_isa;
 +extern loongarch_def_array<int, N_TUNE_TYPES>
 +  loongarch_cpu_issue_rate;
 +extern loongarch_def_array<int, N_TUNE_TYPES>
 +  loongarch_cpu_multipass_dfa_lookahead;
 +extern loongarch_def_array<loongarch_cache, N_TUNE_TYPES>
 +  loongarch_cpu_cache;
 +extern loongarch_def_array<loongarch_align, N_TUNE_TYPES>
 +  loongarch_cpu_align;
 +extern loongarch_def_array<loongarch_rtx_cost_data, N_TUNE_TYPES>
 +  loongarch_cpu_rtx_cost_data;
 -extern struct loongarch_cache loongarch_cpu_cache[];
 -extern struct loongarch_align loongarch_cpu_align[];
 -extern struct loongarch_rtx_cost_data loongarch_cpu_rtx_cost_data[];
 -
 -#ifdef __cplusplus
 -}
 -#endif
 #endif /* LOONGARCH_DEF_H */
 diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
 index 390720479..45fc521e4 100644
 --- a/gcc/config/loongarch/loongarch-opts.cc
 +++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -163,6 +163,7 @@ loongarch_config_target (struct loongarch_target *target,
 			 int follow_multilib_list_p)
 {
   struct loongarch_target t;
 +
   if (!target)
     return;
@@ -657,12 +658,18 @@ abi_str (struct loongarch_abi abi)
 		     strlen (loongarch_abi_base_strings[abi.base]));
   else
     {
 +      /* This situation has not yet occurred, so in order to avoid the
 +	 -Warray-bounds warning during C++ syntax checking, this part
 +	 of the code is commented first.  */
 +      /*
       APPEND_STRING (loongarch_abi_base_strings[abi.base])
       APPEND1 ('/')
       APPEND_STRING (loongarch_abi_ext_strings[abi.ext])
       APPEND1 ('\0')
       return XOBFINISH (&msg_obstack, const char *);
 +      */
 +      gcc_unreachable ();
     }
 }
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index 9b3d023ac..0dabf1551 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -21,7 +21,10 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef LOONGARCH_OPTS_H
 #define LOONGARCH_OPTS_H
 +/* This is a C++ header and it shouldn't be used by target libraries.  */
 +#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 #include "loongarch-def.h"
 +#endif
 /* Target configuration */
 extern struct loongarch_target la_target;
@@ -33,7 +36,6 @@ struct loongarch_flags {
     int sx[2];
 };
 -#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 /* Initialize loongarch_target from separate option variables.  */
 void
@@ -54,7 +56,6 @@ void
 loongarch_update_gcc_opt_status (struct loongarch_target *target,
 				 struct gcc_options *opts,
 				 struct gcc_options *opts_set);
 -#endif
 /* Macros for common conditional expressions used in loongarch.{c,h,md} */
 diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
 index d961963f0..616b94e87 100644
 --- a/gcc/config/loongarch/loongarch-tune.h
 +++ b/gcc/config/loongarch/loongarch-tune.h
@@ -21,6 +21,8 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef LOONGARCH_TUNE_H
 #define LOONGARCH_TUNE_H
 +#include "loongarch-def-array.h"
 +
 /* RTX costs of various operations on the different architectures.  */
 struct loongarch_rtx_cost_data
 {
@@ -35,6 +37,76 @@ struct loongarch_rtx_cost_data
   unsigned short int_div_di;
   unsigned short branch_cost;
   unsigned short memory_latency;
 +
 +  /* Default RTX cost initializer, implemented in loongarch-def.cc.  */
 +  loongarch_rtx_cost_data ();
 +
 +  loongarch_rtx_cost_data fp_add_ (unsigned short _fp_add)
 +  {
 +    fp_add = _fp_add;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data fp_mult_sf_ (unsigned short _fp_mult_sf)
 +  {
 +    fp_mult_sf = _fp_mult_sf;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data fp_mult_df_ (unsigned short _fp_mult_df)
 +  {
 +    fp_mult_df = _fp_mult_df;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data fp_div_sf_ (unsigned short _fp_div_sf)
 +  {
 +    fp_div_sf = _fp_div_sf;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data fp_div_df_ (unsigned short _fp_div_df)
 +  {
 +    fp_div_df = _fp_div_df;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data int_mult_si_ (unsigned short _int_mult_si)
 +  {
 +    int_mult_si = _int_mult_si;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data int_mult_di_ (unsigned short _int_mult_di)
 +  {
 +    int_mult_di = _int_mult_di;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data int_div_si_ (unsigned short _int_div_si)
 +  {
 +    int_div_si = _int_div_si;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data int_div_di_ (unsigned short _int_div_di)
 +  {
 +    int_div_di = _int_div_di;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost)
 +  {
 +    branch_cost = _branch_cost;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data memory_latency_ (unsigned short _memory_latency)
 +  {
 +    memory_latency = _memory_latency;
 +    return *this;
 +  }
 +
 };
 /* Costs to use when optimizing for size.  */
@@ -42,10 +114,39 @@ extern const struct loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size;
 /* Cache size record of known processor models.  */
 struct loongarch_cache {
 -    int l1d_line_size;  /* bytes */
 -    int l1d_size;       /* KiB */
 -    int l2d_size;       /* kiB */
 -    int simultaneous_prefetches; /* number of parallel prefetch */
 +  int l1d_line_size;  /* bytes */
 +  int l1d_size;       /* KiB */
 +  int l2d_size;       /* kiB */
 +  int simultaneous_prefetches; /* number of parallel prefetch */
 +
 +  loongarch_cache () : l1d_line_size (0),
 +		       l1d_size (0),
 +		       l2d_size (0),
 +		       simultaneous_prefetches (0) {}
 +
 +  loongarch_cache l1d_line_size_ (int _l1d_line_size)
 +  {
 +    l1d_line_size = _l1d_line_size;
 +    return *this;
 +  }
 +
 +  loongarch_cache l1d_size_ (int _l1d_size)
 +  {
 +    l1d_size = _l1d_size;
 +    return *this;
 +  }
 +
 +  loongarch_cache l2d_size_ (int _l2d_size)
 +  {
 +    l2d_size = _l2d_size;
 +    return *this;
 +  }
 +
 +  loongarch_cache simultaneous_prefetches_ (int _simultaneous_prefetches)
 +  {
 +    simultaneous_prefetches = _simultaneous_prefetches;
 +    return *this;
 +  }
 };
 /* Alignment for functions and labels for best performance.  For new uarchs
@@ -54,6 +155,20 @@ struct loongarch_cache {
 struct loongarch_align {
   const char *function;	/* default value for -falign-functions */
   const char *label;	/* default value for -falign-labels */
 +
 +  loongarch_align () : function (nullptr), label (nullptr) {}
 +
 +  loongarch_align function_ (const char *_function)
 +  {
 +    function = _function;
 +    return *this;
 +  }
 +
 +  loongarch_align label_ (const char *_label)
 +  {
 +    label = _label;
 +    return *this;
 +  }
 };
 #endif /* LOONGARCH_TUNE_H */
 diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
 index 57b1176bc..a1a40431f 100644
 --- a/gcc/config/loongarch/t-loongarch
 +++ b/gcc/config/loongarch/t-loongarch
@@ -64,8 +64,8 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \
 		 $(srcdir)/config/loongarch/loongarch-cpucfg-map.h
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
 -loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.c $(LA_STR_H)
 -	$(CC) -c $(ALL_CFLAGS) $(INCLUDES) $<
 +loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H)
 +	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
 $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
 s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
 -- 
 2.43.0
--- a/0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
+++ b/0057-LoongArch-Remove-the-definition-of-ISA_BASE_LA64V110.patch
@ -0,0 +1,261 @@
 From 1ec35f153636077760b65dc3e0385d0a4d383486 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Fri, 1 Dec 2023 11:51:51 +0800
 Subject: [PATCH 057/188] LoongArch: Remove the definition of ISA_BASE_LA64V110
 from the code.
 The instructions added in LoongArch Reference Manual v1.1 do not constitute a
 distinct "v1.1" base instruction set: CPUs defined later may support only some
 of the instructions in that manual.  Therefore, the macro ISA_BASE_LA64V110 and
 related definitions are removed here.
 gcc/ChangeLog:
 	* config/loongarch/genopts/loongarch-strings: Delete STR_ISA_BASE_LA64V110.
 	* config/loongarch/genopts/loongarch.opt.in: Likewise.
 	* config/loongarch/loongarch-cpu.cc (ISA_BASE_LA64V110_FEATURES): Delete macro.
 	(fill_native_cpu_config): Define a new variable hw_isa_evolution to record
 	the extended instruction set support read from cpucfg.
 	* config/loongarch/loongarch-def.cc: Set evolution at initialization.
 	* config/loongarch/loongarch-def.h (ISA_BASE_LA64V100): Delete.
 	(ISA_BASE_LA64V110): Likewise.
 	(N_ISA_BASE_TYPES): Likewise.
 	(defined): Likewise.
 	* config/loongarch/loongarch-opts.cc: Likewise.
 	* config/loongarch/loongarch-opts.h (TARGET_64BIT): Likewise.
 	(ISA_BASE_IS_LA64V110): Likewise.
 	* config/loongarch/loongarch-str.h (STR_ISA_BASE_LA64V110): Likewise.
 	* config/loongarch/loongarch.opt: Regenerate.
 ---
 .../loongarch/genopts/loongarch-strings       |  1 -
 gcc/config/loongarch/genopts/loongarch.opt.in |  3 ---
 gcc/config/loongarch/loongarch-cpu.cc         | 23 +++++--------------
 gcc/config/loongarch/loongarch-def.cc         | 14 +++++++----
 gcc/config/loongarch/loongarch-def.h          | 12 ++--------
 gcc/config/loongarch/loongarch-opts.cc        |  3 ---
 gcc/config/loongarch/loongarch-opts.h         |  4 +---
 gcc/config/loongarch/loongarch-str.h          |  1 -
 gcc/config/loongarch/loongarch.opt            |  3 ---
 9 files changed, 19 insertions(+), 45 deletions(-)
 diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
 index 6c8a42af2..411ad5696 100644
 --- a/gcc/config/loongarch/genopts/loongarch-strings
 +++ b/gcc/config/loongarch/genopts/loongarch-strings
@@ -30,7 +30,6 @@ STR_CPU_LA664	      la664
 # Base architecture
 STR_ISA_BASE_LA64V100 la64
 -STR_ISA_BASE_LA64V110 la64v1.1
 # -mfpu
 OPTSTR_ISA_EXT_FPU    fpu
 diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
 index a49de07c9..cd5e75e4f 100644
 --- a/gcc/config/loongarch/genopts/loongarch.opt.in
 +++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -32,9 +32,6 @@ Basic ISAs of LoongArch:
 EnumValue
 Enum(isa_base) String(@@STR_ISA_BASE_LA64V100@@) Value(ISA_BASE_LA64V100)
 -EnumValue
 -Enum(isa_base) String(@@STR_ISA_BASE_LA64V110@@) Value(ISA_BASE_LA64V110)
 -
 ;; ISA extensions / adjustments
 Enum
 Name(isa_ext_fpu) Type(int)
 diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
 index bbce82c9c..7e0625835 100644
 --- a/gcc/config/loongarch/loongarch-cpu.cc
 +++ b/gcc/config/loongarch/loongarch-cpu.cc
@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 -#include "tm.h"
 #include "diagnostic-core.h"
 #include "loongarch-def.h"
@@ -32,19 +31,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "loongarch-cpucfg-map.h"
 #include "loongarch-str.h"
 -/* loongarch_isa_base_features defined here instead of loongarch-def.c
 -   because we need to use options.h.  Pay attention on the order of elements
 -   in the initializer becaue ISO C++ does not allow C99 designated
 -   initializers!  */
 -
 -#define ISA_BASE_LA64V110_FEATURES \
 -  (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA \
 -   | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS)
 -
 -int64_t loongarch_isa_base_features[N_ISA_BASE_TYPES] = {
 -  /* [ISA_BASE_LA64V100] = */ 0,
 -  /* [ISA_BASE_LA64V110] = */ ISA_BASE_LA64V110_FEATURES,
 -};
 /* Native CPU detection with "cpucfg" */
 static uint32_t cpucfg_cache[N_CPUCFG_WORDS] = { 0 };
@@ -235,18 +221,20 @@ fill_native_cpu_config (struct loongarch_target *tgt)
       /* Use the native value anyways.  */
       preset.simd = tmp;
 +
 +      int64_t hw_isa_evolution = 0;
 +
       /* Features added during ISA evolution.  */
       for (const auto &entry: cpucfg_map)
 	if (cpucfg_cache[entry.cpucfg_word] & entry.cpucfg_bit)
 -	  preset.evolution |= entry.isa_evolution_bit;
 +	  hw_isa_evolution |= entry.isa_evolution_bit;
       if (native_cpu_type != CPU_NATIVE)
 	{
 	  /* Check if the local CPU really supports the features of the base
 	     ISA of probed native_cpu_type.  If any feature is not detected,
 	     either GCC or the hardware is buggy.  */
 -	  auto base_isa_feature = loongarch_isa_base_features[preset.base];
 -	  if ((preset.evolution & base_isa_feature) != base_isa_feature)
 +	  if ((preset.evolution & hw_isa_evolution) != hw_isa_evolution)
 	    warning (0,
 		     "detected base architecture %qs, but some of its "
 		     "features are not detected; the detected base "
@@ -254,6 +242,7 @@ fill_native_cpu_config (struct loongarch_target *tgt)
 		     "features will be enabled",
 		     loongarch_isa_base_strings[preset.base]);
 	}
 +      preset.evolution = hw_isa_evolution;
     }
   if (tune_native_p)
 diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
 index 6990c86c2..bc6997e45 100644
 --- a/gcc/config/loongarch/loongarch-def.cc
 +++ b/gcc/config/loongarch/loongarch-def.cc
@@ -18,6 +18,11 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 +#include "config.h"
 +#include "system.h"
 +#include "coretypes.h"
 +#include "tm.h"
 +
 #include "loongarch-def.h"
 #include "loongarch-str.h"
@@ -51,9 +56,11 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
 	    .simd_ (ISA_EXT_SIMD_LASX))
     .set (CPU_LA664,
 	  loongarch_isa ()
 -	    .base_ (ISA_BASE_LA64V110)
 +	    .base_ (ISA_BASE_LA64V100)
 	    .fpu_ (ISA_EXT_FPU64)
 -	    .simd_ (ISA_EXT_SIMD_LASX));
 +	    .simd_ (ISA_EXT_SIMD_LASX)
 +	    .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
 +		    | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS));
 static inline loongarch_cache la464_cache ()
 {
@@ -136,8 +143,7 @@ array_tune<int> loongarch_cpu_multipass_dfa_lookahead = array_tune<int> ()
 array<const char *, N_ISA_BASE_TYPES> loongarch_isa_base_strings =
   array<const char *, N_ISA_BASE_TYPES> ()
 -    .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100)
 -    .set (ISA_BASE_LA64V110, STR_ISA_BASE_LA64V110);
 +    .set (ISA_BASE_LA64V100, STR_ISA_BASE_LA64V100);
 array<const char *, N_ISA_EXT_TYPES> loongarch_isa_ext_strings =
   array<const char *, N_ISA_EXT_TYPES> ()
 diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
 index 5ac70dfdd..f8f36f0e2 100644
 --- a/gcc/config/loongarch/loongarch-def.h
 +++ b/gcc/config/loongarch/loongarch-def.h
@@ -56,19 +56,11 @@ along with GCC; see the file COPYING3.  If not see
 /* enum isa_base */
 /* LoongArch V1.00.  */
 -#define ISA_BASE_LA64V100     0
 -/* LoongArch V1.10.  */
 -#define ISA_BASE_LA64V110     1
 -#define N_ISA_BASE_TYPES      2
 +#define ISA_BASE_LA64V100	0
 +#define N_ISA_BASE_TYPES	1
 extern loongarch_def_array<const char *, N_ISA_BASE_TYPES>
   loongarch_isa_base_strings;
 -#if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 -/* Unlike other arrays, this is defined in loongarch-cpu.cc.  The problem is
 -   we cannot use the C++ header options.h in loongarch-def.c.  */
 -extern int64_t loongarch_isa_base_features[];
 -#endif
 -
 /* enum isa_ext_* */
 #define ISA_EXT_NONE	      0
 #define ISA_EXT_FPU32	      1
 diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
 index 45fc521e4..d31becc67 100644
 --- a/gcc/config/loongarch/loongarch-opts.cc
 +++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -285,9 +285,6 @@ config_target_isa:
   /* Get default ISA from "-march" or its default value.  */
   t.isa = loongarch_cpu_default_isa[t.cpu_arch];
 -  if (t.cpu_arch != CPU_NATIVE)
 -    t.isa.evolution |= loongarch_isa_base_features[t.isa.base];
 -
   /* Apply incremental changes.  */
   /* "-march=native" overrides the default FPU type.  */
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index 0dabf1551..7010ddfec 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -77,8 +77,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 #define TARGET_DOUBLE_FLOAT	  (la_target.isa.fpu == ISA_EXT_FPU64)
 #define TARGET_DOUBLE_FLOAT_ABI	  (la_target.abi.base == ABI_BASE_LP64D)
 -#define TARGET_64BIT		  (la_target.isa.base == ISA_BASE_LA64V100 \
 -				   || la_target.isa.base == ISA_BASE_LA64V110)
 +#define TARGET_64BIT		  (la_target.isa.base == ISA_BASE_LA64V100)
 #define TARGET_ABI_LP64		  (la_target.abi.base == ABI_BASE_LP64D	\
 				   || la_target.abi.base == ABI_BASE_LP64F \
 				   || la_target.abi.base == ABI_BASE_LP64S)
@@ -90,7 +89,6 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
 /* TARGET_ macros for use in *.md template conditionals */
 #define TARGET_uARCH_LA464	  (la_target.cpu_tune == CPU_LA464)
 #define TARGET_uARCH_LA664	  (la_target.cpu_tune == CPU_LA664)
 -#define ISA_BASE_IS_LA64V110	  (la_target.isa.base == ISA_BASE_LA64V110)
 /* Note: optimize_size may vary across functions,
    while -m[no]-memcpy imposes a global constraint.  */
 diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
 index 0fee9abe5..7144bbe28 100644
 --- a/gcc/config/loongarch/loongarch-str.h
 +++ b/gcc/config/loongarch/loongarch-str.h
@@ -33,7 +33,6 @@ along with GCC; see the file COPYING3.  If not see
 #define STR_CPU_LA664 "la664"
 #define STR_ISA_BASE_LA64V100 "la64"
 -#define STR_ISA_BASE_LA64V110 "la64v1.1"
 #define OPTSTR_ISA_EXT_FPU "fpu"
 #define STR_NONE "none"
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index ea0d5bb4e..7fe36feb9 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -40,9 +40,6 @@ Basic ISAs of LoongArch:
 EnumValue
 Enum(isa_base) String(la64) Value(ISA_BASE_LA64V100)
 -EnumValue
 -Enum(isa_base) String(la64v1.1) Value(ISA_BASE_LA64V110)
 -
 ;; ISA extensions / adjustments
 Enum
 Name(isa_ext_fpu) Type(int)
 -- 
 2.43.0
--- a/0058-LoongArch-Add-support-for-xorsign.patch
+++ b/0058-LoongArch-Add-support-for-xorsign.patch
@ -0,0 +1,412 @@
 From dac02bbb72cae374ddc905fffcc6c94c901f9b26 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Fri, 17 Nov 2023 17:00:21 +0800
 Subject: [PATCH 058/188] LoongArch: Add support for xorsign.
 This patch adds support for the xorsign pattern for scalar fp and vector modes.
 The new expanders uniformly use vector bitwise logical operations to handle xorsign.
 On LoongArch64, floating-point registers and vector registers share the same registers,
 so this patch also allows conversion between LSX vector mode and scalar fp mode to
 avoid generating unnecessary instructions.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md (xorsign<mode>3): New expander.
 	* config/loongarch/loongarch.cc (loongarch_can_change_mode_class): Allow
 	conversion between LSX vector mode and scalar fp mode.
 	* config/loongarch/loongarch.md (@xorsign<mode>3): New expander.
 	* config/loongarch/lsx.md (@xorsign<mode>3): Ditto.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c: New test.
 	* gcc.target/loongarch/vector/lasx/lasx-xorsign.c: New test.
 	* gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c: New test.
 	* gcc.target/loongarch/vector/lsx/lsx-xorsign.c: New test.
 	* gcc.target/loongarch/xorsign-run.c: New test.
 	* gcc.target/loongarch/xorsign.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 22 +++++--
 gcc/config/loongarch/loongarch.cc             |  5 ++
 gcc/config/loongarch/loongarch.md             | 17 ++++++
 gcc/config/loongarch/lsx.md                   | 23 +++++--
 .../loongarch/vector/lasx/lasx-xorsign-run.c  | 60 +++++++++++++++++++
 .../loongarch/vector/lasx/lasx-xorsign.c      | 19 ++++++
 .../loongarch/vector/lsx/lsx-xorsign-run.c    | 60 +++++++++++++++++++
 .../loongarch/vector/lsx/lsx-xorsign.c        | 19 ++++++
 .../gcc.target/loongarch/xorsign-run.c        | 25 ++++++++
 gcc/testsuite/gcc.target/loongarch/xorsign.c  | 18 ++++++
 10 files changed, 260 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign-run.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/xorsign.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 116b30c07..de7c88f14 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -1065,10 +1065,10 @@
    (set_attr "mode" "<MODE>")])
 (define_insn "xor<mode>3"
 -  [(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
 -	(xor:ILASX
 -	  (match_operand:ILASX 1 "register_operand" "f,f,f")
 -	  (match_operand:ILASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
 +  [(set (match_operand:LASX 0 "register_operand" "=f,f,f")
 +	(xor:LASX
 +	  (match_operand:LASX 1 "register_operand" "f,f,f")
 +	  (match_operand:LASX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
   "ISA_HAS_LASX"
   "@
    xvxor.v\t%u0,%u1,%u2
@@ -3061,6 +3061,20 @@
   operands[5] = gen_reg_rtx (<MODE>mode);
 })
 +(define_expand "xorsign<mode>3"
 +  [(set (match_dup 4)
 +    (and:FLASX (match_dup 3)
 +        (match_operand:FLASX 2 "register_operand")))
 +   (set (match_operand:FLASX 0 "register_operand")
 +    (xor:FLASX (match_dup 4)
 +         (match_operand:FLASX 1 "register_operand")))]
 +  "ISA_HAS_LASX"
 +{
 +  operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
 +
 +  operands[4] = gen_reg_rtx (<MODE>mode);
 +})
 +
 (define_insn "absv4df2"
   [(set (match_operand:V4DF 0 "register_operand" "=f")
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 3ef7e3605..3c8ae9a42 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -6703,6 +6703,11 @@ loongarch_can_change_mode_class (machine_mode from, machine_mode to,
   if (LSX_SUPPORTED_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))
     return true;
 +  /* Allow conversion between LSX vector mode and scalar fp mode. */
 +  if ((LSX_SUPPORTED_MODE_P (from) && SCALAR_FLOAT_MODE_P (to))
 +      || ((SCALAR_FLOAT_MODE_P (from) && LSX_SUPPORTED_MODE_P (to))))
 +    return true;
 +
   return !reg_classes_intersect_p (FP_REGS, rclass);
 }
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index cfd7a8ec6..afc3c591f 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -1164,6 +1164,23 @@
   "fcopysign.<fmt>\t%0,%1,%2"
   [(set_attr "type" "fcopysign")
    (set_attr "mode" "<UNITMODE>")])
 +
 +(define_expand "@xorsign<mode>3"
 +  [(match_operand:ANYF 0 "register_operand")
 +   (match_operand:ANYF 1 "register_operand")
 +   (match_operand:ANYF 2 "register_operand")]
 +  "ISA_HAS_LSX"
 +{
 +  machine_mode lsx_mode
 +    = <MODE>mode == SFmode ? V4SFmode : V2DFmode;
 +  rtx tmp = gen_reg_rtx (lsx_mode);
 +  rtx op1 = lowpart_subreg (lsx_mode, operands[1], <MODE>mode);
 +  rtx op2 = lowpart_subreg (lsx_mode, operands[2], <MODE>mode);
 +  emit_insn (gen_xorsign3 (lsx_mode, tmp, op1, op2));
 +  emit_move_insn (operands[0],
 +          lowpart_subreg (<MODE>mode, tmp, lsx_mode));
 +  DONE;
 +})
 ;;
 ;;  ....................
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 232399934..ce6ec6d69 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -957,10 +957,10 @@
    (set_attr "mode" "<MODE>")])
 (define_insn "xor<mode>3"
 -  [(set (match_operand:ILSX 0 "register_operand" "=f,f,f")
 -	(xor:ILSX
 -	  (match_operand:ILSX 1 "register_operand" "f,f,f")
 -	  (match_operand:ILSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
 +  [(set (match_operand:LSX 0 "register_operand" "=f,f,f")
 +	(xor:LSX
 +	  (match_operand:LSX 1 "register_operand" "f,f,f")
 +	  (match_operand:LSX 2 "reg_or_vector_same_val_operand" "f,YC,Urv8")))]
   "ISA_HAS_LSX"
   "@
    vxor.v\t%w0,%w1,%w2
@@ -2786,6 +2786,21 @@
   operands[5] = gen_reg_rtx (<MODE>mode);
 })
 +(define_expand "@xorsign<mode>3"
 +  [(set (match_dup 4)
 +    (and:FLSX (match_dup 3)
 +        (match_operand:FLSX 2 "register_operand")))
 +   (set (match_operand:FLSX 0 "register_operand")
 +    (xor:FLSX (match_dup 4)
 +         (match_operand:FLSX 1 "register_operand")))]
 +  "ISA_HAS_LSX"
 +{
 +  operands[3] = loongarch_build_signbit_mask (<MODE>mode, 1, 0);
 +
 +  operands[4] = gen_reg_rtx (<MODE>mode);
 +})
 +
 +
 (define_insn "absv2df2"
   [(set (match_operand:V2DF 0 "register_operand" "=f")
 	(abs:V2DF (match_operand:V2DF 1 "register_operand" "f")))]
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c
 new file mode 100644
 index 000000000..2295503d4
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign-run.c
@@ -0,0 +1,60 @@
 +/* { dg-do run } */
 +/* { dg-options "-O2 -ftree-vectorize -mlasx" } */
 +/* { dg-require-effective-target loongarch_asx_hw } */
 +
 +#include "lasx-xorsign.c"
 +
 +extern void abort ();
 +
 +#define N 16
 +float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
 +              -12.5f, -15.6f, -18.7f, -21.8f,
 +              24.9f, 27.1f, 30.2f, 33.3f,
 +              36.4f, 39.5f, 42.6f, 45.7f};
 +float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
 +              -9.0f, 1.0f, -2.0f, 3.0f,
 +              -4.0f, -5.0f, 6.0f, 7.0f,
 +              -8.0f, -9.0f, 10.0f, 11.0f};
 +float r[N];
 +
 +double ad[N] = {-0.1d,  -3.2d,  -6.3d,  -9.4d,
 +                -12.5d, -15.6d, -18.7d, -21.8d,
 +                 24.9d,  27.1d,  30.2d,  33.3d,
 +                 36.4d,  39.5d,  42.6d, 45.7d};
 +double bd[N] = {-1.2d,  3.4d, -5.6d,  7.8d,
 +                -9.0d,  1.0d, -2.0d,  3.0d,
 +                -4.0d, -5.0d,  6.0d,  7.0d,
 +                -8.0d, -9.0d, 10.0d, 11.0d};
 +double rd[N];
 +
 +void
 +__attribute__ ((optimize ("-O0")))
 +check_xorsignf (void)
 +{
 +  for (int i = 0; i < N; i++)
 +    if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
 +      abort ();
 +}
 +
 +void
 +__attribute__ ((optimize ("-O0")))
 +check_xorsign (void)
 +{
 +  for (int i = 0; i < N; i++)
 +    if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
 +      abort ();
 +}
 +
 +int
 +main (void)
 +{
 +  my_xorsignf (r, a, b, N); 
 +  /* check results:  */
 +  check_xorsignf ();
 +
 +  my_xorsign (rd, ad, bd, N);
 +  /* check results:  */
 +  check_xorsign ();
 +
 +  return 0;
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c
 new file mode 100644
 index 000000000..190a9239b
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xorsign.c
@@ -0,0 +1,19 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -ftree-vectorize -mlasx" } */
 +/* { dg-final { scan-assembler "xvand\\.v" } } */
 +/* { dg-final { scan-assembler "xvxor\\.v" } } */
 +/* { dg-final { scan-assembler-not "xvfmul" } } */
 +
 +double
 +my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n)
 +{
 +  for (int i = 0; i < n; i++)
 +    a[i] = b[i] * __builtin_copysign (1.0d, c[i]);
 +}
 +
 +float
 +my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n)
 +{
 +  for (int i = 0; i < n; i++)
 +    a[i] = b[i] * __builtin_copysignf (1.0f, c[i]);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c
 new file mode 100644
 index 000000000..22c5c03cc
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign-run.c
@@ -0,0 +1,60 @@
 +/* { dg-do run } */
 +/* { dg-options "-O2 -ftree-vectorize -mlsx" } */
 +/* { dg-require-effective-target loongarch_sx_hw } */
 +
 +#include "lsx-xorsign.c"
 +
 +extern void abort ();
 +
 +#define N 16
 +float a[N] = {-0.1f, -3.2f, -6.3f, -9.4f,
 +              -12.5f, -15.6f, -18.7f, -21.8f,
 +              24.9f, 27.1f, 30.2f, 33.3f,
 +              36.4f, 39.5f, 42.6f, 45.7f};
 +float b[N] = {-1.2f, 3.4f, -5.6f, 7.8f,
 +              -9.0f, 1.0f, -2.0f, 3.0f,
 +              -4.0f, -5.0f, 6.0f, 7.0f,
 +              -8.0f, -9.0f, 10.0f, 11.0f};
 +float r[N];
 +
 +double ad[N] = {-0.1d,  -3.2d,  -6.3d,  -9.4d,
 +                -12.5d, -15.6d, -18.7d, -21.8d,
 +                 24.9d,  27.1d,  30.2d,  33.3d,
 +                 36.4d,  39.5d,  42.6d, 45.7d};
 +double bd[N] = {-1.2d,  3.4d, -5.6d,  7.8d,
 +                -9.0d,  1.0d, -2.0d,  3.0d,
 +                -4.0d, -5.0d,  6.0d,  7.0d,
 +                -8.0d, -9.0d, 10.0d, 11.0d};
 +double rd[N];
 +
 +void
 +__attribute__ ((optimize ("-O0")))
 +check_xorsignf (void)
 +{
 +  for (int i = 0; i < N; i++)
 +    if (r[i] != a[i] * __builtin_copysignf (1.0f, b[i]))
 +      abort ();
 +}
 +
 +void
 +__attribute__ ((optimize ("-O0")))
 +check_xorsign (void)
 +{
 +  for (int i = 0; i < N; i++)
 +    if (rd[i] != ad[i] * __builtin_copysign (1.0d, bd[i]))
 +      abort ();
 +}
 +
 +int
 +main (void)
 +{
 +  my_xorsignf (r, a, b, N);
 +  /* check results:  */
 +  check_xorsignf ();
 +
 +  my_xorsign (rd, ad, bd, N);
 +  /* check results:  */
 +  check_xorsign ();
 +
 +  return 0;
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c
 new file mode 100644
 index 000000000..c2694c11e
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-xorsign.c
@@ -0,0 +1,19 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -ftree-vectorize -mlsx" } */
 +/* { dg-final { scan-assembler "vand\\.v" } } */
 +/* { dg-final { scan-assembler "vxor\\.v" } } */
 +/* { dg-final { scan-assembler-not "vfmul" } } */
 +
 +double
 +my_xorsign (double *restrict a, double *restrict b, double *restrict c, int n)
 +{
 +  for (int i = 0; i < n; i++)
 +    a[i] = b[i] * __builtin_copysign (1.0d, c[i]);
 +}
 +
 +float
 +my_xorsignf (float *restrict a, float *restrict b, float *restrict c, int n)
 +{
 +  for (int i = 0; i < n; i++)
 +    a[i] = b[i] * __builtin_copysignf (1.0f, c[i]);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign-run.c b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c
 new file mode 100644
 index 000000000..b4f28adf8
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/xorsign-run.c
@@ -0,0 +1,25 @@
 +/* { dg-do run } */
 +/* { dg-options "-O2 -mlsx" } */
 +/* { dg-require-effective-target loongarch_sx_hw } */
 +
 +extern void abort(void);
 +
 +static double x = 2.0;
 +static float  y = 2.0;
 +
 +int main()
 +{
 +  if ((2.5 * __builtin_copysign(1.0d, x)) != 2.5)
 +     abort();
 +
 +  if ((2.5 * __builtin_copysign(1.0f, y)) != 2.5)
 +     abort();
 +
 +  if ((2.5 * __builtin_copysignf(1.0d, -x)) != -2.5)
 +     abort();
 +
 +  if ((2.5 * __builtin_copysignf(1.0f, -y)) != -2.5)
 +     abort();
 +
 +  return 0;
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/xorsign.c b/gcc/testsuite/gcc.target/loongarch/xorsign.c
 new file mode 100644
 index 000000000..ca80603d4
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/xorsign.c
@@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx" } */
 +/* { dg-final { scan-assembler "vand\\.v" } } */
 +/* { dg-final { scan-assembler "vxor\\.v" } } */
 +/* { dg-final { scan-assembler-not "fcopysign" } } */
 +/* { dg-final { scan-assembler-not "fmul" } } */
 +
 +double
 +my_xorsign (double a, double b)
 +{
 +  return a * __builtin_copysign (1.0d, b);
 +}
 +
 +float
 +my_xorsignf (float a, float b)
 +{
 +  return a * __builtin_copysignf (1.0f, b);
 +}
 -- 
 2.43.0
--- a/0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
+++ b/0059-LoongArch-Add-support-for-LoongArch-V1.1-approximate.patch
@ -0,0 +1,730 @@
 From 88117f2703d06e44983e54a985ec0ad6f2397a46 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 6 Dec 2023 15:04:49 +0800
 Subject: [PATCH 059/188] LoongArch: Add support for LoongArch V1.1 approximate
 instructions.
 This patch adds define_insn/builtins/intrinsics for these instructions, and adds the
 option -mfrecipe to control instruction generation.
 gcc/ChangeLog:
 	* config/loongarch/genopts/isa-evolution.in (frecipe): Add.
 	* config/loongarch/larchintrin.h (__frecipe_s): New intrinsic.
 	(__frecipe_d): Ditto.
 	(__frsqrte_s): Ditto.
 	(__frsqrte_d): Ditto.
 	* config/loongarch/lasx.md (lasx_xvfrecipe_<flasxfmt>): New insn pattern.
 	(lasx_xvfrsqrte_<flasxfmt>): Ditto.
 	* config/loongarch/lasxintrin.h (__lasx_xvfrecipe_s): New intrinsic.
 	(__lasx_xvfrecipe_d): Ditto.
 	(__lasx_xvfrsqrte_s): Ditto.
 	(__lasx_xvfrsqrte_d): Ditto.
 	* config/loongarch/loongarch-builtins.cc (AVAIL_ALL): Add predicates.
 	(LSX_EXT_BUILTIN): New macro.
 	(LASX_EXT_BUILTIN): Ditto.
 	* config/loongarch/loongarch-cpucfg-map.h: Regenerate.
 	* config/loongarch/loongarch-c.cc: Add builtin macro "__loongarch_frecipe".
 	* config/loongarch/loongarch-def.cc: Regenerate.
 	* config/loongarch/loongarch-str.h (OPTSTR_FRECIPE): Regenerate.
 	* config/loongarch/loongarch.cc (loongarch_asm_code_end): Dump status for TARGET_FRECIPE.
 	* config/loongarch/loongarch.md (loongarch_frecipe_<fmt>): New insn pattern.
 	(loongarch_frsqrte_<fmt>): Ditto.
 	* config/loongarch/loongarch.opt: Regenerate.
 	* config/loongarch/lsx.md (lsx_vfrecipe_<flsxfmt>): New insn pattern.
 	(lsx_vfrsqrte_<flsxfmt>): Ditto.
 	* config/loongarch/lsxintrin.h (__lsx_vfrecipe_s): New intrinsic.
 	(__lsx_vfrecipe_d): Ditto.
 	(__lsx_vfrsqrte_s): Ditto.
 	(__lsx_vfrsqrte_d): Ditto.
 	* doc/extend.texi: Add documentation for LoongArch new builtins and intrinsics.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/larch-frecipe-builtin.c: New test.
 	* gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c: New test.
 	* gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c: New test.
 ---
 gcc/config/loongarch/genopts/isa-evolution.in |  1 +
 gcc/config/loongarch/larchintrin.h            | 38 +++++++++++++++++
 gcc/config/loongarch/lasx.md                  | 24 +++++++++++
 gcc/config/loongarch/lasxintrin.h             | 34 +++++++++++++++
 gcc/config/loongarch/loongarch-builtins.cc    | 42 +++++++++++++++++++
 gcc/config/loongarch/loongarch-c.cc           |  3 ++
 gcc/config/loongarch/loongarch-cpucfg-map.h   |  1 +
 gcc/config/loongarch/loongarch-def.cc         |  3 +-
 gcc/config/loongarch/loongarch-str.h          |  1 +
 gcc/config/loongarch/loongarch.cc             |  1 +
 gcc/config/loongarch/loongarch.md             | 35 +++++++++++++++-
 gcc/config/loongarch/loongarch.opt            |  4 ++
 gcc/config/loongarch/lsx.md                   | 24 +++++++++++
 gcc/config/loongarch/lsxintrin.h              | 34 +++++++++++++++
 gcc/doc/extend.texi                           | 35 ++++++++++++++++
 .../loongarch/larch-frecipe-builtin.c         | 28 +++++++++++++
 .../vector/lasx/lasx-frecipe-builtin.c        | 30 +++++++++++++
 .../vector/lsx/lsx-frecipe-builtin.c          | 30 +++++++++++++
 18 files changed, 365 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
 diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in
 index a6bc3f87f..11a198b64 100644
 --- a/gcc/config/loongarch/genopts/isa-evolution.in
 +++ b/gcc/config/loongarch/genopts/isa-evolution.in
@@ -1,3 +1,4 @@
 +2	25	frecipe		Support frecipe.{s/d} and frsqrte.{s/d} instructions.
 2	26	div32		Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
 2	27	lam-bh		Support am{swap/add}[_db].{b/h} instructions.
 2	28	lamcas		Support amcas[_db].{b/h/w/d} instructions.
 diff --git a/gcc/config/loongarch/larchintrin.h b/gcc/config/loongarch/larchintrin.h
 index 2833f1487..22035e767 100644
 --- a/gcc/config/loongarch/larchintrin.h
 +++ b/gcc/config/loongarch/larchintrin.h
@@ -333,6 +333,44 @@ __iocsrwr_d (unsigned long int _1, unsigned int _2)
 }
 #endif
 +#ifdef __loongarch_frecipe
 +/* Assembly instruction format: fd, fj.  */
 +/* Data types in instruction templates:  SF, SF.  */
 +extern __inline float
 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 +__frecipe_s (float _1)
 +{
 +  return (float) __builtin_loongarch_frecipe_s ((float) _1);
 +}
 +
 +/* Assembly instruction format: fd, fj.  */
 +/* Data types in instruction templates:  DF, DF.  */
 +extern __inline double
 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 +__frecipe_d (double _1)
 +{
 +  return (double) __builtin_loongarch_frecipe_d ((double) _1);
 +}
 +
 +/* Assembly instruction format: fd, fj.  */
 +/* Data types in instruction templates:  SF, SF.  */
 +extern __inline float
 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 +__frsqrte_s (float _1)
 +{
 +  return (float) __builtin_loongarch_frsqrte_s ((float) _1);
 +}
 +
 +/* Assembly instruction format: fd, fj.  */
 +/* Data types in instruction templates:  DF, DF.  */
 +extern __inline double
 +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 +__frsqrte_d (double _1)
 +{
 +  return (double) __builtin_loongarch_frsqrte_d ((double) _1);
 +}
 +#endif
 +
 /* Assembly instruction format:	ui15.  */
 /* Data types in instruction templates:  USI.  */
 #define __dbar(/*ui15*/ _1) __builtin_loongarch_dbar ((_1))
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index de7c88f14..b1416f6c3 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -40,8 +40,10 @@
   UNSPEC_LASX_XVFCVTL
   UNSPEC_LASX_XVFLOGB
   UNSPEC_LASX_XVFRECIP
 +  UNSPEC_LASX_XVFRECIPE
   UNSPEC_LASX_XVFRINT
   UNSPEC_LASX_XVFRSQRT
 +  UNSPEC_LASX_XVFRSQRTE
   UNSPEC_LASX_XVFCMP_SAF
   UNSPEC_LASX_XVFCMP_SEQ
   UNSPEC_LASX_XVFCMP_SLE
@@ -1633,6 +1635,17 @@
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 +;; Approximate Reciprocal Instructions.
 +
 +(define_insn "lasx_xvfrecipe_<flasxfmt>"
 +  [(set (match_operand:FLASX 0 "register_operand" "=f")
 +    (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
 +		  UNSPEC_LASX_XVFRECIPE))]
 +  "ISA_HAS_LASX && TARGET_FRECIPE"
 +  "xvfrecipe.<flasxfmt>\t%u0,%u1"
 +  [(set_attr "type" "simd_fdiv")
 +   (set_attr "mode" "<MODE>")])
 +
 (define_insn "lasx_xvfrsqrt_<flasxfmt>"
   [(set (match_operand:FLASX 0 "register_operand" "=f")
 	(unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
@@ -1642,6 +1655,17 @@
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 +;; Approximate Reciprocal Square Root Instructions.
 +
 +(define_insn "lasx_xvfrsqrte_<flasxfmt>"
 +  [(set (match_operand:FLASX 0 "register_operand" "=f")
 +    (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
 +		  UNSPEC_LASX_XVFRSQRTE))]
 +  "ISA_HAS_LASX && TARGET_FRECIPE"
 +  "xvfrsqrte.<flasxfmt>\t%u0,%u1"
 +  [(set_attr "type" "simd_fdiv")
 +   (set_attr "mode" "<MODE>")])
 +
 (define_insn "lasx_xvftint_u_<ilasxfmt_u>_<flasxfmt>"
   [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
 	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")]
 diff --git a/gcc/config/loongarch/lasxintrin.h b/gcc/config/loongarch/lasxintrin.h
 index 7bce2c757..5e65e76e7 100644
 --- a/gcc/config/loongarch/lasxintrin.h
 +++ b/gcc/config/loongarch/lasxintrin.h
@@ -2399,6 +2399,40 @@ __m256d __lasx_xvfrecip_d (__m256d _1)
   return (__m256d)__builtin_lasx_xvfrecip_d ((v4f64)_1);
 }
 +#if defined(__loongarch_frecipe)
 +/* Assembly instruction format: xd, xj.  */
 +/* Data types in instruction templates:  V8SF, V8SF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m256 __lasx_xvfrecipe_s (__m256 _1)
 +{
 +  return (__m256)__builtin_lasx_xvfrecipe_s ((v8f32)_1);
 +}
 +
 +/* Assembly instruction format: xd, xj.  */
 +/* Data types in instruction templates:  V4DF, V4DF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m256d __lasx_xvfrecipe_d (__m256d _1)
 +{
 +  return (__m256d)__builtin_lasx_xvfrecipe_d ((v4f64)_1);
 +}
 +
 +/* Assembly instruction format: xd, xj.  */
 +/* Data types in instruction templates:  V8SF, V8SF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m256 __lasx_xvfrsqrte_s (__m256 _1)
 +{
 +  return (__m256)__builtin_lasx_xvfrsqrte_s ((v8f32)_1);
 +}
 +
 +/* Assembly instruction format: xd, xj.  */
 +/* Data types in instruction templates:  V4DF, V4DF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m256d __lasx_xvfrsqrte_d (__m256d _1)
 +{
 +  return (__m256d)__builtin_lasx_xvfrsqrte_d ((v4f64)_1);
 +}
 +#endif
 +
 /* Assembly instruction format:	xd, xj.  */
 /* Data types in instruction templates:  V8SF, V8SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
 index f4523c8bf..bc156bd36 100644
 --- a/gcc/config/loongarch/loongarch-builtins.cc
 +++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -120,6 +120,9 @@ struct loongarch_builtin_description
 AVAIL_ALL (hard_float, TARGET_HARD_FLOAT_ABI)
 AVAIL_ALL (lsx, ISA_HAS_LSX)
 AVAIL_ALL (lasx, ISA_HAS_LASX)
 +AVAIL_ALL (frecipe, TARGET_FRECIPE && TARGET_HARD_FLOAT_ABI)
 +AVAIL_ALL (lsx_frecipe, ISA_HAS_LSX && TARGET_FRECIPE)
 +AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
 /* Construct a loongarch_builtin_description from the given arguments.
@@ -164,6 +167,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
     "__builtin_lsx_" #INSN,  LARCH_BUILTIN_DIRECT,			\
     FUNCTION_TYPE, loongarch_builtin_avail_lsx }
 + /* Define an LSX LARCH_BUILTIN_DIRECT function __builtin_lsx_<INSN>
 +    for instruction CODE_FOR_lsx_<INSN>.  FUNCTION_TYPE is a builtin_description
 +    field. AVAIL is the name of the availability predicate, without the leading
 +    loongarch_builtin_avail_.  */
 +#define LSX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL)                     \
 +  { CODE_FOR_lsx_ ## INSN,                                              \
 +    "__builtin_lsx_" #INSN,  LARCH_BUILTIN_DIRECT,                      \
 +    FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL }
 +
 /* Define an LSX LARCH_BUILTIN_LSX_TEST_BRANCH function __builtin_lsx_<INSN>
    for instruction CODE_FOR_lsx_<INSN>.  FUNCTION_TYPE is a builtin_description
@@ -189,6 +201,15 @@ AVAIL_ALL (lasx, ISA_HAS_LASX)
     "__builtin_lasx_" #INSN,  LARCH_BUILTIN_LASX,			\
     FUNCTION_TYPE, loongarch_builtin_avail_lasx }
 +/* Define an LASX LARCH_BUILTIN_DIRECT function __builtin_lasx_<INSN>
 +   for instruction CODE_FOR_lasx_<INSN>.  FUNCTION_TYPE is a builtin_description
 +   field. AVAIL is the name of the availability predicate, without the leading
 +   loongarch_builtin_avail_.  */
 +#define LASX_EXT_BUILTIN(INSN, FUNCTION_TYPE, AVAIL)                    \
 +  { CODE_FOR_lasx_ ## INSN,                                             \
 +    "__builtin_lasx_" #INSN,  LARCH_BUILTIN_LASX,                       \
 +    FUNCTION_TYPE, loongarch_builtin_avail_##AVAIL }
 +
 /* Define an LASX LARCH_BUILTIN_DIRECT_NO_TARGET function __builtin_lasx_<INSN>
    for instruction CODE_FOR_lasx_<INSN>.  FUNCTION_TYPE is a builtin_description
    field.  */
@@ -804,6 +825,27 @@ static const struct loongarch_builtin_description loongarch_builtins[] = {
   DIRECT_NO_TARGET_BUILTIN (syscall, LARCH_VOID_FTYPE_USI, default),
   DIRECT_NO_TARGET_BUILTIN (break, LARCH_VOID_FTYPE_USI, default),
 +  /* Built-in functions for frecipe.{s/d} and frsqrte.{s/d}.  */
 +
 +  DIRECT_BUILTIN (frecipe_s, LARCH_SF_FTYPE_SF, frecipe),
 +  DIRECT_BUILTIN (frecipe_d, LARCH_DF_FTYPE_DF, frecipe),
 +  DIRECT_BUILTIN (frsqrte_s, LARCH_SF_FTYPE_SF, frecipe),
 +  DIRECT_BUILTIN (frsqrte_d, LARCH_DF_FTYPE_DF, frecipe),
 +
 +  /* Built-in functions for new LSX instructions.  */
 +
 +  LSX_EXT_BUILTIN (vfrecipe_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe),
 +  LSX_EXT_BUILTIN (vfrecipe_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe),
 +  LSX_EXT_BUILTIN (vfrsqrte_s, LARCH_V4SF_FTYPE_V4SF, lsx_frecipe),
 +  LSX_EXT_BUILTIN (vfrsqrte_d, LARCH_V2DF_FTYPE_V2DF, lsx_frecipe),
 +
 +  /* Built-in functions for new LASX instructions.  */
 +
 +  LASX_EXT_BUILTIN (xvfrecipe_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe),
 +  LASX_EXT_BUILTIN (xvfrecipe_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe),
 +  LASX_EXT_BUILTIN (xvfrsqrte_s, LARCH_V8SF_FTYPE_V8SF, lasx_frecipe),
 +  LASX_EXT_BUILTIN (xvfrsqrte_d, LARCH_V4DF_FTYPE_V4DF, lasx_frecipe),
 +
   /* Built-in functions for LSX.  */
   LSX_BUILTIN (vsll_b, LARCH_V16QI_FTYPE_V16QI_V16QI),
   LSX_BUILTIN (vsll_h, LARCH_V8HI_FTYPE_V8HI_V8HI),
 diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
 index 76c8ea8db..a89477a74 100644
 --- a/gcc/config/loongarch/loongarch-c.cc
 +++ b/gcc/config/loongarch/loongarch-c.cc
@@ -102,6 +102,9 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
   else
     builtin_define ("__loongarch_frlen=0");
 +  if (TARGET_HARD_FLOAT && TARGET_FRECIPE)
 +    builtin_define ("__loongarch_frecipe");
 +
   if (ISA_HAS_LSX)
     {
       builtin_define ("__loongarch_simd");
 diff --git a/gcc/config/loongarch/loongarch-cpucfg-map.h b/gcc/config/loongarch/loongarch-cpucfg-map.h
 index 02ff16712..148333c24 100644
 --- a/gcc/config/loongarch/loongarch-cpucfg-map.h
 +++ b/gcc/config/loongarch/loongarch-cpucfg-map.h
@@ -29,6 +29,7 @@ static constexpr struct {
   unsigned int cpucfg_bit;
   HOST_WIDE_INT isa_evolution_bit;
 } cpucfg_map[] = {
 +  { 2, 1u << 25, OPTION_MASK_ISA_FRECIPE },
   { 2, 1u << 26, OPTION_MASK_ISA_DIV32 },
   { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH },
   { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS },
 diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
 index bc6997e45..c41804a18 100644
 --- a/gcc/config/loongarch/loongarch-def.cc
 +++ b/gcc/config/loongarch/loongarch-def.cc
@@ -60,7 +60,8 @@ array_arch<loongarch_isa> loongarch_cpu_default_isa =
 	    .fpu_ (ISA_EXT_FPU64)
 	    .simd_ (ISA_EXT_SIMD_LASX)
 	    .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA
 -		    | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS));
 +			 | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS
 +			 | OPTION_MASK_ISA_FRECIPE));
 static inline loongarch_cache la464_cache ()
 {
 diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
 index 7144bbe28..a8821acb0 100644
 --- a/gcc/config/loongarch/loongarch-str.h
 +++ b/gcc/config/loongarch/loongarch-str.h
@@ -68,6 +68,7 @@ along with GCC; see the file COPYING3.  If not see
 #define STR_EXPLICIT_RELOCS_NONE "none"
 #define STR_EXPLICIT_RELOCS_ALWAYS "always"
 +#define OPTSTR_FRECIPE "frecipe"
 #define OPTSTR_DIV32   "div32"
 #define OPTSTR_LAM_BH  "lam-bh"
 #define OPTSTR_LAMCAS  "lamcas"
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 3c8ae9a42..ce1c0a8bd 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -11503,6 +11503,7 @@ loongarch_asm_code_end (void)
 	       loongarch_cpu_strings [la_target.cpu_tune]);
       fprintf (asm_out_file, "%s Base ISA: %s\n", ASM_COMMENT_START,
 	       loongarch_isa_base_strings [la_target.isa.base]);
 +      DUMP_FEATURE (TARGET_FRECIPE);
       DUMP_FEATURE (TARGET_DIV32);
       DUMP_FEATURE (TARGET_LAM_BH);
       DUMP_FEATURE (TARGET_LAMCAS);
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index afc3c591f..9080cec1c 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -59,6 +59,12 @@
   ;; Stack tie
   UNSPEC_TIE
 +  ;; RSQRT
 +  UNSPEC_RSQRTE
 +
 +  ;; RECIP
 +  UNSPEC_RECIPE
 +
   ;; CRC
   UNSPEC_CRC
   UNSPEC_CRCC
@@ -220,6 +226,7 @@
 ;; fmadd	floating point multiply-add
 ;; fdiv		floating point divide
 ;; frdiv	floating point reciprocal divide
 +;; frecipe      floating point approximate reciprocal
 ;; fabs		floating point absolute value
 ;; flogb	floating point exponent extract
 ;; fneg		floating point negation
@@ -229,6 +236,7 @@
 ;; fscaleb	floating point scale
 ;; fsqrt	floating point square root
 ;; frsqrt       floating point reciprocal square root
 +;; frsqrte      floating point approximate reciprocal square root
 ;; multi	multiword sequence (or user asm statements)
 ;; atomic	atomic memory update instruction
 ;; syncloop	memory atomic operation implemented as a sync loop
@@ -238,8 +246,8 @@
   "unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
    prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
    shift,slt,signext,clz,trap,imul,idiv,move,
 -   fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
 -   fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost,
 +   fmove,fadd,fmul,fmadd,fdiv,frdiv,frecipe,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
 +   fscaleb,fsqrt,frsqrt,frsqrte,accext,accmod,multi,atomic,syncloop,nop,ghost,
    simd_div,simd_fclass,simd_flog2,simd_fadd,simd_fcvt,simd_fmul,simd_fmadd,
    simd_fdiv,simd_bitins,simd_bitmov,simd_insert,simd_sld,simd_mul,simd_fcmp,
    simd_fexp2,simd_int_arith,simd_bit,simd_shift,simd_splat,simd_fill,
@@ -908,6 +916,18 @@
   [(set_attr "type" "frdiv")
    (set_attr "mode" "<UNITMODE>")])
 +;; Approximate Reciprocal Instructions.
 +
 +(define_insn "loongarch_frecipe_<fmt>"
 +  [(set (match_operand:ANYF 0 "register_operand" "=f")
 +    (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
 +	     UNSPEC_RECIPE))]
 +  "TARGET_FRECIPE"
 +  "frecipe.<fmt>\t%0,%1"
 +  [(set_attr "type" "frecipe")
 +   (set_attr "mode" "<UNITMODE>")
 +   (set_attr "insn_count" "1")])
 +
 ;; Integer division and modulus.
 (define_expand "<optab><mode>3"
   [(set (match_operand:GPR 0 "register_operand")
@@ -1133,6 +1153,17 @@
   [(set_attr "type" "frsqrt")
    (set_attr "mode" "<UNITMODE>")
    (set_attr "insn_count" "1")])
 +
 +;; Approximate Reciprocal Square Root Instructions.
 +
 +(define_insn "loongarch_frsqrte_<fmt>"
 +  [(set (match_operand:ANYF 0 "register_operand" "=f")
 +    (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
 +		 UNSPEC_RSQRTE))]
 +  "TARGET_FRECIPE"
 +  "frsqrte.<fmt>\t%0,%1"
 +  [(set_attr "type" "frsqrte")
 +   (set_attr "mode" "<UNITMODE>")])
 ;;
 ;;  ....................
 diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
 index 7fe36feb9..e7bc8bed4 100644
 --- a/gcc/config/loongarch/loongarch.opt
 +++ b/gcc/config/loongarch/loongarch.opt
@@ -260,6 +260,10 @@ default value is 4.
 Variable
 HOST_WIDE_INT isa_evolution = 0
 +mfrecipe
 +Target Mask(ISA_FRECIPE) Var(isa_evolution)
 +Support frecipe.{s/d} and frsqrte.{s/d} instructions.
 +
 mdiv32
 Target Mask(ISA_DIV32) Var(isa_evolution)
 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended.
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index ce6ec6d69..37bdc6910 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -42,8 +42,10 @@
   UNSPEC_LSX_VFCVTL
   UNSPEC_LSX_VFLOGB
   UNSPEC_LSX_VFRECIP
 +  UNSPEC_LSX_VFRECIPE
   UNSPEC_LSX_VFRINT
   UNSPEC_LSX_VFRSQRT
 +  UNSPEC_LSX_VFRSQRTE
   UNSPEC_LSX_VFCMP_SAF
   UNSPEC_LSX_VFCMP_SEQ
   UNSPEC_LSX_VFCMP_SLE
@@ -1546,6 +1548,17 @@
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 +;; Approximate Reciprocal Instructions.
 +
 +(define_insn "lsx_vfrecipe_<flsxfmt>"
 +  [(set (match_operand:FLSX 0 "register_operand" "=f")
 +    (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
 +		 UNSPEC_LSX_VFRECIPE))]
 +  "ISA_HAS_LSX && TARGET_FRECIPE"
 +  "vfrecipe.<flsxfmt>\t%w0,%w1"
 +  [(set_attr "type" "simd_fdiv")
 +   (set_attr "mode" "<MODE>")])
 +
 (define_insn "lsx_vfrsqrt_<flsxfmt>"
   [(set (match_operand:FLSX 0 "register_operand" "=f")
 	(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
@@ -1555,6 +1568,17 @@
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 +;; Approximate Reciprocal Square Root Instructions.
 +
 +(define_insn "lsx_vfrsqrte_<flsxfmt>"
 +  [(set (match_operand:FLSX 0 "register_operand" "=f")
 +    (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
 +		 UNSPEC_LSX_VFRSQRTE))]
 +  "ISA_HAS_LSX && TARGET_FRECIPE"
 +  "vfrsqrte.<flsxfmt>\t%w0,%w1"
 +  [(set_attr "type" "simd_fdiv")
 +   (set_attr "mode" "<MODE>")])
 +
 (define_insn "lsx_vftint_u_<ilsxfmt_u>_<flsxfmt>"
   [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")]
 diff --git a/gcc/config/loongarch/lsxintrin.h b/gcc/config/loongarch/lsxintrin.h
 index 29553c093..57a6fc40a 100644
 --- a/gcc/config/loongarch/lsxintrin.h
 +++ b/gcc/config/loongarch/lsxintrin.h
@@ -2480,6 +2480,40 @@ __m128d __lsx_vfrecip_d (__m128d _1)
   return (__m128d)__builtin_lsx_vfrecip_d ((v2f64)_1);
 }
 +#if defined(__loongarch_frecipe)
 +/* Assembly instruction format: vd, vj.  */
 +/* Data types in instruction templates:  V4SF, V4SF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m128 __lsx_vfrecipe_s (__m128 _1)
 +{
 +  return (__m128)__builtin_lsx_vfrecipe_s ((v4f32)_1);
 +}
 +
 +/* Assembly instruction format: vd, vj.  */
 +/* Data types in instruction templates:  V2DF, V2DF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m128d __lsx_vfrecipe_d (__m128d _1)
 +{
 +  return (__m128d)__builtin_lsx_vfrecipe_d ((v2f64)_1);
 +}
 +
 +/* Assembly instruction format: vd, vj.  */
 +/* Data types in instruction templates:  V4SF, V4SF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m128 __lsx_vfrsqrte_s (__m128 _1)
 +{
 +  return (__m128)__builtin_lsx_vfrsqrte_s ((v4f32)_1);
 +}
 +
 +/* Assembly instruction format: vd, vj.  */
 +/* Data types in instruction templates:  V2DF, V2DF.  */
 +extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 +__m128d __lsx_vfrsqrte_d (__m128d _1)
 +{
 +  return (__m128d)__builtin_lsx_vfrsqrte_d ((v2f64)_1);
 +}
 +#endif
 +
 /* Assembly instruction format:	vd, vj.  */
 /* Data types in instruction templates:  V4SF, V4SF.  */
 extern __inline __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
 index 7edd3974d..bb042ae78 100644
 --- a/gcc/doc/extend.texi
 +++ b/gcc/doc/extend.texi
@@ -16187,6 +16187,14 @@ The intrinsics provided are listed below:
     void __builtin_loongarch_break (imm0_32767)
 @end smallexample
 +These instrisic functions are available by using @option{-mfrecipe}.
 +@smallexample
 +    float __builtin_loongarch_frecipe_s (float);
 +    double  __builtin_loongarch_frecipe_d (double);
 +    float __builtin_loongarch_frsqrte_s (float);
 +    double  __builtin_loongarch_frsqrte_d (double);
 +@end smallexample
 +
 @emph{Note:}Since the control register is divided into 32-bit and 64-bit,
 but the access instruction is not distinguished. So GCC renames the control
 instructions when implementing intrinsics.
@@ -16259,6 +16267,15 @@ function you need to include @code{larchintrin.h}.
     void __break (imm0_32767)
 @end smallexample
 +These instrisic functions are available by including @code{larchintrin.h} and
 +using @option{-mfrecipe}.
 +@smallexample
 +    float __frecipe_s (float);
 +    double __frecipe_d (double);
 +    float __frsqrte_s (float);
 +    double __frsqrte_d (double);
 +@end smallexample
 +
 Returns the value that is currently set in the @samp{tp} register.
 @smallexample
     void * __builtin_thread_pointer (void)
@@ -17085,6 +17102,15 @@ __m128i __lsx_vxori_b (__m128i, imm0_255);
 __m128i __lsx_vxor_v (__m128i, __m128i);
 @end smallexample
 +These instrisic functions are available by including @code{lsxintrin.h} and
 +using @option{-mfrecipe} and @option{-mlsx}.
 +@smallexample
 +__m128d __lsx_vfrecipe_d (__m128d);
 +__m128 __lsx_vfrecipe_s (__m128);
 +__m128d __lsx_vfrsqrte_d (__m128d);
 +__m128 __lsx_vfrsqrte_s (__m128);
 +@end smallexample
 +
 @node LoongArch ASX Vector Intrinsics
 @subsection LoongArch ASX Vector Intrinsics
@@ -17924,6 +17950,15 @@ __m256i __lasx_xvxori_b (__m256i, imm0_255);
 __m256i __lasx_xvxor_v (__m256i, __m256i);
 @end smallexample
 +These instrisic functions are available by including @code{lasxintrin.h} and
 +using @option{-mfrecipe} and @option{-mlasx}.
 +@smallexample
 +__m256d __lasx_xvfrecipe_d (__m256d);
 +__m256 __lasx_xvfrecipe_s (__m256);
 +__m256d __lasx_xvfrsqrte_d (__m256d);
 +__m256 __lasx_xvfrsqrte_s (__m256);
 +@end smallexample
 +
 @node MIPS DSP Built-in Functions
 @subsection MIPS DSP Built-in Functions
 diff --git a/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
 new file mode 100644
 index 000000000..b9329f346
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/larch-frecipe-builtin.c
@@ -0,0 +1,28 @@
 +/* Test builtins for frecipe.{s/d} and frsqrte.{s/d} instructions */
 +/* { dg-do compile } */
 +/* { dg-options "-mfrecipe" } */
 +/* { dg-final { scan-assembler-times "test_frecipe_s:.*frecipe\\.s.*test_frecipe_s" 1 } } */
 +/* { dg-final { scan-assembler-times "test_frecipe_d:.*frecipe\\.d.*test_frecipe_d" 1 } } */
 +/* { dg-final { scan-assembler-times "test_frsqrte_s:.*frsqrte\\.s.*test_frsqrte_s" 1 } } */
 +/* { dg-final { scan-assembler-times "test_frsqrte_d:.*frsqrte\\.d.*test_frsqrte_d" 1 } } */
 +
 +float
 +test_frecipe_s (float _1)
 +{
 +  return __builtin_loongarch_frecipe_s (_1);
 +}
 +double
 +test_frecipe_d (double _1)
 +{
 +  return __builtin_loongarch_frecipe_d (_1);
 +}
 +float
 +test_frsqrte_s (float _1)
 +{
 +  return __builtin_loongarch_frsqrte_s (_1);
 +}
 +double
 +test_frsqrte_d (double _1)
 +{
 +  return __builtin_loongarch_frsqrte_d (_1);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
 new file mode 100644
 index 000000000..522535b45
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-frecipe-builtin.c
@@ -0,0 +1,30 @@
 +/* Test builtins for xvfrecipe.{s/d} and xvfrsqrte.{s/d} instructions */
 +/* { dg-do compile } */
 +/* { dg-options "-mlasx -mfrecipe" } */
 +/* { dg-final { scan-assembler-times "lasx_xvfrecipe_s:.*xvfrecipe\\.s.*lasx_xvfrecipe_s" 1 } } */
 +/* { dg-final { scan-assembler-times "lasx_xvfrecipe_d:.*xvfrecipe\\.d.*lasx_xvfrecipe_d" 1 } } */
 +/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_s:.*xvfrsqrte\\.s.*lasx_xvfrsqrte_s" 1 } } */
 +/* { dg-final { scan-assembler-times "lasx_xvfrsqrte_d:.*xvfrsqrte\\.d.*lasx_xvfrsqrte_d" 1 } } */
 +
 +#include <lasxintrin.h>
 +
 +v8f32
 +__lasx_xvfrecipe_s (v8f32 _1)
 +{
 +  return __builtin_lasx_xvfrecipe_s (_1);
 +}
 +v4f64
 +__lasx_xvfrecipe_d (v4f64 _1)
 +{
 +  return __builtin_lasx_xvfrecipe_d (_1);
 +}
 +v8f32
 +__lasx_xvfrsqrte_s (v8f32 _1)
 +{
 +  return __builtin_lasx_xvfrsqrte_s (_1);
 +}
 +v4f64
 +__lasx_xvfrsqrte_d (v4f64 _1)
 +{
 +  return __builtin_lasx_xvfrsqrte_d (_1);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
 new file mode 100644
 index 000000000..4ad0cb0ff
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-frecipe-builtin.c
@@ -0,0 +1,30 @@
 +/* Test builtins for vfrecipe.{s/d} and vfrsqrte.{s/d} instructions */
 +/* { dg-do compile } */
 +/* { dg-options "-mlsx -mfrecipe" } */
 +/* { dg-final { scan-assembler-times "lsx_vfrecipe_s:.*vfrecipe\\.s.*lsx_vfrecipe_s" 1 } } */
 +/* { dg-final { scan-assembler-times "lsx_vfrecipe_d:.*vfrecipe\\.d.*lsx_vfrecipe_d" 1 } } */
 +/* { dg-final { scan-assembler-times "lsx_vfrsqrte_s:.*vfrsqrte\\.s.*lsx_vfrsqrte_s" 1 } } */
 +/* { dg-final { scan-assembler-times "lsx_vfrsqrte_d:.*vfrsqrte\\.d.*lsx_vfrsqrte_d" 1 } } */
 +
 +#include <lsxintrin.h>
 +
 +v4f32
 +__lsx_vfrecipe_s (v4f32 _1)
 +{
 +  return __builtin_lsx_vfrecipe_s (_1);
 +}
 +v2f64
 +__lsx_vfrecipe_d (v2f64 _1)
 +{
 +  return __builtin_lsx_vfrecipe_d (_1);
 +}
 +v4f32
 +__lsx_vfrsqrte_s (v4f32 _1)
 +{
 +  return __builtin_lsx_vfrsqrte_s (_1);
 +}
 +v2f64
 +__lsx_vfrsqrte_d (v2f64 _1)
 +{
 +  return __builtin_lsx_vfrsqrte_d (_1);
 +}
 -- 
 2.43.0
--- a/0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
+++ b/0060-LoongArch-Use-standard-pattern-name-for-xvfrsqrt-vfr.patch
@ -0,0 +1,257 @@
 From e8210e26ac638eb443f8991fee6d412b297cb279 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 6 Dec 2023 15:04:50 +0800
 Subject: [PATCH 060/188] LoongArch: Use standard pattern name for
 xvfrsqrt/vfrsqrt instructions.
 Rename lasx_xvfrsqrt*/lsx_vfrsqrt* to rsqrt<mode>2 to align with the standard
 pattern name. Define function use_rsqrt_p to decide when to use the rsqrt optab.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md (lasx_xvfrsqrt_<flasxfmt>): Renamed to ..
 	(rsqrt<mode>2): .. this.
 	* config/loongarch/loongarch-builtins.cc
 	(CODE_FOR_lsx_vfrsqrt_d): Redefine to standard pattern name.
 	(CODE_FOR_lsx_vfrsqrt_s): Ditto.
 	(CODE_FOR_lasx_xvfrsqrt_d): Ditto.
 	(CODE_FOR_lasx_xvfrsqrt_s): Ditto.
 	* config/loongarch/loongarch.cc (use_rsqrt_p): New function.
 	(loongarch_optab_supported_p): Ditto.
 	(TARGET_OPTAB_SUPPORTED_P): New hook.
 	* config/loongarch/loongarch.md (*rsqrt<mode>a): Remove.
 	(*rsqrt<mode>2): New insn pattern.
 	(*rsqrt<mode>b): Remove.
 	* config/loongarch/lsx.md (lsx_vfrsqrt_<flsxfmt>): Renamed to ..
 	(rsqrt<mode>2): .. this.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lasx/lasx-rsqrt.c: New test.
 	* gcc.target/loongarch/vector/lsx/lsx-rsqrt.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  |  6 ++---
 gcc/config/loongarch/loongarch-builtins.cc    |  4 +++
 gcc/config/loongarch/loongarch.cc             | 27 +++++++++++++++++++
 gcc/config/loongarch/loongarch.md             | 24 +++++------------
 gcc/config/loongarch/lsx.md                   |  6 ++---
 .../loongarch/vector/lasx/lasx-rsqrt.c        | 26 ++++++++++++++++++
 .../loongarch/vector/lsx/lsx-rsqrt.c          | 26 ++++++++++++++++++
 7 files changed, 96 insertions(+), 23 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index b1416f6c3..3a4a1fe51 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -1646,10 +1646,10 @@
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lasx_xvfrsqrt_<flasxfmt>"
 +(define_insn "rsqrt<mode>2"
   [(set (match_operand:FLASX 0 "register_operand" "=f")
 -	(unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
 -		      UNSPEC_LASX_XVFRSQRT))]
 +    (unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
 +		  UNSPEC_LASX_XVFRSQRT))]
   "ISA_HAS_LASX"
   "xvfrsqrt.<flasxfmt>\t%u0,%u1"
   [(set_attr "type" "simd_fdiv")
 diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
 index bc156bd36..4aae27a5e 100644
 --- a/gcc/config/loongarch/loongarch-builtins.cc
 +++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -500,6 +500,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
 #define CODE_FOR_lsx_vssrlrn_bu_h CODE_FOR_lsx_vssrlrn_u_bu_h
 #define CODE_FOR_lsx_vssrlrn_hu_w CODE_FOR_lsx_vssrlrn_u_hu_w
 #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d
 +#define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2
 +#define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2
 /* LoongArch ASX define CODE_FOR_lasx_mxxx */
 #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3
@@ -776,6 +778,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
 #define CODE_FOR_lasx_xvsat_hu CODE_FOR_lasx_xvsat_u_hu
 #define CODE_FOR_lasx_xvsat_wu CODE_FOR_lasx_xvsat_u_wu
 #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du
 +#define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2
 +#define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2
 static const struct loongarch_builtin_description loongarch_builtins[] = {
 #define LARCH_MOVFCSR2GR 0
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index ce1c0a8bd..95aa9453b 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -11487,6 +11487,30 @@ loongarch_builtin_support_vector_misalignment (machine_mode mode,
 						      is_packed);
 }
 +static bool
 +use_rsqrt_p (void)
 +{
 +  return (flag_finite_math_only
 +	  && !flag_trapping_math
 +	  && flag_unsafe_math_optimizations);
 +}
 +
 +/* Implement the TARGET_OPTAB_SUPPORTED_P hook.  */
 +
 +static bool
 +loongarch_optab_supported_p (int op, machine_mode, machine_mode,
 +			     optimization_type opt_type)
 +{
 +  switch (op)
 +    {
 +    case rsqrt_optab:
 +      return opt_type == OPTIMIZE_FOR_SPEED && use_rsqrt_p ();
 +
 +    default:
 +      return true;
 +    }
 +}
 +
 /* If -fverbose-asm, dump some info for debugging.  */
 static void
 loongarch_asm_code_end (void)
@@ -11625,6 +11649,9 @@ loongarch_asm_code_end (void)
 #undef TARGET_FUNCTION_ARG_BOUNDARY
 #define TARGET_FUNCTION_ARG_BOUNDARY loongarch_function_arg_boundary
 +#undef TARGET_OPTAB_SUPPORTED_P
 +#define TARGET_OPTAB_SUPPORTED_P loongarch_optab_supported_p
 +
 #undef TARGET_VECTOR_MODE_SUPPORTED_P
 #define TARGET_VECTOR_MODE_SUPPORTED_P loongarch_vector_mode_supported_p
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 9080cec1c..4dfe583e2 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -60,6 +60,7 @@
   UNSPEC_TIE
   ;; RSQRT
 +  UNSPEC_RSQRT
   UNSPEC_RSQRTE
   ;; RECIP
@@ -1134,25 +1135,14 @@
    (set_attr "mode" "<UNITMODE>")
    (set_attr "insn_count" "1")])
 -(define_insn "*rsqrt<mode>a"
 +(define_insn "*rsqrt<mode>2"
   [(set (match_operand:ANYF 0 "register_operand" "=f")
 -	(div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
 -		  (sqrt:ANYF (match_operand:ANYF 2 "register_operand" "f"))))]
 -  "flag_unsafe_math_optimizations"
 -  "frsqrt.<fmt>\t%0,%2"
 -  [(set_attr "type" "frsqrt")
 -   (set_attr "mode" "<UNITMODE>")
 -   (set_attr "insn_count" "1")])
 -
 -(define_insn "*rsqrt<mode>b"
 -  [(set (match_operand:ANYF 0 "register_operand" "=f")
 -	(sqrt:ANYF (div:ANYF (match_operand:ANYF 1 "const_1_operand" "")
 -			     (match_operand:ANYF 2 "register_operand" "f"))))]
 -  "flag_unsafe_math_optimizations"
 -  "frsqrt.<fmt>\t%0,%2"
 +    (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
 +	     UNSPEC_RSQRT))]
 +  "TARGET_HARD_FLOAT"
 +  "frsqrt.<fmt>\t%0,%1"
   [(set_attr "type" "frsqrt")
 -   (set_attr "mode" "<UNITMODE>")
 -   (set_attr "insn_count" "1")])
 +   (set_attr "mode" "<UNITMODE>")])
 ;; Approximate Reciprocal Square Root Instructions.
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 37bdc6910..cb4a448e7 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -1559,10 +1559,10 @@
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lsx_vfrsqrt_<flsxfmt>"
 +(define_insn "rsqrt<mode>2"
   [(set (match_operand:FLSX 0 "register_operand" "=f")
 -	(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
 -		     UNSPEC_LSX_VFRSQRT))]
 +    (unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
 +		 UNSPEC_LSX_VFRSQRT))]
   "ISA_HAS_LSX"
   "vfrsqrt.<flsxfmt>\t%w0,%w1"
   [(set_attr "type" "simd_fdiv")
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
 new file mode 100644
 index 000000000..24316944d
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-rsqrt.c
@@ -0,0 +1,26 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -ffast-math" } */
 +/* { dg-final { scan-assembler "xvfrsqrt.s" } } */
 +/* { dg-final { scan-assembler "xvfrsqrt.d" } } */
 +
 +extern float sqrtf (float);
 +
 +float a[8], b[8];
 +
 +void
 +foo1(void)
 +{
 +  for (int i = 0; i < 8; i++)
 +    a[i] = 1 / sqrtf (b[i]);
 +}
 +
 +extern double sqrt (double);
 +
 +double da[4], db[4];
 +
 +void
 +foo2(void)
 +{
 +  for (int i = 0; i < 4; i++)
 +    da[i] = 1 / sqrt (db[i]);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
 new file mode 100644
 index 000000000..519cc4764
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-rsqrt.c
@@ -0,0 +1,26 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -ffast-math" } */
 +/* { dg-final { scan-assembler "vfrsqrt.s" } } */
 +/* { dg-final { scan-assembler "vfrsqrt.d" } } */
 +
 +extern float sqrtf (float);
 +
 +float a[4], b[4];
 +
 +void
 +foo1(void)
 +{
 +  for (int i = 0; i < 4; i++)
 +    a[i] = 1 / sqrtf (b[i]);
 +}
 +
 +extern double sqrt (double);
 +
 +double da[2], db[2];
 +
 +void
 +foo2(void)
 +{
 +  for (int i = 0; i < 2; i++)
 +    da[i] = 1 / sqrt (db[i]);
 +}
 -- 
 2.43.0
--- a/0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
+++ b/0061-LoongArch-Redefine-pattern-for-xvfrecip-vfrecip-inst.patch
@ -0,0 +1,135 @@
 From 74924710ee8d662d883bf898d69aef1946d91ea5 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 6 Dec 2023 15:04:51 +0800
 Subject: [PATCH 061/188] LoongArch: Redefine pattern for xvfrecip/vfrecip
 instructions.
 Redefine the pattern for [x]vfrecip instructions to use RTX code instead of unspec, and enable
 [x]vfrecip instructions to be generated during auto-vectorization.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md (lasx_xvfrecip_<flasxfmt>): Renamed to ..
 	(recip<mode>3): .. this.
 	* config/loongarch/loongarch-builtins.cc (CODE_FOR_lsx_vfrecip_d): Redefine
 	to new pattern name.
 	(CODE_FOR_lsx_vfrecip_s): Ditto.
 	(CODE_FOR_lasx_xvfrecip_d): Ditto.
 	(CODE_FOR_lasx_xvfrecip_s): Ditto.
 	(loongarch_expand_builtin_direct): For the vector recip instructions, construct a
 	temporary parameter const1_vector.
 	* config/loongarch/lsx.md (lsx_vfrecip_<flsxfmt>): Renamed to ..
 	(recip<mode>3): .. this.
 	* config/loongarch/predicates.md (const_vector_1_operand): New predicate.
 ---
 gcc/config/loongarch/lasx.md               |  8 ++++----
 gcc/config/loongarch/loongarch-builtins.cc | 20 ++++++++++++++++++++
 gcc/config/loongarch/lsx.md                |  8 ++++----
 gcc/config/loongarch/predicates.md         |  4 ++++
 4 files changed, 32 insertions(+), 8 deletions(-)
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 3a4a1fe51..ad49a3ffb 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -1626,12 +1626,12 @@
   [(set_attr "type" "simd_fminmax")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lasx_xvfrecip_<flasxfmt>"
 +(define_insn "recip<mode>3"
   [(set (match_operand:FLASX 0 "register_operand" "=f")
 -	(unspec:FLASX [(match_operand:FLASX 1 "register_operand" "f")]
 -		      UNSPEC_LASX_XVFRECIP))]
 +       (div:FLASX (match_operand:FLASX 1 "const_vector_1_operand" "")
 +		  (match_operand:FLASX 2 "register_operand" "f")))]
   "ISA_HAS_LASX"
 -  "xvfrecip.<flasxfmt>\t%u0,%u1"
 +  "xvfrecip.<flasxfmt>\t%u0,%u2"
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
 index 4aae27a5e..85849ed29 100644
 --- a/gcc/config/loongarch/loongarch-builtins.cc
 +++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -502,6 +502,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
 #define CODE_FOR_lsx_vssrlrn_wu_d CODE_FOR_lsx_vssrlrn_u_wu_d
 #define CODE_FOR_lsx_vfrsqrt_d CODE_FOR_rsqrtv2df2
 #define CODE_FOR_lsx_vfrsqrt_s CODE_FOR_rsqrtv4sf2
 +#define CODE_FOR_lsx_vfrecip_d CODE_FOR_recipv2df3
 +#define CODE_FOR_lsx_vfrecip_s CODE_FOR_recipv4sf3
 /* LoongArch ASX define CODE_FOR_lasx_mxxx */
 #define CODE_FOR_lasx_xvsadd_b CODE_FOR_ssaddv32qi3
@@ -780,6 +782,8 @@ AVAIL_ALL (lasx_frecipe, ISA_HAS_LASX && TARGET_FRECIPE)
 #define CODE_FOR_lasx_xvsat_du CODE_FOR_lasx_xvsat_u_du
 #define CODE_FOR_lasx_xvfrsqrt_d CODE_FOR_rsqrtv4df2
 #define CODE_FOR_lasx_xvfrsqrt_s CODE_FOR_rsqrtv8sf2
 +#define CODE_FOR_lasx_xvfrecip_d CODE_FOR_recipv4df3
 +#define CODE_FOR_lasx_xvfrecip_s CODE_FOR_recipv8sf3
 static const struct loongarch_builtin_description loongarch_builtins[] = {
 #define LARCH_MOVFCSR2GR 0
@@ -3019,6 +3023,22 @@ loongarch_expand_builtin_direct (enum insn_code icode, rtx target, tree exp,
   if (has_target_p)
     create_output_operand (&ops[opno++], target, TYPE_MODE (TREE_TYPE (exp)));
 +  /* For the vector reciprocal instructions, we need to construct a temporary
 +     parameter const1_vector.  */
 +  switch (icode)
 +    {
 +    case CODE_FOR_recipv8sf3:
 +    case CODE_FOR_recipv4df3:
 +    case CODE_FOR_recipv4sf3:
 +    case CODE_FOR_recipv2df3:
 +      loongarch_prepare_builtin_arg (&ops[2], exp, 0);
 +      create_input_operand (&ops[1], CONST1_RTX (ops[0].mode), ops[0].mode);
 +      return loongarch_expand_builtin_insn (icode, 3, ops, has_target_p);
 +
 +    default:
 +      break;
 +    }
 +
   /* Map the arguments to the other operands.  */
   gcc_assert (opno + call_expr_nargs (exp)
 	      == insn_data[icode].n_generator_args);
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index cb4a448e7..f2774f021 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -1539,12 +1539,12 @@
   [(set_attr "type" "simd_fminmax")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lsx_vfrecip_<flsxfmt>"
 +(define_insn "recip<mode>3"
   [(set (match_operand:FLSX 0 "register_operand" "=f")
 -	(unspec:FLSX [(match_operand:FLSX 1 "register_operand" "f")]
 -		     UNSPEC_LSX_VFRECIP))]
 +       (div:FLSX (match_operand:FLSX 1 "const_vector_1_operand" "")
 +		 (match_operand:FLSX 2 "register_operand" "f")))]
   "ISA_HAS_LSX"
 -  "vfrecip.<flsxfmt>\t%w0,%w1"
 +  "vfrecip.<flsxfmt>\t%w0,%w2"
   [(set_attr "type" "simd_fdiv")
    (set_attr "mode" "<MODE>")])
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 30a0dee9f..572550dbc 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -227,6 +227,10 @@
   (and (match_code "const_int,const_wide_int,const_double,const_vector")
        (match_test "op == CONST1_RTX (GET_MODE (op))")))
 +(define_predicate "const_vector_1_operand"
 +  (and (match_code "const_vector")
 +       (match_test "op == CONST1_RTX (GET_MODE (op))")))
 +
 (define_predicate "reg_or_1_operand"
   (ior (match_operand 0 "const_1_operand")
        (match_operand 0 "register_operand")))
 -- 
 2.43.0
--- a/0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
+++ b/0062-LoongArch-New-options-mrecip-and-mrecip-with-ffast-m.patch
--- a/0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch
+++ b/0063-LoongArch-Vectorized-loop-unrolling-is-disable-for-d.patch
@ -0,0 +1,83 @@
 From bb211ae35474a9fa1a8189f0a4c525ce3d8c280e Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 6 Dec 2023 15:04:53 +0800
 Subject: [PATCH 063/188] LoongArch: Vectorized loop unrolling is disabled for
 divf/sqrtf/rsqrtf when -mrecip is enabled.
 Using -mrecip generates a sequence of instructions to replace divf, sqrtf and rsqrtf. The number
 of generated instructions is close to or exceeds the maximum number of instructions that LoongArch
 can issue per cycle, so vectorized loop unrolling is not performed on them.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_vector_costs::determine_suggested_unroll_factor):
 	Return 1 as the unroll factor if m_has_recip is true.
 	(loongarch_vector_costs::add_stmt_cost): Detect the use of approximate instruction sequence.
 ---
 gcc/config/loongarch/loongarch.cc | 36 +++++++++++++++++++++++++++++--
 1 file changed, 34 insertions(+), 2 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 18326ce47..d64777179 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -3970,7 +3970,9 @@ protected:
   /* Reduction factor for suggesting unroll factor.  */
   unsigned m_reduc_factor = 0;
   /* True if the loop contains an average operation. */
 -  bool m_has_avg =false;
 +  bool m_has_avg = false;
 +  /* True if the loop uses approximation instruction sequence.  */
 +  bool m_has_recip = false;
 };
 /* Implement TARGET_VECTORIZE_CREATE_COSTS.  */
@@ -4017,7 +4019,7 @@ loongarch_vector_costs::determine_suggested_unroll_factor (loop_vec_info loop_vi
 {
   class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
 -  if (m_has_avg)
 +  if (m_has_avg || m_has_recip)
     return 1;
   /* Don't unroll if it's specified explicitly not to be unrolled.  */
@@ -4077,6 +4079,36 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
 	}
     }
 +  combined_fn cfn;
 +  if (kind == vector_stmt
 +      && stmt_info
 +      && stmt_info->stmt)
 +    {
 +      /* Detect the use of approximate instruction sequence.  */
 +      if ((TARGET_RECIP_VEC_SQRT || TARGET_RECIP_VEC_RSQRT)
 +	  && (cfn = gimple_call_combined_fn (stmt_info->stmt)) != CFN_LAST)
 +	switch (cfn)
 +	  {
 +	  case CFN_BUILT_IN_SQRTF:
 +	    m_has_recip = true;
 +	  default:
 +	    break;
 +	  }
 +      else if (TARGET_RECIP_VEC_DIV
 +	       && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN)
 +	{
 +	  machine_mode mode = TYPE_MODE (vectype);
 +	  switch (gimple_assign_rhs_code (stmt_info->stmt))
 +	    {
 +	    case RDIV_EXPR:
 +	      if (GET_MODE_INNER (mode) == SFmode)
 +		m_has_recip = true;
 +	    default:
 +	      break;
 +	    }
 +	}
 +    }
 +
   return retval;
 }
 -- 
 2.43.0
--- a/0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
+++ b/0064-LoongArch-Fix-lsx-vshuf.c-and-lasx-xvshuf_b.c-tests-.patch
@ -0,0 +1,130 @@
 From 6ca9670e02a7d3f939b1a75f7b5a9094cd1db909 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Fri, 25 Oct 2024 02:45:35 +0000
 Subject: [PATCH 064/188] LoongArch: Fix lsx-vshuf.c and lasx-xvshuf_b.c tests
 fail on  LA664 [PR112611]
 For [x]vshuf instructions, if the index value in the selector exceeds 63, it triggers
 undefined behavior on LA464, but not on LA664. To ensure compatibility of these two
 tests on both LA464 and LA664, we have modified both tests to ensure that the index
 value in the selector does not exceed 63.
 gcc/testsuite/ChangeLog:
        PR target/112611
        * gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c: Ensure index is less than 64.
        * gcc.target/loongarch/vector/lsx/lsx-vshuf.c: Ditto.
 ---
 .../loongarch/vector/lasx/lasx-xvshuf_b.c          | 14 +++++++-------
 .../gcc.target/loongarch/vector/lsx/lsx-vshuf.c    | 12 ++++++------
 2 files changed, 13 insertions(+), 13 deletions(-)
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
 index b8ab38711..910d29339 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvshuf_b.c
@@ -99,9 +99,9 @@ main ()
   *((unsigned long *)&__m256i_op1[2]) = 0x7ff0000000000000;
   *((unsigned long *)&__m256i_op1[1]) = 0x7ff0000000000000;
   *((unsigned long *)&__m256i_op1[0]) = 0x7ff0000000000000;
 -  *((unsigned long *)&__m256i_op2[3]) = 0x3ff0010000000000;
 +  *((unsigned long *)&__m256i_op2[3]) = 0x3f11010000000000;
   *((unsigned long *)&__m256i_op2[2]) = 0x0000000000000000;
 -  *((unsigned long *)&__m256i_op2[1]) = 0x3ff0010000000000;
 +  *((unsigned long *)&__m256i_op2[1]) = 0x3f11010000000000;
   *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
   *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
   *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
@@ -200,7 +200,7 @@ main ()
   *((unsigned long *)&__m256i_op2[0]) = 0x0000000000000000;
   *((unsigned long *)&__m256i_result[3]) = 0x0000000000000000;
   *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
 -  *((unsigned long *)&__m256i_result[1]) = 0x0000000000000000;
 +  *((unsigned long *)&__m256i_result[1]) = 0xffffffff00000000;
   *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
   __m256i_out = __lasx_xvshuf_h (__m256i_op0, __m256i_op1, __m256i_op2);
   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
@@ -351,7 +351,7 @@ main ()
   *((unsigned long *)&__m256i_op2[1]) = 0x0000000000000001;
   *((unsigned long *)&__m256i_op2[0]) = 0x00000000012e2110;
   *((unsigned long *)&__m256i_result[3]) = 0x0000000000000001;
 -  *((unsigned long *)&__m256i_result[2]) = 0x0000000200000000;
 +  *((unsigned long *)&__m256i_result[2]) = 0x0000000000000000;
   *((unsigned long *)&__m256i_result[1]) = 0x00000000012e2110;
   *((unsigned long *)&__m256i_result[0]) = 0x0000000000000000;
   __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
@@ -426,10 +426,10 @@ main ()
   *((unsigned long *)&__m256i_op2[2]) = 0x8000000080000000;
   *((unsigned long *)&__m256i_op2[1]) = 0xdfffffffdfffffff;
   *((unsigned long *)&__m256i_op2[0]) = 0x8000000080000000;
 -  *((unsigned long *)&__m256i_result[3]) = 0x8000000080000000;
 +  *((unsigned long *)&__m256i_result[3]) = 0xdfffffff80000000;
   *((unsigned long *)&__m256i_result[2]) = 0x7fc00000dfffffff;
 -  *((unsigned long *)&__m256i_result[1]) = 0x8000000080000000;
 -  *((unsigned long *)&__m256i_result[0]) = 0x8000000080000000;
 +  *((unsigned long *)&__m256i_result[1]) = 0x7fc0000000000000;
 +  *((unsigned long *)&__m256i_result[0]) = 0x8000000000000000;
   __m256i_out = __lasx_xvshuf_w (__m256i_op0, __m256i_op1, __m256i_op2);
   ASSERTEQ_64 (__LINE__, __m256i_result, __m256i_out);
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
 index f3b800f88..93a3078fa 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vshuf.c
@@ -33,7 +33,7 @@ main ()
   *((unsigned long *)&__m128i_op2[1]) = 0x0000000000000000;
   *((unsigned long *)&__m128i_op2[0]) = 0x3f2f1f0f00000000;
   *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
 -  *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
 +  *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00000000;
   __m128i_out = __lsx_vshuf_b (__m128i_op0, __m128i_op1, __m128i_op2);
   ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
@@ -153,7 +153,7 @@ main ()
   *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
   *((unsigned long *)&__m128i_op2[1]) = 0x00007fff00007fff;
   *((unsigned long *)&__m128i_op2[0]) = 0x0000000000000000;
 -  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
 +  *((unsigned long *)&__m128i_result[1]) = 0x00007fff00000000;
   *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
   __m128i_out = __lsx_vshuf_h (__m128i_op0, __m128i_op1, __m128i_op2);
   ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
@@ -198,7 +198,7 @@ main ()
   *((unsigned long *)&__m128i_op2[1]) = 0x00000000000000c0;
   *((unsigned long *)&__m128i_op2[0]) = 0x00000001ffffff29;
   *((unsigned long *)&__m128i_result[1]) = 0xffffff29ffffff29;
 -  *((unsigned long *)&__m128i_result[0]) = 0x0000000100000001;
 +  *((unsigned long *)&__m128i_result[0]) = 0xffffff2900000001;
   __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
   ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
@@ -219,7 +219,7 @@ main ()
   *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
   *((unsigned long *)&__m128i_op2[1]) = 0x0000000020000020;
   *((unsigned long *)&__m128i_op2[0]) = 0x0000000020000020;
 -  *((unsigned long *)&__m128i_result[1]) = 0x2000002000000000;
 +  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
   *((unsigned long *)&__m128i_result[0]) = 0x2000002020000020;
   __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
   ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
@@ -241,7 +241,7 @@ main ()
   *((unsigned long *)&__m128i_op1[0]) = 0x0000001000000010;
   *((unsigned long *)&__m128i_op2[1]) = 0x8000000100000000;
   *((unsigned long *)&__m128i_op2[0]) = 0x8000000000000103;
 -  *((unsigned long *)&__m128i_result[1]) = 0x0000010300000103;
 +  *((unsigned long *)&__m128i_result[1]) = 0x8000000000000103;
   *((unsigned long *)&__m128i_result[0]) = 0x0000010380000001;
   __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
   ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
@@ -252,7 +252,7 @@ main ()
   *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
   *((unsigned long *)&__m128i_op2[1]) = 0xffffffffffffffff;
   *((unsigned long *)&__m128i_op2[0]) = 0xffffffffffffffff;
 -  *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
 +  *((unsigned long *)&__m128i_result[1]) = 0xffffffff00000000;
   *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
   __m128i_out = __lsx_vshuf_w (__m128i_op0, __m128i_op1, __m128i_op2);
   ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
 -- 
 2.43.0
--- a/0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
+++ b/0065-LoongArch-Fix-ICE-and-use-simplify_gen_subreg-instea.patch
@ -0,0 +1,318 @@
 From 87396b4550eeb097cdbe73fb19c84059ba6bb85e Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Wed, 29 Nov 2023 11:18:00 +0800
 Subject: [PATCH 065/188] LoongArch: Fix ICE and use simplify_gen_subreg
 instead of gen_rtx_SUBREG directly.
 loongarch_expand_vec_cond_mask_expr generates 'subreg's of 'subreg's, which are not supported
 in GCC; this causes an ICE:
 ice.c:55:1: error: unrecognizable insn:
   55 | }
      | ^
 (insn 63 62 64 8 (set (reg:V4DI 278)
        (subreg:V4DI (subreg:V4DF (reg:V4DI 273 [ vect__53.26 ]) 0) 0)) -1
     (nil))
 during RTL pass: vregs
 ice.c:55:1: internal compiler error: in extract_insn, at recog.cc:2804
 Last time, Ruoyao has fixed a similar ICE:
 https://gcc.gnu.org/pipermail/gcc-patches/2023-November/636156.html
 This patch fixes the ICE and uses simplify_gen_subreg instead of gen_rtx_SUBREG as much as possible
 to avoid the same ICE happening again.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_try_expand_lsx_vshuf_const): Use
 	simplify_gen_subreg instead of gen_rtx_SUBREG.
 	(loongarch_expand_vec_perm_const_2): Ditto.
 	(loongarch_expand_vec_cond_expr): Ditto.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/pr112476-3.c: New test.
 	* gcc.target/loongarch/pr112476-4.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             | 79 +++++++++++--------
 .../gcc.target/loongarch/pr112476-3.c         | 58 ++++++++++++++
 .../gcc.target/loongarch/pr112476-4.c         |  4 +
 3 files changed, 108 insertions(+), 33 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-3.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr112476-4.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index d64777179..4a3a7a246 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -8824,13 +8824,13 @@ loongarch_try_expand_lsx_vshuf_const (struct expand_vec_perm_d *d)
       if (d->vmode == E_V2DFmode)
 	{
 	  sel = gen_rtx_CONST_VECTOR (E_V2DImode, gen_rtvec_v (d->nelt, rperm));
 -	  tmp = gen_rtx_SUBREG (E_V2DImode, d->target, 0);
 +	  tmp = simplify_gen_subreg (E_V2DImode, d->target, d->vmode, 0);
 	  emit_move_insn (tmp, sel);
 	}
       else if (d->vmode == E_V4SFmode)
 	{
 	  sel = gen_rtx_CONST_VECTOR (E_V4SImode, gen_rtvec_v (d->nelt, rperm));
 -	  tmp = gen_rtx_SUBREG (E_V4SImode, d->target, 0);
 +	  tmp = simplify_gen_subreg (E_V4SImode, d->target, d->vmode, 0);
 	  emit_move_insn (tmp, sel);
 	}
       else
@@ -9614,8 +9614,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  /* Adjust op1 for selecting correct value in high 128bit of target
 	     register.
 	     op1: E_V4DImode, { 4, 5, 6, 7 } -> { 2, 3, 4, 5 }.  */
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
 	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
 					      conv_op0, GEN_INT (0x21)));
@@ -9644,8 +9644,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  emit_move_insn (op0_alt, d->op0);
 	  /* Generate subreg for fitting into insn gen function.  */
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
 	  /* Adjust op value in temp register.
 	     op0 = {0,1,2,3}, op1 = {4,5,0,1}  */
@@ -9691,9 +9691,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  emit_move_insn (op1_alt, d->op1);
 	  emit_move_insn (op0_alt, d->op0);
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
 -	  rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
 +	  rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
 +						 d->vmode, 0);
 	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
 					      conv_op0, GEN_INT (0x02)));
@@ -9725,9 +9726,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	 Selector sample: E_V4DImode, { 0, 1, 4 ,5 }  */
       if (!d->testing_p)
 	{
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
 -	  rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
 +	  rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
 +						 d->vmode, 0);
 	  /* We can achieve the expectation by using sinple xvpermi.q insn.  */
 	  emit_move_insn (conv_target, conv_op1);
@@ -9752,8 +9754,8 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  emit_move_insn (op1_alt, d->op1);
 	  emit_move_insn (op0_alt, d->op0);
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
 	  /* Adjust op value in temp regiter.
 	     op0 = { 0, 1, 2, 3 }, op1 = { 6, 7, 2, 3 }  */
 	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
@@ -9797,9 +9799,10 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  emit_move_insn (op1_alt, d->op1);
 	  emit_move_insn (op0_alt, d->op0);
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
 -	  rtx conv_target = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, op1_alt, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, op0_alt, d->vmode, 0);
 +	  rtx conv_target = simplify_gen_subreg (E_V4DImode, d->target,
 +						 d->vmode, 0);
 	  emit_insn (gen_lasx_xvpermi_q_v4di (conv_op1, conv_op1,
 					      conv_op0, GEN_INT (0x13)));
@@ -9831,10 +9834,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	 Selector sample:E_V8SImode, { 2, 2, 2, 2, 2, 2, 2, 2 }  */
       if (!d->testing_p)
 	{
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
 	  rtx temp_reg = gen_reg_rtx (d->vmode);
 -	  rtx conv_temp = gen_rtx_SUBREG (E_V4DImode, temp_reg, 0);
 +	  rtx conv_temp = simplify_gen_subreg (E_V4DImode, temp_reg,
 +					       d->vmode, 0);
 	  emit_move_insn (temp_reg, d->op0);
@@ -9943,9 +9947,11 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  emit_move_insn (op0_alt, d->op0);
 	  emit_move_insn (op1_alt, d->op1);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
 -	  rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
 -	  rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
 +	  rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
 +					       d->vmode, 0);
 +	  rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
 +					       d->vmode, 0);
 	  /* Duplicate op0's low 128bit in op0, then duplicate high 128bit
 	     in op1.  After this, xvshuf.* insn's selector argument can
@@ -9978,10 +9984,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
 	  emit_move_insn (op0_alt, d->op0);
 	  emit_move_insn (op1_alt, d->op1);
 -	  rtx conv_op0a = gen_rtx_SUBREG (E_V4DImode, op0_alt, 0);
 -	  rtx conv_op1a = gen_rtx_SUBREG (E_V4DImode, op1_alt, 0);
 -	  rtx conv_op0 = gen_rtx_SUBREG (E_V4DImode, d->op0, 0);
 -	  rtx conv_op1 = gen_rtx_SUBREG (E_V4DImode, d->op1, 0);
 +	  rtx conv_op0a = simplify_gen_subreg (E_V4DImode, op0_alt,
 +					       d->vmode, 0);
 +	  rtx conv_op1a = simplify_gen_subreg (E_V4DImode, op1_alt,
 +					       d->vmode, 0);
 +	  rtx conv_op0 = simplify_gen_subreg (E_V4DImode, d->op0, d->vmode, 0);
 +	  rtx conv_op1 = simplify_gen_subreg (E_V4DImode, d->op1, d->vmode, 0);
 	  /* Reorganize op0's hi/lo 128bit and op1's hi/lo 128bit, to make sure
 	     that selector's low 128bit can access all op0's elements, and
@@ -10101,12 +10109,12 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
     {
     case E_V4DFmode:
       sel = gen_rtx_CONST_VECTOR (E_V4DImode, gen_rtvec_v (d->nelt, rperm));
 -      tmp = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
 +      tmp = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
       emit_move_insn (tmp, sel);
       break;
     case E_V8SFmode:
       sel = gen_rtx_CONST_VECTOR (E_V8SImode, gen_rtvec_v (d->nelt, rperm));
 -      tmp = gen_rtx_SUBREG (E_V8SImode, d->target, 0);
 +      tmp = simplify_gen_subreg (E_V8SImode, d->target, d->vmode, 0);
       emit_move_insn (tmp, sel);
       break;
     default:
@@ -10192,7 +10200,7 @@ loongarch_expand_vec_perm_const_2 (struct expand_vec_perm_d *d)
      64bit in target vector register.  */
   else if (extract_ev_od)
     {
 -      rtx converted = gen_rtx_SUBREG (E_V4DImode, d->target, 0);
 +      rtx converted = simplify_gen_subreg (E_V4DImode, d->target, d->vmode, 0);
       emit_insn (gen_lasx_xvpermi_d_v4di (converted, converted,
 					  GEN_INT (0xD8)));
     }
@@ -11279,7 +11287,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
 	  if (mode != vimode)
 	    {
 	      xop1 = gen_reg_rtx (vimode);
 -	      emit_move_insn (xop1, gen_rtx_SUBREG (vimode, operands[1], 0));
 +	      emit_move_insn (xop1,
 +			      simplify_gen_subreg (vimode, operands[1],
 +						   mode, 0));
 	    }
 	  emit_move_insn (src1, xop1);
 	}
@@ -11296,7 +11306,9 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
 	  if (mode != vimode)
 	    {
 	      xop2 = gen_reg_rtx (vimode);
 -	      emit_move_insn (xop2, gen_rtx_SUBREG (vimode, operands[2], 0));
 +	      emit_move_insn (xop2,
 +			      simplify_gen_subreg (vimode, operands[2],
 +						   mode, 0));
 	    }
 	  emit_move_insn (src2, xop2);
 	}
@@ -11315,7 +11327,8 @@ loongarch_expand_vec_cond_expr (machine_mode mode, machine_mode vimode,
 			  gen_rtx_AND (vimode, mask, src1));
       /* The result is placed back to a register with the mask.  */
       emit_insn (gen_rtx_SET (mask, bsel));
 -      emit_move_insn (operands[0], gen_rtx_SUBREG (mode, mask, 0));
 +      emit_move_insn (operands[0],
 +		      simplify_gen_subreg (mode, mask, vimode, 0));
     }
 }
 diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-3.c b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c
 new file mode 100644
 index 000000000..d696d4182
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/pr112476-3.c
@@ -0,0 +1,58 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlsx" } */
 +
 +#include <stdint.h>
 +
 +typedef int8_t orc_int8;
 +typedef int16_t orc_int16;
 +typedef int32_t orc_int32;
 +typedef int64_t orc_int64;
 +
 +typedef union
 +{
 +  orc_int32 i;
 +  float f;
 +  orc_int16 x2[2];
 +  orc_int8 x4[4];
 +} orc_union32;
 +typedef union
 +{
 +  orc_int64 i;
 +  double f;
 +  orc_int32 x2[2];
 +  float x2f[2];
 +  orc_int16 x4[4];
 +} orc_union64;
 +
 +void
 +audio_orc_s32_to_double (double * restrict d1,
 +    const signed int * restrict s1, int n)
 +{
 +  int i;
 +  orc_union64 *restrict ptr0;
 +  const orc_union32 *restrict ptr4;
 +  orc_union32 var33;
 +  orc_union64 var34;
 +  orc_union64 var35;
 +  orc_union64 var36;
 +
 +  ptr0 = (orc_union64 *) d1;
 +  ptr4 = (orc_union32 *) s1;
 +
 +  var34.i = 0x41e0000000000000UL;
 +
 +  for (i = 0; i < n; i++) {
 +    var33 = ptr4[i];
 +    var36.f = var33.i;
 +    {
 +      orc_union64 _src1;
 +      orc_union64 _src2;
 +      orc_union64 _dest1;
 +      _src1.i = ((var36.i) & ((((var36.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
 +      _src2.i = ((var34.i) & ((((var34.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
 +      _dest1.f = _src1.f / _src2.f;
 +      var35.i = ((_dest1.i) & ((((_dest1.i)&0x7ff0000000000000UL) == 0) ? 0xfff0000000000000UL : 0xffffffffffffffffUL));
 +    }
 +    ptr0[i] = var35;
 +  }
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/pr112476-4.c b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c
 new file mode 100644
 index 000000000..955d98552
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/pr112476-4.c
@@ -0,0 +1,4 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O3 -mlasx" } */
 + 
 +#include "pr112476-3.c"
 -- 
 2.43.0
--- a/0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
+++ b/0066-LoongArch-Fix-eh_return-epilogue-for-normal-returns.patch
@ -0,0 +1,236 @@
 From 34088d0a8685defa97754b7ab5d90b9bc536cfaa Mon Sep 17 00:00:00 2001
 From: Yang Yujie <yangyujie@loongson.cn>
 Date: Fri, 8 Dec 2023 18:01:18 +0800
 Subject: [PATCH 066/188] LoongArch: Fix eh_return epilogue for normal returns.
 On LoongArch, the registers $r4 - $r7 (EH_RETURN_DATA_REGNO) will be saved
 and restored in the function prologue and epilogue if the given function calls
 __builtin_eh_return.  This causes the return value to be overwritten on normal
 return paths and breaks a rare case of libgcc's _Unwind_RaiseException.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc: Do not restore the saved eh_return
 	data registers ($r4-$r7) for a normal return of a function that calls
 	__builtin_eh_return elsewhere.
 	* config/loongarch/loongarch-protos.h: Same.
 	* config/loongarch/loongarch.md: Same.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/eh_return-normal-return.c: New test.
 ---
 gcc/config/loongarch/loongarch-protos.h       |  2 +-
 gcc/config/loongarch/loongarch.cc             | 34 ++++++++++++-----
 gcc/config/loongarch/loongarch.md             | 23 ++++++++++-
 .../loongarch/eh_return-normal-return.c       | 38 +++++++++++++++++++
 4 files changed, 84 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
 diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
 index 117669e9f..e5fcf3111 100644
 --- a/gcc/config/loongarch/loongarch-protos.h
 +++ b/gcc/config/loongarch/loongarch-protos.h
@@ -60,7 +60,7 @@ enum loongarch_symbol_type {
 extern rtx loongarch_emit_move (rtx, rtx);
 extern HOST_WIDE_INT loongarch_initial_elimination_offset (int, int);
 extern void loongarch_expand_prologue (void);
 -extern void loongarch_expand_epilogue (bool);
 +extern void loongarch_expand_epilogue (int);
 extern bool loongarch_can_use_return_insn (void);
 extern bool loongarch_symbolic_constant_p (rtx, enum loongarch_symbol_type *);
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 4a3a7a246..7caf04d8d 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -1012,7 +1012,8 @@ loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset,
 static void
 loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
 -			      loongarch_save_restore_fn fn)
 +			      loongarch_save_restore_fn fn,
 +			      bool skip_eh_data_regs_p)
 {
   HOST_WIDE_INT offset;
@@ -1021,7 +1022,14 @@ loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset,
   for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++)
     if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST))
       {
 -	if (!cfun->machine->reg_is_wrapped_separately[regno])
 +	/* Special care needs to be taken for $r4-$r7 (EH_RETURN_DATA_REGNO)
 +	   when returning normally from a function that calls
 +	   __builtin_eh_return.  In this case, these registers are saved but
 +	   should not be restored, or the return value may be clobbered.  */
 +
 +	if (!(cfun->machine->reg_is_wrapped_separately[regno]
 +	      || (skip_eh_data_regs_p
 +	      && GP_ARG_FIRST <= regno && regno < GP_ARG_FIRST + 4)))
 	  loongarch_save_restore_reg (word_mode, regno, offset, fn);
 	offset -= UNITS_PER_WORD;
@@ -1294,7 +1302,7 @@ loongarch_expand_prologue (void)
 			    GEN_INT (-step1));
       RTX_FRAME_RELATED_P (emit_insn (insn)) = 1;
       size -= step1;
 -      loongarch_for_each_saved_reg (size, loongarch_save_reg);
 +      loongarch_for_each_saved_reg (size, loongarch_save_reg, false);
     }
   /* Set up the frame pointer, if we're using one.  */
@@ -1379,11 +1387,13 @@ loongarch_can_use_return_insn (void)
   return reload_completed && cfun->machine->frame.total_size == 0;
 }
 -/* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P
 -   says which.  */
 +/* Expand function epilogue using the following insn patterns:
 +   "epilogue"	      (style == NORMAL_RETURN)
 +   "sibcall_epilogue" (style == SIBCALL_RETURN)
 +   "eh_return"	      (style == EXCEPTION_RETURN) */
 void
 -loongarch_expand_epilogue (bool sibcall_p)
 +loongarch_expand_epilogue (int style)
 {
   /* Split the frame into two.  STEP1 is the amount of stack we should
      deallocate before restoring the registers.  STEP2 is the amount we
@@ -1400,7 +1410,8 @@ loongarch_expand_epilogue (bool sibcall_p)
   bool need_barrier_p
     = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0;
 -  if (!sibcall_p && loongarch_can_use_return_insn ())
 +  /* Handle simple returns.  */
 +  if (style == NORMAL_RETURN && loongarch_can_use_return_insn ())
     {
       emit_jump_insn (gen_return ());
       return;
@@ -1476,7 +1487,9 @@ loongarch_expand_epilogue (bool sibcall_p)
   /* Restore the registers.  */
   loongarch_for_each_saved_reg (frame->total_size - step2,
 -				loongarch_restore_reg);
 +				loongarch_restore_reg,
 +				crtl->calls_eh_return
 +				&& style != EXCEPTION_RETURN);
   if (need_barrier_p)
     loongarch_emit_stack_tie ();
@@ -1497,11 +1510,12 @@ loongarch_expand_epilogue (bool sibcall_p)
     }
   /* Add in the __builtin_eh_return stack adjustment.  */
 -  if (crtl->calls_eh_return)
 +  if (crtl->calls_eh_return && style == EXCEPTION_RETURN)
     emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx,
 			      EH_RETURN_STACKADJ_RTX));
 -  if (!sibcall_p)
 +  /* Emit return unless doing sibcall.  */
 +  if (style != SIBCALL_RETURN)
     emit_jump_insn (gen_simple_return_internal (ra));
 }
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index c6edd1dda..222f1ae83 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -125,6 +125,11 @@
    (T1_REGNUM			13)
    (S0_REGNUM			23)
 +   ;; Return path styles
 +   (NORMAL_RETURN		0)
 +   (SIBCALL_RETURN		1)
 +   (EXCEPTION_RETURN		2)
 +
    ;; PIC long branch sequences are never longer than 100 bytes.
    (MAX_PIC_BRANCH_LENGTH	100)
 ])
@@ -3276,7 +3281,7 @@
   [(const_int 2)]
   ""
 {
 -  loongarch_expand_epilogue (false);
 +  loongarch_expand_epilogue (NORMAL_RETURN);
   DONE;
 })
@@ -3284,7 +3289,7 @@
   [(const_int 2)]
   ""
 {
 -  loongarch_expand_epilogue (true);
 +  loongarch_expand_epilogue (SIBCALL_RETURN);
   DONE;
 })
@@ -3341,6 +3346,20 @@
     emit_insn (gen_eh_set_ra_di (operands[0]));
   else
     emit_insn (gen_eh_set_ra_si (operands[0]));
 +
 +  emit_jump_insn (gen_eh_return_internal ());
 +  emit_barrier ();
 +  DONE;
 +})
 +
 +(define_insn_and_split "eh_return_internal"
 +  [(eh_return)]
 +  ""
 +  "#"
 +  "epilogue_completed"
 +  [(const_int 0)]
 +{
 +  loongarch_expand_epilogue (EXCEPTION_RETURN);
   DONE;
 })
 diff --git a/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
 new file mode 100644
 index 000000000..f8f3965f8
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/eh_return-normal-return.c
@@ -0,0 +1,38 @@
 +/* { dg-do run } */
 +/* { dg-options "-O2" } */
 +
 +#include <stdlib.h>
 +
 +int foo ()  __attribute__((noinline));
 +int main ();
 +
 +int
 +foo () {
 +
 +  int t;
 +
 +  /* prevent optimization using asm */
 +  asm ("" : "=r" (t) : "0" (-1));
 +  asm ("" : "=r" (t) : "0" (t ? 1 : 0));
 +
 +  if (t == 0)
 +    /* never reached */
 +    __builtin_eh_return (0, __builtin_return_address (0));
 +
 +  else if (t == 1)
 +    /* return here */
 +    return 202312;
 +
 +  else
 +    /* never reached: prevent vrp optimization in main */
 +    return 0;
 +}
 +
 +int
 +main ()
 +{
 +  if (foo() == 202312)
 +    return 0; 
 +  else
 +    abort ();
 +}
 -- 
 2.43.0
--- a/0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
+++ b/0067-LoongArch-Allow-mcmodel-extreme-and-model-attribute-.patch
@ -0,0 +1,180 @@
 From fdb51014f00094737459d5c9008630454ec7f342 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Thu, 7 Dec 2023 15:45:30 +0800
 Subject: [PATCH 067/188] LoongArch: Allow -mcmodel=extreme and model attribute
 with -mexplicit-relocs=auto
 There seems to be no real reason to require -mexplicit-relocs=always for
 -mcmodel=extreme or model attribute.  As the linker does not know how to
 relax a 3-operand la.local or la.global pseudo instruction, just emit
 explicit relocs for SYMBOL_PCREL64, and under TARGET_CMODEL_EXTREME also
 SYMBOL_GOT_DISP.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc (loongarch_explicit_relocs_p):
 	Return true for SYMBOL_PCREL64.  Return true for SYMBOL_GOT_DISP
 	if TARGET_CMODEL_EXTREME.
 	(loongarch_split_symbol): Check for la_opt_explicit_relocs !=
 	EXPLICIT_RELOCS_NONE instead of TARGET_EXPLICIT_RELOCS.
 	(loongarch_print_operand_reloc): Likewise.
 	(loongarch_option_override_internal): Likewise.
 	(loongarch_handle_model_attribute): Likewise.
 	* doc/invoke.texi (-mcmodel=extreme): Update the compatibility
 	between it and -mexplicit-relocs=.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/attr-model-3.c: New test.
 	* gcc.target/loongarch/attr-model-4.c: New test.
 	* gcc.target/loongarch/func-call-extreme-3.c: New test.
 	* gcc.target/loongarch/func-call-extreme-4.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             | 25 ++++++++++++-------
 gcc/doc/invoke.texi                           |  4 +--
 .../gcc.target/loongarch/attr-model-3.c       |  6 +++++
 .../gcc.target/loongarch/attr-model-4.c       |  6 +++++
 .../loongarch/func-call-extreme-3.c           |  7 ++++++
 .../loongarch/func-call-extreme-4.c           |  7 ++++++
 6 files changed, 44 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-3.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/attr-model-4.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 7caf04d8d..4362149ef 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -1969,9 +1969,16 @@ loongarch_explicit_relocs_p (enum loongarch_symbol_type type)
       case SYMBOL_TLS_LE:
       case SYMBOL_TLSGD:
       case SYMBOL_TLSLDM:
 -	/* The linker don't know how to relax TLS accesses.  */
 +      case SYMBOL_PCREL64:
 +	/* The linker don't know how to relax TLS accesses or 64-bit
 +	   pc-relative accesses.  */
 	return true;
       case SYMBOL_GOT_DISP:
 +	/* The linker don't know how to relax GOT accesses in extreme
 +	   code model.  */
 +	if (TARGET_CMODEL_EXTREME)
 +	  return true;
 +
 	/* If we are performing LTO for a final link, and we have the
 	   linker plugin so we know the resolution of the symbols, then
 	   all GOT references are binding to external symbols or
@@ -3134,7 +3141,7 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
   if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ())
     {
 -      gcc_assert (TARGET_EXPLICIT_RELOCS);
 +      gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
       temp1 = gen_reg_rtx (Pmode);
       emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
@@ -5933,7 +5940,7 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
     loongarch_classify_symbolic_expression (op);
   if (loongarch_symbol_extreme_p (symbol_type))
 -    gcc_assert (TARGET_EXPLICIT_RELOCS);
 +    gcc_assert (la_opt_explicit_relocs != EXPLICIT_RELOCS_NONE);
   switch (symbol_type)
     {
@@ -7540,9 +7547,9 @@ loongarch_option_override_internal (struct gcc_options *opts,
   switch (la_target.cmodel)
     {
       case CMODEL_EXTREME:
 -	if (!TARGET_EXPLICIT_RELOCS)
 -	  error ("code model %qs needs %s",
 -		 "extreme", "-mexplicit-relocs=always");
 +	if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
 +	  error ("code model %qs is not compatible with %s",
 +		 "extreme", "-mexplicit-relocs=none");
 	if (opts->x_flag_plt)
 	  {
@@ -7908,11 +7915,11 @@ loongarch_handle_model_attribute (tree *node, tree name, tree arg, int,
 	  *no_add_attrs = true;
 	  return NULL_TREE;
 	}
 -      if (!TARGET_EXPLICIT_RELOCS)
 +      if (la_opt_explicit_relocs == EXPLICIT_RELOCS_NONE)
 	{
 	  error_at (DECL_SOURCE_LOCATION (decl),
 -		    "%qE attribute requires %s", name,
 -		    "-mexplicit-relocs=always");
 +		    "%qE attribute is not compatible with %s", name,
 +		    "-mexplicit-relocs=none");
 	  *no_add_attrs = true;
 	  return NULL_TREE;
 	}
 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
 index 76a8f20d1..5c6515cb1 100644
 --- a/gcc/doc/invoke.texi
 +++ b/gcc/doc/invoke.texi
@@ -24602,8 +24602,8 @@ The text segment and data segment must be within 2GB addressing space.
 @item extreme
 This mode does not limit the size of the code segment and data segment.
 -The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
 -@option{-mno-explicit-relocs}.
 +The @option{-mcmodel=extreme} option is incompatible with @option{-fplt}
 +and/or @option{-mexplicit-relocs=none}.
 @end table
 The default code model is @code{normal}.
 diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-3.c b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
 new file mode 100644
 index 000000000..5622d5086
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/attr-model-3.c
@@ -0,0 +1,6 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mexplicit-relocs=auto -mcmodel=normal -O2" } */
 +/* { dg-final { scan-assembler-times "%pc64_hi12" 2 } } */
 +
 +#define ATTR_MODEL_TEST
 +#include "attr-model-test.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/attr-model-4.c b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c
 new file mode 100644
 index 000000000..482724bb9
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/attr-model-4.c
@@ -0,0 +1,6 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mexplicit-relocs=auto -mcmodel=extreme -O2" } */
 +/* { dg-final { scan-assembler-times "%pc64_hi12" 3 } } */
 +
 +#define ATTR_MODEL_TEST
 +#include "attr-model-test.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c
 new file mode 100644
 index 000000000..a4da44b4a
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-3.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */
 +/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
 +/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
 +/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
 +
 +#include "func-call-extreme-1.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c
 new file mode 100644
 index 000000000..16b00f4c5
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-4.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs=auto -mcmodel=extreme" } */
 +/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
 +/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
 +/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
 +
 +#include "func-call-extreme-1.c"
 -- 
 2.43.0
--- a/0068-LoongArch-Fix-warnings-building-libgcc.patch
+++ b/0068-LoongArch-Fix-warnings-building-libgcc.patch
@ -0,0 +1,79 @@
 From 5a910f294605d0163f8f4ac255a14425b154b5dd Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 9 Dec 2023 22:08:37 +0800
 Subject: [PATCH 068/188] LoongArch: Fix warnings building libgcc
 We are excluding loongarch-opts.h from target libraries, but now struct
 loongarch_target and gcc_options are not declared in the target
 libraries, causing:
 In file included from ../.././gcc/options.h:8,
                 from ../.././gcc/tm.h:49,
                 from ../../../gcc/libgcc/fixed-bit.c:48:
 ../../../gcc/libgcc/../gcc/config/loongarch/loongarch-opts.h:57:41:
 warning: 'struct gcc_options' declared inside parameter list will not
 be visible outside of this definition or declaration
   57 |                                  struct gcc_options *opts,
      |                                         ^~~~~~~~~~~
 So exclude the declarations referring to the C++ structs as well.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-opts.h (la_target): Move into #if
 	for loongarch-def.h.
 	(loongarch_init_target): Likewise.
 	(loongarch_config_target): Likewise.
 	(loongarch_update_gcc_opt_status): Likewise.
 ---
 gcc/config/loongarch/loongarch-opts.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index 7010ddfec..639ed50bd 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -21,22 +21,15 @@ along with GCC; see the file COPYING3.  If not see
 #ifndef LOONGARCH_OPTS_H
 #define LOONGARCH_OPTS_H
 -/* This is a C++ header and it shouldn't be used by target libraries.  */
 +/* The loongarch-def.h file is a C++ header and it shouldn't be used by
 +   target libraries.  Exclude it and everything using the C++ structs
 +   (struct loongarch_target and gcc_options) from target libraries.  */
 #if !defined(IN_LIBGCC2) && !defined(IN_TARGET_LIBS) && !defined(IN_RTS)
 #include "loongarch-def.h"
 -#endif
 /* Target configuration */
 extern struct loongarch_target la_target;
 -/* Flag status */
 -struct loongarch_flags {
 -    int flt; const char* flt_str;
 -#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x))
 -    int sx[2];
 -};
 -
 -
 /* Initialize loongarch_target from separate option variables.  */
 void
 loongarch_init_target (struct loongarch_target *target,
@@ -56,7 +49,14 @@ void
 loongarch_update_gcc_opt_status (struct loongarch_target *target,
 				 struct gcc_options *opts,
 				 struct gcc_options *opts_set);
 +#endif
 +/* Flag status */
 +struct loongarch_flags {
 +    int flt; const char* flt_str;
 +#define SX_FLAG_TYPE(x) ((x) < 0 ? -(x) : (x))
 +    int sx[2];
 +};
 /* Macros for common conditional expressions used in loongarch.{c,h,md} */
 #define TARGET_CMODEL_NORMAL	    (la_target.cmodel == CMODEL_NORMAL)
 -- 
 2.43.0
--- a/0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch
+++ b/0069-LoongArch-testsuite-Remove-XFAIL-in-vect-ftint-no-in.patch
@ -0,0 +1,30 @@
 From 639e7518c8a4468cd50d774c5a3dbda5f2dbb4a7 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Wed, 13 Dec 2023 02:39:35 +0800
 Subject: [PATCH 069/188] LoongArch: testsuite: Remove XFAIL in
 vect-ftint-no-inexact.c
 After r14-6455 this no longer fails.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-ftint-no-inexact.c (xfail): Remove.
 ---
 gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
 index 83d268099..61918beef 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-ftint-no-inexact.c
@@ -39,6 +39,5 @@
 /* { dg-final { scan-assembler-not "\txvftintrne\.w\.s" } } */
 /* { dg-final { scan-assembler-not "\txvftintrne\.l\.d" } } */
 -/* trunc: XFAIL due to PR 107723 */
 -/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" { xfail *-*-* } } } */
 +/* { dg-final { scan-assembler "bl\t%plt\\(trunc\\)" } } */
 /* { dg-final { scan-assembler "bl\t%plt\\(truncf\\)" } } */
 -- 
 2.43.0
--- a/0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch
+++ b/0070-LoongArch-Include-rtl.h-for-COSTS_N_INSNS-instead-of.patch
@ -0,0 +1,44 @@
 From 6a5e3932a39f1ffa6f87479748ee711e4fa47d30 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 9 Dec 2023 15:27:28 +0800
 Subject: [PATCH 070/188] LoongArch: Include rtl.h for COSTS_N_INSNS instead of
 hard coding our own
 With loongarch-def.cc switched from C to C++, we can include rtl.h for
 COSTS_N_INSNS, instead of hard coding our own.
 This is a non-functional change for now, but it will make the code more
 future-proof in case COSTS_N_INSNS in rtl.h would be changed.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-def.cc (rtl.h): Include.
 	(COSTS_N_INSNS): Remove the macro definition.
 ---
 gcc/config/loongarch/loongarch-def.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
 index c41804a18..6217b1926 100644
 --- a/gcc/config/loongarch/loongarch-def.cc
 +++ b/gcc/config/loongarch/loongarch-def.cc
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "system.h"
 #include "coretypes.h"
 #include "tm.h"
 +#include "rtl.h"
 #include "loongarch-def.h"
 #include "loongarch-str.h"
@@ -89,8 +90,6 @@ array_tune<loongarch_align> loongarch_cpu_align =
     .set (CPU_LA464, la464_align ())
     .set (CPU_LA664, la464_align ());
 -#define COSTS_N_INSNS(N) ((N) * 4)
 -
 /* Default RTX cost initializer.  */
 loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
   : fp_add (COSTS_N_INSNS (1)),
 -- 
 2.43.0
--- a/0071-LoongArch-Fix-instruction-costs-PR112936.patch
+++ b/0071-LoongArch-Fix-instruction-costs-PR112936.patch
@ -0,0 +1,165 @@
 From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 9 Dec 2023 17:41:32 +0800
 Subject: [PATCH 071/188] LoongArch: Fix instruction costs [PR112936]
 Replace the instruction costs in loongarch_rtx_cost_data constructor
 based on micro-benchmark results on LA464 and LA664.
 This allows optimizations like "x * 17" to alsl, and "x * 68" to alsl
 and slli.
 gcc/ChangeLog:
 	PR target/112936
 	* config/loongarch/loongarch-def.cc
 	(loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update
 	instruction costs per micro-benchmark results.
 	(loongarch_rtx_cost_optimize_size): Set all instruction costs
 	to (COSTS_N_INSNS (1) + 1).
 	* config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove
 	special case for multiplication when optimizing for size.
 	Adjust division cost when TARGET_64BIT && !TARGET_DIV32.
 	Account the extra cost when TARGET_CHECK_ZERO_DIV and
 	optimizing for speed.
 gcc/testsuite/ChangeLog
 	PR target/112936
 	* gcc.target/loongarch/mul-const-reduction.c: New test.
 ---
 gcc/config/loongarch/loongarch-def.cc         | 39 ++++++++++---------
 gcc/config/loongarch/loongarch.cc             | 22 +++++------
 .../loongarch/mul-const-reduction.c           | 11 ++++++
 3 files changed, 43 insertions(+), 29 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
 diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
 index 6217b1926..4a8885e83 100644
 --- a/gcc/config/loongarch/loongarch-def.cc
 +++ b/gcc/config/loongarch/loongarch-def.cc
@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align =
 /* Default RTX cost initializer.  */
 loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
 -  : fp_add (COSTS_N_INSNS (1)),
 -    fp_mult_sf (COSTS_N_INSNS (2)),
 -    fp_mult_df (COSTS_N_INSNS (4)),
 -    fp_div_sf (COSTS_N_INSNS (6)),
 +  : fp_add (COSTS_N_INSNS (5)),
 +    fp_mult_sf (COSTS_N_INSNS (5)),
 +    fp_mult_df (COSTS_N_INSNS (5)),
 +    fp_div_sf (COSTS_N_INSNS (8)),
     fp_div_df (COSTS_N_INSNS (8)),
 -    int_mult_si (COSTS_N_INSNS (1)),
 -    int_mult_di (COSTS_N_INSNS (1)),
 -    int_div_si (COSTS_N_INSNS (4)),
 -    int_div_di (COSTS_N_INSNS (6)),
 +    int_mult_si (COSTS_N_INSNS (4)),
 +    int_mult_di (COSTS_N_INSNS (4)),
 +    int_div_si (COSTS_N_INSNS (5)),
 +    int_div_di (COSTS_N_INSNS (5)),
     branch_cost (6),
     memory_latency (4) {}
@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
 array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
   array_tune<loongarch_rtx_cost_data> ();
 -/* RTX costs to use when optimizing for size.  */
 +/* RTX costs to use when optimizing for size.
 +   We use a value slightly larger than COSTS_N_INSNS (1) for all of them
 +   because they are slower than simple instructions.  */
 +#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1)
 const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
   loongarch_rtx_cost_data ()
 -    .fp_add_ (4)
 -    .fp_mult_sf_ (4)
 -    .fp_mult_df_ (4)
 -    .fp_div_sf_ (4)
 -    .fp_div_df_ (4)
 -    .int_mult_si_ (4)
 -    .int_mult_di_ (4)
 -    .int_div_si_ (4)
 -    .int_div_di_ (4);
 +    .fp_add_ (COST_COMPLEX_INSN)
 +    .fp_mult_sf_ (COST_COMPLEX_INSN)
 +    .fp_mult_df_ (COST_COMPLEX_INSN)
 +    .fp_div_sf_ (COST_COMPLEX_INSN)
 +    .fp_div_df_ (COST_COMPLEX_INSN)
 +    .int_mult_si_ (COST_COMPLEX_INSN)
 +    .int_mult_di_ (COST_COMPLEX_INSN)
 +    .int_div_si_ (COST_COMPLEX_INSN)
 +    .int_div_di_ (COST_COMPLEX_INSN);
 array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
   .set (CPU_NATIVE, 4)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 4362149ef..afbb55390 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
 	*total = (speed
 		  ? loongarch_cost->int_mult_si * 3 + 6
 		  : COSTS_N_INSNS (7));
 -      else if (!speed)
 -	*total = COSTS_N_INSNS (1) + 1;
       else if (mode == DImode)
 	*total = loongarch_cost->int_mult_di;
       else
@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
     case UDIV:
     case UMOD:
 -      if (!speed)
 -	{
 -	  *total = COSTS_N_INSNS (loongarch_idiv_insns (mode));
 -	}
 -      else if (mode == DImode)
 +      if (mode == DImode)
 	*total = loongarch_cost->int_div_di;
       else
 -	*total = loongarch_cost->int_div_si;
 +	{
 +	  *total = loongarch_cost->int_div_si;
 +	  if (TARGET_64BIT && !TARGET_DIV32)
 +	    *total += COSTS_N_INSNS (2);
 +	}
 +
 +      if (TARGET_CHECK_ZERO_DIV)
 +	*total += COSTS_N_INSNS (2);
 +
       return false;
     case SIGN_EXTEND:
@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
 		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
 		      == ZERO_EXTEND))))
 	{
 -	  if (!speed)
 -	    *total = COSTS_N_INSNS (1) + 1;
 -	  else if (mode == DImode)
 +	  if (mode == DImode)
 	    *total = loongarch_cost->int_mult_di;
 	  else
 	    *total = loongarch_cost->int_mult_si;
 diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
 new file mode 100644
 index 000000000..02d9a4876
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
@@ -0,0 +1,11 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mtune=la464" } */
 +/* { dg-final { scan-assembler "alsl\.w" } } */
 +/* { dg-final { scan-assembler "slli\.w" } } */
 +/* { dg-final { scan-assembler-not "mul\.w" } } */
 +
 +int
 +test (int a)
 +{
 +  return a * 68;
 +}
 -- 
 2.43.0
--- a/0072-LoongArch-Add-alslsi3_extend.patch
+++ b/0072-LoongArch-Add-alslsi3_extend.patch
@ -0,0 +1,53 @@
 From 89dfb9ad8687f9b31be5925b2d106b6ec13cc628 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 9 Dec 2023 18:02:35 +0800
 Subject: [PATCH 072/188] LoongArch: Add alslsi3_extend
 Following the instruction cost fix, we are generating
    alsl.w $a0, $a0, $a0, 4
 instead of
    li.w  $t0, 17
    mul.w $a0, $a0, $t0
 for "x * 17", because alsl.w is 4 times faster than mul.w.  But we didn't
 have a sign-extending pattern for alsl.w, causing an extra slli.w
 instruction generated to sign-extend $a0.  Add the pattern to remove the
 redundant extension.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (alslsi3_extend): New
 	define_insn.
 ---
 gcc/config/loongarch/loongarch.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 222f1ae83..23368008e 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2874,6 +2874,18 @@
   [(set_attr "type" "arith")
    (set_attr "mode" "<MODE>")])
 +(define_insn "alslsi3_extend"
 +  [(set (match_operand:DI 0 "register_operand" "=r")
 +	(sign_extend:DI
 +	  (plus:SI
 +	    (ashift:SI (match_operand:SI 1 "register_operand" "r")
 +		       (match_operand 2 "const_immalsl_operand" ""))
 +	    (match_operand:SI 3 "register_operand" "r"))))]
 +  ""
 +  "alsl.w\t%0,%1,%3,%2"
 +  [(set_attr "type" "arith")
 +   (set_attr "mode" "SI")])
 +
 ;; Reverse the order of bytes of operand 1 and store the result in operand 0.
 -- 
 2.43.0
--- a/0073-LoongArch-Add-support-for-D-frontend.patch
+++ b/0073-LoongArch-Add-support-for-D-frontend.patch
@ -0,0 +1,224 @@
 From 6ef045728a11218f023fee4527cd6d2fdb2c2910 Mon Sep 17 00:00:00 2001
 From: liushuyu <liushuyu011@gmail.com>
 Date: Mon, 18 Dec 2023 09:52:07 +0800
 Subject: [PATCH 073/188] LoongArch: Add support for D frontend.
 gcc/ChangeLog:
 	* config.gcc: Add loongarch-d.o to d_target_objs for LoongArch
 	architecture.
 	* config/loongarch/t-loongarch: Add object target for loongarch-d.cc.
 	* config/loongarch/loongarch-d.cc
 	(loongarch_d_target_versions): add interface function to define builtin
 	D versions for LoongArch architecture.
 	(loongarch_d_handle_target_float_abi): add interface function to define
 	builtin D traits for LoongArch architecture.
 	(loongarch_d_register_target_info): add interface function to register
 	loongarch_d_handle_target_float_abi function.
 	* config/loongarch/loongarch-d.h
 	(loongarch_d_target_versions): add function prototype.
 	(loongarch_d_register_target_info): Likewise.
 libphobos/ChangeLog:
 	* configure.tgt: Enable libphobos for LoongArch architecture.
 	* libdruntime/gcc/sections/elf.d: Add TLS_DTV_OFFSET constant for
 	LoongArch64.
 	* libdruntime/gcc/unwind/generic.d: Add __aligned__ constant for
 	LoongArch64.
 ---
 gcc/config.gcc                             |  1 +
 gcc/config/loongarch/loongarch-d.cc        | 77 ++++++++++++++++++++++
 gcc/config/loongarch/loongarch-d.h         | 26 ++++++++
 gcc/config/loongarch/t-loongarch           |  4 ++
 libphobos/configure.tgt                    |  3 +
 libphobos/libdruntime/gcc/sections/elf.d   |  2 +
 libphobos/libdruntime/gcc/unwind/generic.d |  1 +
 7 files changed, 114 insertions(+)
 create mode 100644 gcc/config/loongarch/loongarch-d.cc
 create mode 100644 gcc/config/loongarch/loongarch-d.h
 diff --git a/gcc/config.gcc b/gcc/config.gcc
 index 11ab620d0..039187fa2 100644
 --- a/gcc/config.gcc
 +++ b/gcc/config.gcc
@@ -456,6 +456,7 @@ mips*-*-*)
 	;;
 loongarch*-*-*)
 	cpu_type=loongarch
 +	d_target_objs="loongarch-d.o"
 	extra_headers="larchintrin.h lsxintrin.h lasxintrin.h"
 	extra_objs="loongarch-c.o loongarch-builtins.o loongarch-cpu.o loongarch-opts.o loongarch-def.o"
 	extra_gcc_objs="loongarch-driver.o loongarch-cpu.o loongarch-opts.o loongarch-def.o"
 diff --git a/gcc/config/loongarch/loongarch-d.cc b/gcc/config/loongarch/loongarch-d.cc
 new file mode 100644
 index 000000000..9ac483c39
 --- /dev/null
 +++ b/gcc/config/loongarch/loongarch-d.cc
@@ -0,0 +1,77 @@
 +/* Subroutines for the D front end on the LoongArch architecture.
 +   Copyright (C) 2023 Free Software Foundation, Inc.
 +
 +GCC is free software; you can redistribute it and/or modify
 +it under the terms of the GNU General Public License as published by
 +the Free Software Foundation; either version 3, or (at your option)
 +any later version.
 +
 +GCC is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +<http://www.gnu.org/licenses/>.  */
 +
 +#define IN_TARGET_CODE 1
 +
 +#include "config.h"
 +#include "system.h"
 +#include "coretypes.h"
 +#include "tm_d.h"
 +#include "d/d-target.h"
 +#include "d/d-target-def.h"
 +
 +/* Implement TARGET_D_CPU_VERSIONS for LoongArch targets.  */
 +
 +void
 +loongarch_d_target_versions (void)
 +{
 +  if (TARGET_64BIT)
 +    d_add_builtin_version ("LoongArch64");
 +  else
 +    d_add_builtin_version ("LoongArch32");
 +
 +  if (TARGET_HARD_FLOAT_ABI)
 +    {
 +      d_add_builtin_version ("LoongArch_HardFloat");
 +      d_add_builtin_version ("D_HardFloat");
 +    }
 +  else if (TARGET_SOFT_FLOAT_ABI)
 +    {
 +      d_add_builtin_version ("LoongArch_SoftFloat");
 +      d_add_builtin_version ("D_SoftFloat");
 +    }
 +}
 +
 +/* Handle a call to `__traits(getTargetInfo, "floatAbi")'.  */
 +
 +static tree
 +loongarch_d_handle_target_float_abi (void)
 +{
 +  const char *abi;
 +
 +  if (TARGET_HARD_FLOAT_ABI)
 +    abi = "hard";
 +  else if (TARGET_SOFT_FLOAT_ABI)
 +    abi = "soft";
 +  else
 +    abi = "";
 +
 +  return build_string_literal (strlen (abi) + 1, abi);
 +}
 +
 +/* Implement TARGET_D_REGISTER_CPU_TARGET_INFO.  */
 +
 +void
 +loongarch_d_register_target_info (void)
 +{
 +  const struct d_target_info_spec handlers[] = {
 +    {"floatAbi", loongarch_d_handle_target_float_abi},
 +    {NULL, NULL},
 +  };
 +
 +  d_add_target_info_handlers (handlers);
 +}
 diff --git a/gcc/config/loongarch/loongarch-d.h b/gcc/config/loongarch/loongarch-d.h
 new file mode 100644
 index 000000000..a2fb8d51d
 --- /dev/null
 +++ b/gcc/config/loongarch/loongarch-d.h
@@ -0,0 +1,26 @@
 +/* Definitions for the D front end on the LoongArch architecture.
 +   Copyright (C) 2023 Free Software Foundation, Inc.
 +
 +GCC is free software; you can redistribute it and/or modify
 +it under the terms of the GNU General Public License as published by
 +the Free Software Foundation; either version 3, or (at your option)
 +any later version.
 +
 +GCC is distributed in the hope that it will be useful,
 +but WITHOUT ANY WARRANTY; without even the implied warranty of
 +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +GNU General Public License for more details.
 +
 +You should have received a copy of the GNU General Public License
 +along with GCC; see the file COPYING3.  If not see
 +<http://www.gnu.org/licenses/>.  */
 +
 +/* Defined in loongarch-d.cc  */
 +extern void
 +loongarch_d_target_versions (void);
 +extern void
 +loongarch_d_register_target_info (void);
 +
 +/* Target hooks for D language.  */
 +#define TARGET_D_CPU_VERSIONS loongarch_d_target_versions
 +#define TARGET_D_REGISTER_CPU_TARGET_INFO loongarch_d_register_target_info
 diff --git a/gcc/config/loongarch/t-loongarch b/gcc/config/loongarch/t-loongarch
 index a1a40431f..994f4d19c 100644
 --- a/gcc/config/loongarch/t-loongarch
 +++ b/gcc/config/loongarch/t-loongarch
@@ -67,6 +67,10 @@ loongarch-cpu.o: $(srcdir)/config/loongarch/loongarch-cpu.cc $(LA_STR_H) \
 loongarch-def.o: $(srcdir)/config/loongarch/loongarch-def.cc $(LA_STR_H)
 	$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
 +loongarch-d.o: $(srcdir)/config/loongarch/loongarch-d.cc
 +	$(COMPILE) $<
 +	$(POSTCOMPILE)
 +
 $(srcdir)/config/loongarch/loongarch.opt: s-loongarch-opt ; @true
 s-loongarch-opt: $(srcdir)/config/loongarch/genopts/genstr.sh \
 	$(srcdir)/config/loongarch/genopts/loongarch.opt.in \
 diff --git a/libphobos/configure.tgt b/libphobos/configure.tgt
 index 0063dd232..dcb1551cd 100644
 --- a/libphobos/configure.tgt
 +++ b/libphobos/configure.tgt
@@ -36,6 +36,9 @@ case "${target}" in
   hppa-*-linux*)
 	LIBPHOBOS_SUPPORTED=yes
 	;;
 +  loongarch*-*-linux*)
 +	LIBPHOBOS_SUPPORTED=yes
 +	;;
   mips*-*-linux*)
 	LIBPHOBOS_SUPPORTED=yes
 	;;
 diff --git a/libphobos/libdruntime/gcc/sections/elf.d b/libphobos/libdruntime/gcc/sections/elf.d
 index 5819811f3..bc993ea49 100644
 --- a/libphobos/libdruntime/gcc/sections/elf.d
 +++ b/libphobos/libdruntime/gcc/sections/elf.d
@@ -1061,6 +1061,8 @@ else version (MIPS64)
     enum TLS_DTV_OFFSET = 0x8000;
 else version (IBMZ_Any)
     enum TLS_DTV_OFFSET = 0x0;
 +else version (LoongArch64)
 +    enum TLS_DTV_OFFSET = 0x0;
 else
     static assert( false, "Platform not supported." );
 diff --git a/libphobos/libdruntime/gcc/unwind/generic.d b/libphobos/libdruntime/gcc/unwind/generic.d
 index 929b75dc7..8e5db80e1 100644
 --- a/libphobos/libdruntime/gcc/unwind/generic.d
 +++ b/libphobos/libdruntime/gcc/unwind/generic.d
@@ -141,6 +141,7 @@ else version (SPARC64)  private enum __aligned__ = 16;
 else version (SystemZ)  private enum __aligned__ = 8;
 else version (X86)      private enum __aligned__ = 16;
 else version (X86_64)   private enum __aligned__ = 16;
 +else version (LoongArch64) private enum __aligned__ = 16;
 else static assert( false, "Platform not supported.");
 align(__aligned__) struct _Unwind_Exception
 -- 
 2.43.0
--- a/0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
+++ b/0074-libruntime-Add-fiber-context-switch-code-for-LoongAr.patch
@ -0,0 +1,156 @@
 From 29eade7dc3032c6054f2ec2e2caa4ce43da6212d Mon Sep 17 00:00:00 2001
 From: Yang Yujie <yangyujie@loongson.cn>
 Date: Fri, 8 Dec 2023 18:09:41 +0800
 Subject: [PATCH 074/188] libruntime: Add fiber context switch code for
 LoongArch.
 libphobos/ChangeLog:
 	* libdruntime/config/loongarch/switchcontext.S: New file.
 ---
 .../config/loongarch/switchcontext.S          | 133 ++++++++++++++++++
 1 file changed, 133 insertions(+)
 create mode 100644 libphobos/libdruntime/config/loongarch/switchcontext.S
 diff --git a/libphobos/libdruntime/config/loongarch/switchcontext.S b/libphobos/libdruntime/config/loongarch/switchcontext.S
 new file mode 100644
 index 000000000..edfb9b67e
 --- /dev/null
 +++ b/libphobos/libdruntime/config/loongarch/switchcontext.S
@@ -0,0 +1,133 @@
 +/* LoongArch support code for fibers and multithreading.
 +   Copyright (C) 2023 Free Software Foundation, Inc.
 +
 +This file is part of GCC.
 +
 +GCC is free software; you can redistribute it and/or modify it under
 +the terms of the GNU General Public License as published by the Free
 +Software Foundation; either version 3, or (at your option) any later
 +version.
 +
 +GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 +WARRANTY; without even the implied warranty of MERCHANTABILITY or
 +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 +for more details.
 +
 +Under Section 7 of GPL version 3, you are granted additional
 +permissions described in the GCC Runtime Library Exception, version
 +3.1, as published by the Free Software Foundation.
 +
 +You should have received a copy of the GNU General Public License and
 +a copy of the GCC Runtime Library Exception along with this program;
 +see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 +<http://www.gnu.org/licenses/>.  */
 +
 +#include "../common/threadasm.S"
 +
 +/**
 + * Performs a context switch.
 + *
 + * $a0 - void** - ptr to old stack pointer
 + * $a1 - void*  - new stack pointer
 + *
 + */
 +
 +#if defined(__loongarch_lp64)
 +#  define GPR_L ld.d
 +#  define GPR_S st.d
 +#  define SZ_GPR 8
 +#  define ADDSP(si)   addi.d  $sp, $sp, si
 +#elif defined(__loongarch64_ilp32)
 +#  define GPR_L ld.w
 +#  define GPR_S st.w
 +#  define SZ_GPR 4
 +#  define ADDSP(si)   addi.w  $sp, $sp, si
 +#else
 +#  error Unsupported GPR size (must be 64-bit or 32-bit).
 +#endif
 +
 +#if defined(__loongarch_double_float)
 +#  define FPR_L fld.d
 +#  define FPR_S fst.d
 +#  define SZ_FPR 8
 +#elif defined(__loongarch_single_float)
 +#  define FPR_L fld.s
 +#  define FPR_S fst.s
 +#  define SZ_FPR 4
 +#else
 +#  define SZ_FPR 0
 +#endif
 +
 +    .text
 +    .align 2
 +    .global fiber_switchContext
 +    .type   fiber_switchContext, @function
 +fiber_switchContext:
 +    .cfi_startproc
 +    ADDSP(-11 * SZ_GPR)
 +
 +    // fp regs and return address are stored below the stack
 +    // because we don't want the GC to scan them.
 +
 +    // return address (r1)
 +    GPR_S  $r1, $sp, -SZ_GPR
 +
 +#if SZ_FPR != 0
 +    // callee-saved scratch FPRs (f24-f31)
 +    FPR_S  $f24, $sp, -SZ_GPR-1*SZ_FPR
 +    FPR_S  $f25, $sp, -SZ_GPR-2*SZ_FPR
 +    FPR_S  $f26, $sp, -SZ_GPR-3*SZ_FPR
 +    FPR_S  $f27, $sp, -SZ_GPR-4*SZ_FPR
 +    FPR_S  $f28, $sp, -SZ_GPR-5*SZ_FPR
 +    FPR_S  $f29, $sp, -SZ_GPR-6*SZ_FPR
 +    FPR_S  $f30, $sp, -SZ_GPR-7*SZ_FPR
 +    FPR_S  $f31, $sp, -SZ_GPR-8*SZ_FPR
 +#endif
 +
 +    // callee-saved GPRs (r21, fp (r22), r23-r31)
 +    GPR_S $r21, $sp, 0*SZ_GPR
 +    GPR_S  $fp, $sp, 1*SZ_GPR
 +    GPR_S  $s0, $sp, 2*SZ_GPR
 +    GPR_S  $s1, $sp, 3*SZ_GPR
 +    GPR_S  $s2, $sp, 4*SZ_GPR
 +    GPR_S  $s3, $sp, 5*SZ_GPR
 +    GPR_S  $s4, $sp, 6*SZ_GPR
 +    GPR_S  $s5, $sp, 7*SZ_GPR
 +    GPR_S  $s6, $sp, 8*SZ_GPR
 +    GPR_S  $s7, $sp, 9*SZ_GPR
 +    GPR_S  $s8, $sp, 10*SZ_GPR
 +
 +    // swap stack pointer
 +    GPR_S $sp, $a0, 0
 +    move $sp, $a1
 +
 +    GPR_L  $r1, $sp, -SZ_GPR
 +
 +#if SZ_FPR != 0
 +    FPR_L  $f24, $sp, -SZ_GPR-1*SZ_FPR
 +    FPR_L  $f25, $sp, -SZ_GPR-2*SZ_FPR
 +    FPR_L  $f26, $sp, -SZ_GPR-3*SZ_FPR
 +    FPR_L  $f27, $sp, -SZ_GPR-4*SZ_FPR
 +    FPR_L  $f28, $sp, -SZ_GPR-5*SZ_FPR
 +    FPR_L  $f29, $sp, -SZ_GPR-6*SZ_FPR
 +    FPR_L  $f30, $sp, -SZ_GPR-7*SZ_FPR
 +    FPR_L  $f31, $sp, -SZ_GPR-8*SZ_FPR
 +#endif
 +
 +    GPR_L $r21, $sp, 0*SZ_GPR
 +    GPR_L  $fp, $sp, 1*SZ_GPR
 +    GPR_L  $s0, $sp, 2*SZ_GPR
 +    GPR_L  $s1, $sp, 3*SZ_GPR
 +    GPR_L  $s2, $sp, 4*SZ_GPR
 +    GPR_L  $s3, $sp, 5*SZ_GPR
 +    GPR_L  $s4, $sp, 6*SZ_GPR
 +    GPR_L  $s5, $sp, 7*SZ_GPR
 +    GPR_L  $s6, $sp, 8*SZ_GPR
 +    GPR_L  $s7, $sp, 9*SZ_GPR
 +    GPR_L  $s8, $sp, 10*SZ_GPR
 +
 +    ADDSP(11 * SZ_GPR)
 +
 +    jr     $r1 // return
 +    .cfi_endproc
 +    .size fiber_switchContext,.-fiber_switchContext
 -- 
 2.43.0
--- a/0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
+++ b/0075-LoongArch-Fix-FP-vector-comparsons-PR113034.patch
@ -0,0 +1,866 @@
 From dd33794e64d462bf39e72f39343a384c191307f4 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 17 Dec 2023 01:09:20 +0800
 Subject: [PATCH 075/188] LoongArch: Fix FP vector comparsons [PR113034]
 We had the following mappings between <x>vfcmp submnemonics and RTX
 codes:
    (define_code_attr fcc
      [(unordered "cun")
       (ordered   "cor")
       (eq       "ceq")
       (ne       "cne")
       (uneq      "cueq")
       (unle      "cule")
       (unlt      "cult")
       (le       "cle")
       (lt       "clt")])
 This is inconsistent with scalar code:
    (define_code_attr fcond [(unordered "cun")
                             (uneq "cueq")
                             (unlt "cult")
                             (unle "cule")
                             (eq "ceq")
                             (lt "slt")
                             (le "sle")
                             (ordered "cor")
                             (ltgt "sne")
                             (ne "cune")
                             (ge "sge")
                             (gt "sgt")
                             (unge "cuge")
                             (ungt "cugt")])
 For every RTX code for which the LSX/LASX code is different from the
 scalar code, the scalar code is correct and the LSX/LASX code is wrong.
 Most seriously, the RTX code NE should be mapped to "cneq", not "cne".
 Rewrite <x>vfcmp define_insns in simd.md using the same mapping as
 scalar fcmp.
 Note that GAS does not support [x]vfcmp.{c/s}[u]{ge/gt} (pseudo)
 instruction (although fcmp.{c/s}[u]{ge/gt} is supported), so we need to
 switch the order of inputs and use [x]vfcmp.{c/s}[u]{le/lt} instead.
 The <x>vfcmp.{sult/sule/clt/cle}.{s/d} instructions do not have a single
 RTX code, but they can be modeled as an inversed RTX code following a
 "not" operation.  Doing so allows the compiler to optimize vectorized
 __builtin_isless etc. to a single instruction.  This optimization should
 be added for scalar code too and I'll do it later.
 Tests are added for mapping between C code, IEC 60559 operations, and
 vfcmp instructions.
 [1]:https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640713.html
 gcc/ChangeLog:
 	PR target/113034
 	* config/loongarch/lasx.md (UNSPEC_LASX_XVFCMP_*): Remove.
 	(lasx_xvfcmp_caf_<flasxfmt>): Remove.
 	(lasx_xvfcmp_cune_<FLASX:flasxfmt>): Remove.
 	(FSC256_UNS): Remove.
 	(fsc256): Remove.
 	(lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>): Remove.
 	(lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>): Remove.
 	* config/loongarch/lsx.md (UNSPEC_LSX_VFCMP_*): Remove.
 	(lsx_vfcmp_caf_<flsxfmt>): Remove.
 	(lsx_vfcmp_cune_<FLSX:flsxfmt>): Remove.
 	(vfcond): Remove.
 	(fcc): Remove.
 	(FSC_UNS): Remove.
 	(fsc): Remove.
 	(lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>): Remove.
 	(lsx_vfcmp_<fsc>_<FLSX:flsxfmt>): Remove.
 	* config/loongarch/simd.md
 	(fcond_simd): New define_code_iterator.
 	(<simd_isa>_<x>vfcmp_<fcond:fcond_simd>_<simdfmt>):
 	New define_insn.
 	(fcond_simd_rev): New define_code_iterator.
 	(fcond_rev_asm): New define_code_attr.
 	(<simd_isa>_<x>vfcmp_<fcond:fcond_simd_rev>_<simdfmt>):
 	New define_insn.
 	(fcond_inv): New define_code_iterator.
 	(fcond_inv_rev): New define_code_iterator.
 	(fcond_inv_rev_asm): New define_code_attr.
 	(<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>): New define_insn.
 	(<simd_isa>_<x>vfcmp_<fcond_inv:fcond_inv_rev>_<simdfmt>):
 	New define_insn.
 	(UNSPEC_SIMD_FCMP_CAF, UNSPEC_SIMD_FCMP_SAF,
 	UNSPEC_SIMD_FCMP_SEQ, UNSPEC_SIMD_FCMP_SUN,
 	UNSPEC_SIMD_FCMP_SUEQ, UNSPEC_SIMD_FCMP_CNE,
 	UNSPEC_SIMD_FCMP_SOR, UNSPEC_SIMD_FCMP_SUNE): New unspecs.
 	(SIMD_FCMP): New define_int_iterator.
 	(fcond_unspec): New define_int_attr.
 	(<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>): New define_insn.
 	* config/loongarch/loongarch.cc (loongarch_expand_lsx_cmp):
 	Remove unneeded special cases.
 gcc/testsuite/ChangeLog:
 	PR target/113034
 	* gcc.target/loongarch/vfcmp-f.c: New test.
 	* gcc.target/loongarch/vfcmp-d.c: New test.
 	* gcc.target/loongarch/xvfcmp-f.c: New test.
 	* gcc.target/loongarch/xvfcmp-d.c: New test.
 	* gcc.target/loongarch/vector/lasx/lasx-vcond-2.c: Scan for cune
 	instead of cne.
 	* gcc.target/loongarch/vector/lsx/lsx-vcond-2.c: Likewise.
 ---
 gcc/config/loongarch/lasx.md                  |  76 --------
 gcc/config/loongarch/loongarch.cc             |  60 +-----
 gcc/config/loongarch/lsx.md                   |  83 --------
 gcc/config/loongarch/simd.md                  | 118 ++++++++++++
 .../loongarch/vector/lasx/lasx-vcond-2.c      |   4 +-
 .../loongarch/vector/lsx/lsx-vcond-2.c        |   4 +-
 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c  |  28 +++
 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c  | 178 ++++++++++++++++++
 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c |  29 +++
 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c |  27 +++
 10 files changed, 385 insertions(+), 222 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index eeac8cd98..921ce0eeb 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -32,9 +32,7 @@
   UNSPEC_LASX_XVBITREVI
   UNSPEC_LASX_XVBITSET
   UNSPEC_LASX_XVBITSETI
 -  UNSPEC_LASX_XVFCMP_CAF
   UNSPEC_LASX_XVFCLASS
 -  UNSPEC_LASX_XVFCMP_CUNE
   UNSPEC_LASX_XVFCVT
   UNSPEC_LASX_XVFCVTH
   UNSPEC_LASX_XVFCVTL
@@ -44,17 +42,6 @@
   UNSPEC_LASX_XVFRINT
   UNSPEC_LASX_XVFRSQRT
   UNSPEC_LASX_XVFRSQRTE
 -  UNSPEC_LASX_XVFCMP_SAF
 -  UNSPEC_LASX_XVFCMP_SEQ
 -  UNSPEC_LASX_XVFCMP_SLE
 -  UNSPEC_LASX_XVFCMP_SLT
 -  UNSPEC_LASX_XVFCMP_SNE
 -  UNSPEC_LASX_XVFCMP_SOR
 -  UNSPEC_LASX_XVFCMP_SUEQ
 -  UNSPEC_LASX_XVFCMP_SULE
 -  UNSPEC_LASX_XVFCMP_SULT
 -  UNSPEC_LASX_XVFCMP_SUN
 -  UNSPEC_LASX_XVFCMP_SUNE
   UNSPEC_LASX_XVFTINT_U
   UNSPEC_LASX_XVCLO
   UNSPEC_LASX_XVSAT_S
@@ -1481,69 +1468,6 @@
   [(set_attr "type" "simd_fclass")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lasx_xvfcmp_caf_<flasxfmt>"
 -  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
 -	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
 -			     (match_operand:FLASX 2 "register_operand" "f")]
 -			    UNSPEC_LASX_XVFCMP_CAF))]
 -  "ISA_HAS_LASX"
 -  "xvfcmp.caf.<flasxfmt>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -(define_insn "lasx_xvfcmp_cune_<FLASX:flasxfmt>"
 -  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
 -	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
 -			     (match_operand:FLASX 2 "register_operand" "f")]
 -			    UNSPEC_LASX_XVFCMP_CUNE))]
 -  "ISA_HAS_LASX"
 -  "xvfcmp.cune.<FLASX:flasxfmt>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -
 -
 -(define_int_iterator FSC256_UNS [UNSPEC_LASX_XVFCMP_SAF UNSPEC_LASX_XVFCMP_SUN
 -				 UNSPEC_LASX_XVFCMP_SOR UNSPEC_LASX_XVFCMP_SEQ
 -				 UNSPEC_LASX_XVFCMP_SNE UNSPEC_LASX_XVFCMP_SUEQ
 -				 UNSPEC_LASX_XVFCMP_SUNE UNSPEC_LASX_XVFCMP_SULE
 -				 UNSPEC_LASX_XVFCMP_SULT UNSPEC_LASX_XVFCMP_SLE
 -				 UNSPEC_LASX_XVFCMP_SLT])
 -
 -(define_int_attr fsc256
 -  [(UNSPEC_LASX_XVFCMP_SAF  "saf")
 -   (UNSPEC_LASX_XVFCMP_SUN  "sun")
 -   (UNSPEC_LASX_XVFCMP_SOR  "sor")
 -   (UNSPEC_LASX_XVFCMP_SEQ  "seq")
 -   (UNSPEC_LASX_XVFCMP_SNE  "sne")
 -   (UNSPEC_LASX_XVFCMP_SUEQ "sueq")
 -   (UNSPEC_LASX_XVFCMP_SUNE "sune")
 -   (UNSPEC_LASX_XVFCMP_SULE "sule")
 -   (UNSPEC_LASX_XVFCMP_SULT "sult")
 -   (UNSPEC_LASX_XVFCMP_SLE  "sle")
 -   (UNSPEC_LASX_XVFCMP_SLT  "slt")])
 -
 -(define_insn "lasx_xvfcmp_<vfcond:fcc>_<FLASX:flasxfmt>"
 -  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
 -	(vfcond:<VIMODE256> (match_operand:FLASX 1 "register_operand" "f")
 -			    (match_operand:FLASX 2 "register_operand" "f")))]
 -  "ISA_HAS_LASX"
 -  "xvfcmp.<vfcond:fcc>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -
 -(define_insn "lasx_xvfcmp_<fsc256>_<FLASX:flasxfmt>"
 -  [(set (match_operand:<VIMODE256> 0 "register_operand" "=f")
 -	(unspec:<VIMODE256> [(match_operand:FLASX 1 "register_operand" "f")
 -			     (match_operand:FLASX 2 "register_operand" "f")]
 -			    FSC256_UNS))]
 -  "ISA_HAS_LASX"
 -  "xvfcmp.<fsc256>.<FLASX:flasxfmt>\t%u0,%u1,%u2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -
 (define_mode_attr fint256
   [(V8SF "v8si")
    (V4DF "v4di")])
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index afbb55390..a22601d88 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -11156,7 +11156,6 @@ static void
 loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
 {
   machine_mode cmp_mode = GET_MODE (op0);
 -  int unspec = -1;
   bool negate = false;
   switch (cmp_mode)
@@ -11198,66 +11197,9 @@ loongarch_expand_lsx_cmp (rtx dest, enum rtx_code cond, rtx op0, rtx op1)
     case E_V4SFmode:
     case E_V2DFmode:
 -      switch (cond)
 -	{
 -	case UNORDERED:
 -	case ORDERED:
 -	case EQ:
 -	case NE:
 -	case UNEQ:
 -	case UNLE:
 -	case UNLT:
 -	  break;
 -	case LTGT: cond = NE; break;
 -	case UNGE: cond = UNLE; std::swap (op0, op1); break;
 -	case UNGT: cond = UNLT; std::swap (op0, op1); break;
 -	case LE: unspec = UNSPEC_LSX_VFCMP_SLE; break;
 -	case LT: unspec = UNSPEC_LSX_VFCMP_SLT; break;
 -	case GE: unspec = UNSPEC_LSX_VFCMP_SLE; std::swap (op0, op1); break;
 -	case GT: unspec = UNSPEC_LSX_VFCMP_SLT; std::swap (op0, op1); break;
 -	default:
 -		 gcc_unreachable ();
 -	}
 -      if (unspec < 0)
 -	loongarch_emit_binary (cond, dest, op0, op1);
 -      else
 -	{
 -	  rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
 -				  gen_rtvec (2, op0, op1), unspec);
 -	  emit_insn (gen_rtx_SET (dest, x));
 -	}
 -      break;
 -
     case E_V8SFmode:
     case E_V4DFmode:
 -      switch (cond)
 -	{
 -	case UNORDERED:
 -	case ORDERED:
 -	case EQ:
 -	case NE:
 -	case UNEQ:
 -	case UNLE:
 -	case UNLT:
 -	  break;
 -	case LTGT: cond = NE; break;
 -	case UNGE: cond = UNLE; std::swap (op0, op1); break;
 -	case UNGT: cond = UNLT; std::swap (op0, op1); break;
 -	case LE: unspec = UNSPEC_LASX_XVFCMP_SLE; break;
 -	case LT: unspec = UNSPEC_LASX_XVFCMP_SLT; break;
 -	case GE: unspec = UNSPEC_LASX_XVFCMP_SLE; std::swap (op0, op1); break;
 -	case GT: unspec = UNSPEC_LASX_XVFCMP_SLT; std::swap (op0, op1); break;
 -	default:
 -		 gcc_unreachable ();
 -	}
 -      if (unspec < 0)
 -	loongarch_emit_binary (cond, dest, op0, op1);
 -      else
 -	{
 -	  rtx x = gen_rtx_UNSPEC (GET_MODE (dest),
 -				  gen_rtvec (2, op0, op1), unspec);
 -	  emit_insn (gen_rtx_SET (dest, x));
 -	}
 +      loongarch_emit_binary (cond, dest, op0, op1);
       break;
     default:
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index dbdb42301..57e0ee3d4 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -34,9 +34,7 @@
   UNSPEC_LSX_VBITSETI
   UNSPEC_LSX_BRANCH_V
   UNSPEC_LSX_BRANCH
 -  UNSPEC_LSX_VFCMP_CAF
   UNSPEC_LSX_VFCLASS
 -  UNSPEC_LSX_VFCMP_CUNE
   UNSPEC_LSX_VFCVT
   UNSPEC_LSX_VFCVTH
   UNSPEC_LSX_VFCVTL
@@ -46,17 +44,6 @@
   UNSPEC_LSX_VFRINT
   UNSPEC_LSX_VFRSQRT
   UNSPEC_LSX_VFRSQRTE
 -  UNSPEC_LSX_VFCMP_SAF
 -  UNSPEC_LSX_VFCMP_SEQ
 -  UNSPEC_LSX_VFCMP_SLE
 -  UNSPEC_LSX_VFCMP_SLT
 -  UNSPEC_LSX_VFCMP_SNE
 -  UNSPEC_LSX_VFCMP_SOR
 -  UNSPEC_LSX_VFCMP_SUEQ
 -  UNSPEC_LSX_VFCMP_SULE
 -  UNSPEC_LSX_VFCMP_SULT
 -  UNSPEC_LSX_VFCMP_SUN
 -  UNSPEC_LSX_VFCMP_SUNE
   UNSPEC_LSX_VFTINT_U
   UNSPEC_LSX_VSAT_S
   UNSPEC_LSX_VSAT_U
@@ -1377,76 +1364,6 @@
   [(set_attr "type" "simd_fclass")
    (set_attr "mode" "<MODE>")])
 -(define_insn "lsx_vfcmp_caf_<flsxfmt>"
 -  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 -	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
 -			  (match_operand:FLSX 2 "register_operand" "f")]
 -			 UNSPEC_LSX_VFCMP_CAF))]
 -  "ISA_HAS_LSX"
 -  "vfcmp.caf.<flsxfmt>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -(define_insn "lsx_vfcmp_cune_<FLSX:flsxfmt>"
 -  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 -	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
 -			  (match_operand:FLSX 2 "register_operand" "f")]
 -			 UNSPEC_LSX_VFCMP_CUNE))]
 -  "ISA_HAS_LSX"
 -  "vfcmp.cune.<FLSX:flsxfmt>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -(define_code_iterator vfcond [unordered ordered eq ne le lt uneq unle unlt])
 -
 -(define_code_attr fcc
 -  [(unordered "cun")
 -   (ordered   "cor")
 -   (eq	      "ceq")
 -   (ne	      "cne")
 -   (uneq      "cueq")
 -   (unle      "cule")
 -   (unlt      "cult")
 -   (le	      "cle")
 -   (lt	      "clt")])
 -
 -(define_int_iterator FSC_UNS [UNSPEC_LSX_VFCMP_SAF UNSPEC_LSX_VFCMP_SUN UNSPEC_LSX_VFCMP_SOR
 -			      UNSPEC_LSX_VFCMP_SEQ UNSPEC_LSX_VFCMP_SNE UNSPEC_LSX_VFCMP_SUEQ
 -			      UNSPEC_LSX_VFCMP_SUNE UNSPEC_LSX_VFCMP_SULE UNSPEC_LSX_VFCMP_SULT
 -			      UNSPEC_LSX_VFCMP_SLE UNSPEC_LSX_VFCMP_SLT])
 -
 -(define_int_attr fsc
 -  [(UNSPEC_LSX_VFCMP_SAF  "saf")
 -   (UNSPEC_LSX_VFCMP_SUN  "sun")
 -   (UNSPEC_LSX_VFCMP_SOR  "sor")
 -   (UNSPEC_LSX_VFCMP_SEQ  "seq")
 -   (UNSPEC_LSX_VFCMP_SNE  "sne")
 -   (UNSPEC_LSX_VFCMP_SUEQ "sueq")
 -   (UNSPEC_LSX_VFCMP_SUNE "sune")
 -   (UNSPEC_LSX_VFCMP_SULE "sule")
 -   (UNSPEC_LSX_VFCMP_SULT "sult")
 -   (UNSPEC_LSX_VFCMP_SLE  "sle")
 -   (UNSPEC_LSX_VFCMP_SLT  "slt")])
 -
 -(define_insn "lsx_vfcmp_<vfcond:fcc>_<FLSX:flsxfmt>"
 -  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 -	(vfcond:<VIMODE> (match_operand:FLSX 1 "register_operand" "f")
 -			 (match_operand:FLSX 2 "register_operand" "f")))]
 -  "ISA_HAS_LSX"
 -  "vfcmp.<vfcond:fcc>.<FLSX:flsxfmt>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 -(define_insn "lsx_vfcmp_<fsc>_<FLSX:flsxfmt>"
 -  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 -	(unspec:<VIMODE> [(match_operand:FLSX 1 "register_operand" "f")
 -			  (match_operand:FLSX 2 "register_operand" "f")]
 -			 FSC_UNS))]
 -  "ISA_HAS_LSX"
 -  "vfcmp.<fsc>.<FLSX:flsxfmt>\t%w0,%w1,%w2"
 -  [(set_attr "type" "simd_fcmp")
 -   (set_attr "mode" "<MODE>")])
 -
 (define_mode_attr fint
   [(V4SF "v4si")
    (V2DF "v2di")])
 diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
 index 843b1a41f..13202f79b 100644
 --- a/gcc/config/loongarch/simd.md
 +++ b/gcc/config/loongarch/simd.md
@@ -279,6 +279,124 @@
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 +;; <x>vfcmp.*.{s/d} with defined RTX code
 +;; There are no fcmp.{sugt/suge/cgt/cge}.{s/d} mnemonics in GAS, so we have
 +;; to reverse the operands ourselves :(.
 +(define_code_iterator fcond_simd [unordered uneq unlt unle eq lt le
 +				  ordered ltgt ne])
 +(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
 +  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 +	(fcond_simd:<VIMODE>
 +	  (match_operand:FVEC 1 "register_operand" "f")
 +	  (match_operand:FVEC 2 "register_operand" "f")))]
 +  ""
 +  "<x>vfcmp.<fcond>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
 +  [(set_attr "type" "simd_fcmp")
 +   (set_attr "mode" "<MODE>")])
 +
 +;; There are no fcmp.{sge/sgt/cuge/cugt}.{s/d} mnemonics in GAS, so we have
 +;; to reverse the operands ourselves.
 +(define_code_iterator fcond_simd_rev [ge gt unge ungt])
 +
 +(define_code_attr fcond_rev_asm
 +  [(ge		"sle")
 +   (gt		"slt")
 +   (unge	"cule")
 +   (ungt	"cult")])
 +
 +(define_insn "<simd_isa>_<x>vfcmp_<fcond>_<simdfmt>"
 +  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 +	(fcond_simd_rev:<VIMODE>
 +	  (match_operand:FVEC 1 "register_operand" "f")
 +	  (match_operand:FVEC 2 "register_operand" "f")))]
 +  ""
 +  "<x>vfcmp.<fcond_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1"
 +  [(set_attr "type" "simd_fcmp")
 +   (set_attr "mode" "<MODE>")])
 +
 +;; <x>vfcmp.*.{s/d} without defined RTX code, but with defined RTX code for
 +;; its inverse.  Again, there are no fcmp.{sugt/suge/cgt/cge}.{s/d}
 +;; mnemonics in GAS, so we have to reverse the operands ourselves.
 +(define_code_iterator fcond_inv [ge gt unge ungt])
 +(define_code_iterator fcond_inv_rev [le lt unle unlt])
 +(define_code_attr fcond_inv
 +  [(ge		"sult")
 +   (gt		"sule")
 +   (unge	"clt")
 +   (ungt	"cle")
 +   (le		"sugt")
 +   (lt		"suge")
 +   (unle	"cgt")
 +   (unlt	"cge")])
 +(define_code_attr fcond_inv_rev_asm
 +  [(le		"sult")
 +   (lt		"sule")
 +   (unle	"clt")
 +   (unlt	"cle")])
 +
 +(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
 +  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 +	(not:<VIMODE>
 +	  (fcond_inv:<VIMODE>
 +	    (match_operand:FVEC 1 "register_operand" "f")
 +	    (match_operand:FVEC 2 "register_operand" "f"))))]
 +  ""
 +  "<x>vfcmp.<fcond_inv>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
 +  [(set_attr "type" "simd_fcmp")
 +   (set_attr "mode" "<MODE>")])
 +
 +(define_insn "<simd_isa>_<x>vfcmp_<fcond_inv>_<simdfmt>"
 +  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 +	(not:<VIMODE>
 +	  (fcond_inv_rev:<VIMODE>
 +	    (match_operand:FVEC 1 "register_operand" "f")
 +	    (match_operand:FVEC 2 "register_operand" "f"))))]
 +  ""
 +  "<x>vfcmp.<fcond_inv_rev_asm>.<simdfmt>\t%<wu>0,%<wu>2,%<wu>1"
 +  [(set_attr "type" "simd_fcmp")
 +   (set_attr "mode" "<MODE>")])
 +
 +;; <x>vfcmp.*.{s/d} instructions only as intrinsics
 +(define_c_enum "unspec"
 +  [UNSPEC_SIMD_FCMP_CAF
 +   UNSPEC_SIMD_FCMP_SAF
 +   UNSPEC_SIMD_FCMP_SEQ
 +   UNSPEC_SIMD_FCMP_SUN
 +   UNSPEC_SIMD_FCMP_SUEQ
 +   UNSPEC_SIMD_FCMP_CNE
 +   UNSPEC_SIMD_FCMP_SOR
 +   UNSPEC_SIMD_FCMP_SUNE])
 +
 +(define_int_iterator SIMD_FCMP
 +  [UNSPEC_SIMD_FCMP_CAF
 +   UNSPEC_SIMD_FCMP_SAF
 +   UNSPEC_SIMD_FCMP_SEQ
 +   UNSPEC_SIMD_FCMP_SUN
 +   UNSPEC_SIMD_FCMP_SUEQ
 +   UNSPEC_SIMD_FCMP_CNE
 +   UNSPEC_SIMD_FCMP_SOR
 +   UNSPEC_SIMD_FCMP_SUNE])
 +
 +(define_int_attr fcond_unspec
 +  [(UNSPEC_SIMD_FCMP_CAF	"caf")
 +   (UNSPEC_SIMD_FCMP_SAF	"saf")
 +   (UNSPEC_SIMD_FCMP_SEQ	"seq")
 +   (UNSPEC_SIMD_FCMP_SUN	"sun")
 +   (UNSPEC_SIMD_FCMP_SUEQ	"sueq")
 +   (UNSPEC_SIMD_FCMP_CNE	"cne")
 +   (UNSPEC_SIMD_FCMP_SOR	"sor")
 +   (UNSPEC_SIMD_FCMP_SUNE	"sune")])
 +
 +(define_insn "<simd_isa>_<x>vfcmp_<fcond_unspec>_<simdfmt>"
 +  [(set (match_operand:<VIMODE> 0 "register_operand" "=f")
 +	(unspec:<VIMODE> [(match_operand:FVEC 1 "register_operand" "f")
 +			  (match_operand:FVEC 2 "register_operand" "f")]
 +			 SIMD_FCMP))]
 +  ""
 +  "<x>vfcmp.<fcond_unspec>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
 +  [(set_attr "type" "simd_fcmp")
 +   (set_attr "mode" "<MODE>")])
 +
 ; The LoongArch SX Instructions.
 (include "lsx.md")
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
 index 55d5a084c..f2f523622 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vcond-2.c
@@ -69,8 +69,8 @@ TEST_CMP (nugt)
 /* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.s} 3 } } */
 /* { dg-final { scan-assembler-times {\txvfcmp\.ceq\.d} 3 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.s} 3 } } */
 -/* { dg-final { scan-assembler-times {\txvfcmp\.cne\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\txvfcmp\.cune\.d} 3 } } */
 /* { dg-final { scan-assembler-times {\txvfcmp\.slt\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\txvfcmp\.slt\.d} 6 } } */
 /* { dg-final { scan-assembler-times {\txvfcmp\.sle\.s} 6 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
 index 2214afd0a..486bedba4 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vcond-2.c
@@ -69,8 +69,8 @@ TEST_CMP (nugt)
 /* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.s} 3 } } */
 /* { dg-final { scan-assembler-times {\tvfcmp\.ceq\.d} 3 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.s} 3 } } */
 -/* { dg-final { scan-assembler-times {\tvfcmp\.cne\.d} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.s} 3 } } */
 +/* { dg-final { scan-assembler-times {\tvfcmp\.cune\.d} 3 } } */
 /* { dg-final { scan-assembler-times {\tvfcmp\.slt\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\tvfcmp\.slt\.d} 6 } } */
 /* { dg-final { scan-assembler-times {\tvfcmp\.sle\.s} 6 } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
 new file mode 100644
 index 000000000..8b870ef38
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-d.c
@@ -0,0 +1,28 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
 +
 +#define F double
 +#define I long long
 +
 +#include "vfcmp-f.c"
 +
 +/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.d\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
 new file mode 100644
 index 000000000..b9110b90c
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vfcmp-f.c
@@ -0,0 +1,178 @@
 +/* Test mapping IEC 60559 operations to SIMD instructions.
 +   For details read C23 Annex F.3 and LoongArch Vol. 1 section 3.2.2.1.  */
 +
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
 +
 +#ifndef F
 +#define F float
 +#endif
 +
 +#ifndef I
 +#define I int
 +#endif
 +
 +#ifndef VL
 +#define VL 16
 +#endif
 +
 +typedef F VF __attribute__ ((vector_size (VL)));
 +typedef I VI __attribute__ ((vector_size (VL)));
 +
 +register VF a asm ("f0");
 +register VF b asm ("f1");
 +register VI c asm ("f2");
 +
 +void
 +compare_quiet_equal (void)
 +{
 +  c = (a == b);
 +}
 +
 +void
 +compare_quiet_not_equal (void)
 +{
 +  c = (a != b);
 +}
 +
 +void
 +compare_signaling_greater (void)
 +{
 +  c = (a > b);
 +}
 +
 +void
 +compare_signaling_greater_equal (void)
 +{
 +  c = (a >= b);
 +}
 +
 +void
 +compare_signaling_less (void)
 +{
 +  c = (a < b);
 +}
 +
 +void
 +compare_signaling_less_equal (void)
 +{
 +  c = (a <= b);
 +}
 +
 +void
 +compare_signaling_not_greater (void)
 +{
 +  c = ~(a > b);
 +}
 +
 +void
 +compare_signaling_less_unordered (void)
 +{
 +  c = ~(a >= b);
 +}
 +
 +void
 +compare_signaling_not_less (void)
 +{
 +  c = ~(a < b);
 +}
 +
 +void
 +compare_signaling_greater_unordered (void)
 +{
 +  c = ~(a <= b);
 +}
 +
 +void
 +compare_quiet_less (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isless (a[i], b[i]) ? -1 : 0;
 +}
 +
 +void
 +compare_quiet_less_equal (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_islessequal (a[i], b[i]) ? -1 : 0;
 +}
 +
 +void
 +compare_quiet_greater (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isgreater (a[i], b[i]) ? -1 : 0;
 +}
 +
 +void
 +compare_quiet_greater_equal (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isgreaterequal (a[i], b[i]) ? -1 : 0;
 +}
 +
 +void
 +compare_quiet_not_less (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isless (a[i], b[i]) ? 0 : -1;
 +}
 +
 +void
 +compare_quiet_greater_unordered (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_islessequal (a[i], b[i]) ? 0 : -1;
 +}
 +
 +void
 +compare_quiet_not_greater (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isgreater (a[i], b[i]) ? 0 : -1;
 +}
 +
 +void
 +compare_quiet_less_unordered (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isgreaterequal (a[i], b[i]) ? 0 : -1;
 +}
 +
 +void
 +compare_quiet_unordered (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isunordered (a[i], b[i]) ? -1 : 0;
 +}
 +
 +void
 +compare_quiet_ordered (void)
 +{
 +  for (int i = 0; i < sizeof (c) / sizeof (c[0]); i++)
 +    c[i] = __builtin_isunordered (a[i], b[i]) ? 0 : -1;
 +}
 +
 +/* The "-<function_name>" matches the .size directive after the function
 +   body, so we can ensure the instruction is in the correct function.  */
 +
 +/* { dg-final { scan-assembler "compare_quiet_equal:.*\tvfcmp\\.ceq\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\tvfcmp\\.cune\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less:.*\tvfcmp\\.slt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\tvfcmp\\.sle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_signaling_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_less:.*\tvfcmp\\.sule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\tvfcmp\\.sult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_signaling_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater:.*\tvfcmp\\.clt\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\tvfcmp\\.cle\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_less:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr1,\\\$vr0.*-compare_quiet_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\tvfcmp\\.cule\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\tvfcmp\\.cult\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_unordered:.*\tvfcmp\\.cun\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_ordered:.*\tvfcmp\\.cor\\.s\t\\\$vr2,\\\$vr0,\\\$vr1.*-compare_quiet_ordered\n" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
 new file mode 100644
 index 000000000..d8017caaa
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-d.c
@@ -0,0 +1,29 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2 -fno-vect-cost-model" } */
 +
 +#define F double
 +#define I long long
 +#define VL 32
 +
 +#include "vfcmp-f.c"
 +
 +/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.d\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
 new file mode 100644
 index 000000000..b54556475
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/xvfcmp-f.c
@@ -0,0 +1,27 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -ffixed-f0 -ffixed-f1 -ffixed-f2" } */
 +
 +#define VL 32
 +
 +#include "vfcmp-f.c"
 +
 +/* { dg-final { scan-assembler "compare_quiet_equal:.*\txvfcmp\\.ceq\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_equal:.*\txvfcmp\\.cune\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less:.*\txvfcmp\\.slt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_equal:.*\txvfcmp\\.sle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_greater:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_less_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_signaling_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_not_less:.*\txvfcmp\\.sule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_signaling_greater_unordered:.*\txvfcmp\\.sult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_signaling_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater:.*\txvfcmp\\.clt\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_equal:.*\txvfcmp\\.cle\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_equal\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_less:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_not_less\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_greater_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr1,\\\$xr0.*-compare_quiet_greater_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_not_greater:.*\txvfcmp\\.cule\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_not_greater\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_less_unordered:.*\txvfcmp\\.cult\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_less_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_unordered:.*\txvfcmp\\.cun\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_unordered\n" } } */
 +/* { dg-final { scan-assembler "compare_quiet_ordered:.*\txvfcmp\\.cor\\.s\t\\\$xr2,\\\$xr0,\\\$xr1.*-compare_quiet_ordered\n" } } */
 -- 
 2.43.0
--- a/0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
+++ b/0076-LoongArch-Use-force_reg-instead-of-gen_reg_rtx-emit_.patch
@ -0,0 +1,190 @@
 From be149d7f6527df6b16f3f9f8aec1e488466a71f1 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Tue, 19 Dec 2023 04:48:03 +0800
 Subject: [PATCH 076/188] LoongArch: Use force_reg instead of gen_reg_rtx +
 emit_move_insn in vec_init expander [PR113033]
 Jakub says:
    Then that seems like a bug in the loongarch vec_init pattern(s).
    Those really don't have a predicate in any of the backends on the
    input operand, so they need to force_reg it if it is something it
    can't handle. I've looked e.g. at i386 vec_init and that is exactly
    what it does, see the various tests + force_reg calls in
    ix86_expand_vector_init*.
 So replace gen_reg_rtx + emit_move_insn with force_reg to fix PR 113033.
 gcc/ChangeLog:
 	PR target/113033
 	* config/loongarch/loongarch.cc
 	(loongarch_expand_vector_init_same): Replace gen_reg_rtx +
 	emit_move_insn with force_reg.
 	(loongarch_expand_vector_init): Likewise.
 gcc/testsuite/ChangeLog:
 	PR target/113033
 	* gcc.target/loongarch/pr113033.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             | 38 ++++++-------------
 gcc/testsuite/gcc.target/loongarch/pr113033.c | 23 +++++++++++
 2 files changed, 35 insertions(+), 26 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113033.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index a22601d88..000d2d623 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -10745,7 +10745,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
 	  gcc_unreachable ();
 	}
     }
 -  temp = gen_reg_rtx (imode);
 +
   if (imode == GET_MODE (same))
     temp2 = same;
   else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
@@ -10770,7 +10770,8 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
       else
 	temp2 = lowpart_subreg (imode, same, GET_MODE (same));
     }
 -  emit_move_insn (temp, temp2);
 +
 +  temp = force_reg (imode, temp2);
   switch (vmode)
     {
@@ -10992,35 +10993,29 @@ loongarch_expand_vector_init (rtx target, rtx vals)
 			 to reduce the number of instructions.  */
 		      if (i == 1)
 			{
 -			  op0 = gen_reg_rtx (imode);
 -			  emit_move_insn (op0, val_hi[0]);
 -			  op1 = gen_reg_rtx (imode);
 -			  emit_move_insn (op1, val_hi[1]);
 +			  op0 = force_reg (imode, val_hi[0]);
 +			  op1 = force_reg (imode, val_hi[1]);
 			  emit_insn (
 			    loongarch_vec_repl2_256 (target_hi, op0, op1));
 			}
 		      else if (i > 1)
 			{
 -			  op0 = gen_reg_rtx (imode);
 -			  emit_move_insn (op0, val_hi[i]);
 +			  op0 = force_reg (imode, val_hi[i]);
 			  emit_insn (
 			    loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
 			}
 		    }
 		  else
 		    {
 +		      op0 = force_reg (imode, val_hi[i]);
 		      /* Assign the lowest element of val_hi to all elements
 			 of target_hi.  */
 		      if (i == 0)
 			{
 -			  op0 = gen_reg_rtx (imode);
 -			  emit_move_insn (op0, val_hi[0]);
 			  emit_insn (loongarch_vec_repl1_256 (target_hi, op0));
 			}
 		      else if (!rtx_equal_p (val_hi[i], val_hi[0]))
 			{
 -			  op0 = gen_reg_rtx (imode);
 -			  emit_move_insn (op0, val_hi[i]);
 			  emit_insn (
 			    loongarch_vec_set256 (target_hi, op0, GEN_INT (i)));
 			}
@@ -11028,18 +11023,15 @@ loongarch_expand_vector_init (rtx target, rtx vals)
 		}
 	      if (!lo_same && !half_same)
 		{
 +		  op0 = force_reg (imode, val_lo[i]);
 		  /* Assign the lowest element of val_lo to all elements
 		     of target_lo.  */
 		  if (i == 0)
 		    {
 -		      op0 = gen_reg_rtx (imode);
 -		      emit_move_insn (op0, val_lo[0]);
 		      emit_insn (loongarch_vec_repl1_128 (target_lo, op0));
 		    }
 		  else if (!rtx_equal_p (val_lo[i], val_lo[0]))
 		    {
 -		      op0 = gen_reg_rtx (imode);
 -		      emit_move_insn (op0, val_lo[i]);
 		      emit_insn (
 			loongarch_vec_set128 (target_lo, op0, GEN_INT (i)));
 		    }
@@ -11071,16 +11063,13 @@ loongarch_expand_vector_init (rtx target, rtx vals)
 		     reduce the number of instructions.  */
 		  if (i == 1)
 		    {
 -		      op0 = gen_reg_rtx (imode);
 -		      emit_move_insn (op0, val[0]);
 -		      op1 = gen_reg_rtx (imode);
 -		      emit_move_insn (op1, val[1]);
 +		      op0 = force_reg (imode, val[0]);
 +		      op1 = force_reg (imode, val[1]);
 		      emit_insn (loongarch_vec_repl2_128 (target, op0, op1));
 		    }
 		  else if (i > 1)
 		    {
 -		      op0 = gen_reg_rtx (imode);
 -		      emit_move_insn (op0, val[i]);
 +		      op0 = force_reg (imode, val[i]);
 		      emit_insn (
 			loongarch_vec_set128 (target, op0, GEN_INT (i)));
 		    }
@@ -11093,18 +11082,15 @@ loongarch_expand_vector_init (rtx target, rtx vals)
 			loongarch_vec_mirror (target, target, const0_rtx));
 		      return;
 		    }
 +		  op0 = force_reg (imode, val[i]);
 		  /* Assign the lowest element of val to all elements of
 		     target.  */
 		  if (i == 0)
 		    {
 -		      op0 = gen_reg_rtx (imode);
 -		      emit_move_insn (op0, val[0]);
 		      emit_insn (loongarch_vec_repl1_128 (target, op0));
 		    }
 		  else if (!rtx_equal_p (val[i], val[0]))
 		    {
 -		      op0 = gen_reg_rtx (imode);
 -		      emit_move_insn (op0, val[i]);
 		      emit_insn (
 			loongarch_vec_set128 (target, op0, GEN_INT (i)));
 		    }
 diff --git a/gcc/testsuite/gcc.target/loongarch/pr113033.c b/gcc/testsuite/gcc.target/loongarch/pr113033.c
 new file mode 100644
 index 000000000..4ccd037d8
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/pr113033.c
@@ -0,0 +1,23 @@
 +/* PR target/113033: ICE with vector left rotate */
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx" } */
 +
 +typedef unsigned __attribute__ ((vector_size (16))) v4si;
 +typedef unsigned __attribute__ ((vector_size (32))) v8si;
 +typedef unsigned long long __attribute__ ((vector_size (16))) v2di;
 +typedef unsigned long long __attribute__ ((vector_size (32))) v4di;
 +
 +#define TEST(tp) \
 +extern tp data_##tp; \
 +tp \
 +test_##tp (int x) \
 +{ \
 +  const int bit = sizeof (data_##tp[0]) * __CHAR_BIT__; \
 +  data_##tp = data_##tp << (x & (bit - 1)) \
 +	      | data_##tp >> (bit - x & (bit - 1)); \
 +}
 +
 +TEST (v4si)
 +TEST (v8si)
 +TEST (v2di)
 +TEST (v4di)
 -- 
 2.43.0
--- a/0077-LoongArch-Clean-up-vec_init-expander.patch
+++ b/0077-LoongArch-Clean-up-vec_init-expander.patch
@ -0,0 +1,83 @@
 From 38438021c770f077b78092299f22712fdd734814 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Tue, 19 Dec 2023 05:02:42 +0800
 Subject: [PATCH 077/188] LoongArch: Clean up vec_init expander
 Non functional change, clean up the code.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.cc
 	(loongarch_expand_vector_init_same): Remove "temp2" and reuse
 	"temp" instead.
 	(loongarch_expand_vector_init): Use gcc_unreachable () instead
 	of gcc_assert (0), and fix the comment for it.
 ---
 gcc/config/loongarch/loongarch.cc | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 000d2d623..3aeafeafd 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -10723,7 +10723,7 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
   machine_mode vmode = GET_MODE (target);
   machine_mode imode = GET_MODE_INNER (vmode);
   rtx same = XVECEXP (vals, 0, 0);
 -  rtx temp, temp2;
 +  rtx temp;
   if (CONST_INT_P (same) && nvar == 0
       && loongarch_signed_immediate_p (INTVAL (same), 10, 0))
@@ -10747,17 +10747,17 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
     }
   if (imode == GET_MODE (same))
 -    temp2 = same;
 +    temp = same;
   else if (GET_MODE_SIZE (imode) >= UNITS_PER_WORD)
     {
       if (GET_CODE (same) == MEM)
 	{
 	  rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
 	  loongarch_emit_move (reg_tmp, same);
 -	  temp2 = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
 +	  temp = simplify_gen_subreg (imode, reg_tmp, GET_MODE (reg_tmp), 0);
 	}
       else
 -	temp2 = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
 +	temp = simplify_gen_subreg (imode, same, GET_MODE (same), 0);
     }
   else
     {
@@ -10765,13 +10765,13 @@ loongarch_expand_vector_init_same (rtx target, rtx vals, unsigned nvar)
 	{
 	  rtx reg_tmp = gen_reg_rtx (GET_MODE (same));
 	  loongarch_emit_move (reg_tmp, same);
 -	  temp2 = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
 +	  temp = lowpart_subreg (imode, reg_tmp, GET_MODE (reg_tmp));
 	}
       else
 -	temp2 = lowpart_subreg (imode, same, GET_MODE (same));
 +	temp = lowpart_subreg (imode, same, GET_MODE (same));
     }
 -  temp = force_reg (imode, temp2);
 +  temp = force_reg (imode, temp);
   switch (vmode)
     {
@@ -11117,8 +11117,8 @@ loongarch_expand_vector_init (rtx target, rtx vals)
       return;
     }
 -  /* Loongson is the only cpu with vectors with more elements.  */
 -  gcc_assert (0);
 +  /* No LoongArch CPU supports vectors with more elements as at now.  */
 +  gcc_unreachable ();
 }
 /* Implement HARD_REGNO_CALLER_SAVE_MODE.  */
 -- 
 2.43.0
--- a/0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch
+++ b/0078-LoongArch-Fix-incorrect-code-generation-for-sad-patt.patch
@ -0,0 +1,78 @@
 From e5c0e4b416b8628585e27b524ba524261cacf713 Mon Sep 17 00:00:00 2001
 From: Jiahao Xu <xujiahao@loongson.cn>
 Date: Thu, 14 Dec 2023 20:49:04 +0800
 Subject: [PATCH 078/188] LoongArch: Fix incorrect code generation for sad
 pattern
 When I attempt to enable vect_usad_char effective target for LoongArch, slp-reduc-sad.c
 and vect-reduc-sad*.c tests fail. These tests fail because the sad pattern generates bad
 code. This patch fixes them: for sad patterns, use zero extension instead of sign
 extension for reduction.
 Currently, we are fixing failed vectorized tests, and in the future, we will
 enable more tests of "vect" for LoongArch.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md: Use zero expansion instruction.
 	* config/loongarch/lsx.md: Ditto.
 ---
 gcc/config/loongarch/lasx.md | 8 ++++----
 gcc/config/loongarch/lsx.md  | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 921ce0eeb..9ca3f9278 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -5021,8 +5021,8 @@
   rtx t2 = gen_reg_rtx (V16HImode);
   rtx t3 = gen_reg_rtx (V8SImode);
   emit_insn (gen_lasx_xvabsd_u_bu (t1, operands[1], operands[2]));
 -  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
 -  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
 +  emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
   DONE;
 })
@@ -5038,8 +5038,8 @@
   rtx t2 = gen_reg_rtx (V16HImode);
   rtx t3 = gen_reg_rtx (V8SImode);
   emit_insn (gen_lasx_xvabsd_s_b (t1, operands[1], operands[2]));
 -  emit_insn (gen_lasx_xvhaddw_h_b (t2, t1, t1));
 -  emit_insn (gen_lasx_xvhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_lasx_xvhaddw_hu_bu (t2, t1, t1));
 +  emit_insn (gen_lasx_xvhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv8si3 (operands[0], t3, operands[3]));
   DONE;
 })
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 57e0ee3d4..7f5fff40a 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -3385,8 +3385,8 @@
   rtx t2 = gen_reg_rtx (V8HImode);
   rtx t3 = gen_reg_rtx (V4SImode);
   emit_insn (gen_lsx_vabsd_u_bu (t1, operands[1], operands[2]));
 -  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
 -  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
 +  emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
   DONE;
 })
@@ -3402,8 +3402,8 @@
   rtx t2 = gen_reg_rtx (V8HImode);
   rtx t3 = gen_reg_rtx (V4SImode);
   emit_insn (gen_lsx_vabsd_s_b (t1, operands[1], operands[2]));
 -  emit_insn (gen_lsx_vhaddw_h_b (t2, t1, t1));
 -  emit_insn (gen_lsx_vhaddw_w_h (t3, t2, t2));
 +  emit_insn (gen_lsx_vhaddw_hu_bu (t2, t1, t1));
 +  emit_insn (gen_lsx_vhaddw_wu_hu (t3, t2, t2));
   emit_insn (gen_addv4si3 (operands[0], t3, operands[3]));
   DONE;
 })
 -- 
 2.43.0
--- a/0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch
+++ b/0079-LoongArch-Modify-the-check-type-of-the-vector-builti.patch
@ -0,0 +1,68 @@
 From bedb0338fadc373eeafc418a7bf6395d37eec78c Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Wed, 13 Dec 2023 09:31:07 +0800
 Subject: [PATCH 079/188] LoongArch: Modify the check type of the vector
 builtin function.
 On LoongArch, running the regression tests with the latest GCC 14 shows
 that the vector test cases in the vector directory produce FAIL entries
 caused by mismatched pointer types.  To solve this kind of problem, the
 types of the variables holding the check results are changed to match
 the parameter types defined by the vector builtin functions.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/simd_correctness_check.h: The variable
 	types in the check results are modified in conjunction with the
 	parameter types defined in the vector builtin function.
 ---
 .../loongarch/vector/simd_correctness_check.h       | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
 index eb7fbd59c..551340bd5 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
@@ -8,11 +8,12 @@
       int fail = 0;                                                           \
       for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i)             \
         {                                                                     \
 -          long *temp_ref = &ref[i], *temp_res = &res[i];                      \
 +          long long *temp_ref = (long long *)&ref[i],                         \
 +		*temp_res = (long long *)&res[i];			      \
           if (abs (*temp_ref - *temp_res) > 0)                                \
             {                                                                 \
               printf (" error: %s at line %ld , expected " #ref               \
 -                      "[%ld]:0x%lx, got: 0x%lx\n",                            \
 +                      "[%ld]:0x%016lx, got: 0x%016lx\n",                      \
                       __FILE__, line, i, *temp_ref, *temp_res);               \
               fail = 1;                                                       \
             }                                                                 \
@@ -28,11 +29,11 @@
       int fail = 0;                                                           \
       for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i)             \
         {                                                                     \
 -          int *temp_ref = &ref[i], *temp_res = &res[i];                       \
 +          int *temp_ref = (int *)&ref[i], *temp_res = (int *)&res[i];         \
           if (abs (*temp_ref - *temp_res) > 0)                                \
             {                                                                 \
               printf (" error: %s at line %ld , expected " #ref               \
 -                      "[%ld]:0x%x, got: 0x%x\n",                              \
 +                      "[%ld]:0x%08x, got: 0x%08x\n",                          \
                       __FILE__, line, i, *temp_ref, *temp_res);               \
               fail = 1;                                                       \
             }                                                                 \
@@ -47,8 +48,8 @@
     {                                                                         \
       if (ref != res)                                                         \
         {                                                                     \
 -          printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
 -                  line, ref, res);                                            \
 +          printf (" error: %s at line %ld , expected 0x:%016x",               \
 +		  "got 0x:%016x\n", __FILE__, line, ref, res);                \
         }                                                                     \
     }                                                                         \
   while (0)
 -- 
 2.43.0
--- a/0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
+++ b/0080-LoongArch-extend.texi-Fix-typos-in-LSX-intrinsics.patch
@ -0,0 +1,250 @@
 From 2e0092b20b845e0e301b1dab177b338e35981f10 Mon Sep 17 00:00:00 2001
 From: Jiajie Chen <c@jia.je>
 Date: Wed, 13 Dec 2023 23:26:01 +0800
 Subject: [PATCH 080/188] LoongArch: extend.texi: Fix typos in LSX intrinsics
 Several typos have been found and fixed: missing semicolons, using
 variable name instead of type, duplicate functions and wrong types.
 gcc/ChangeLog:
 	* doc/extend.texi (__lsx_vabsd_di): Remove extra `i' in name.
 	(__lsx_vfrintrm_d, __lsx_vfrintrm_s, __lsx_vfrintrne_d,
 	__lsx_vfrintrne_s, __lsx_vfrintrp_d, __lsx_vfrintrp_s, __lsx_vfrintrz_d,
 	__lsx_vfrintrz_s): fix return types.
 	(__lsx_vld, __lsx_vldi, __lsx_vldrepl_b, __lsx_vldrepl_d,
 	__lsx_vldrepl_h, __lsx_vldrepl_w, __lsx_vmaxi_b, __lsx_vmaxi_d,
 	__lsx_vmaxi_h, __lsx_vmaxi_w, __lsx_vmini_b, __lsx_vmini_d,
 	__lsx_vmini_h, __lsx_vmini_w, __lsx_vsrani_d_q, __lsx_vsrarni_d_q,
 	__lsx_vsrlni_d_q, __lsx_vsrlrni_d_q, __lsx_vssrani_d_q,
 	__lsx_vssrarni_d_q, __lsx_vssrarni_du_q, __lsx_vssrlni_d_q,
 	__lsx_vssrlrni_du_q, __lsx_vst, __lsx_vstx, __lsx_vssrani_du_q,
 	__lsx_vssrlni_du_q, __lsx_vssrlrni_d_q): add missing semicolon.
 	(__lsx_vpickve2gr_bu, __lsx_vpickve2gr_hu): fix typo in return
 	type.
 	(__lsx_vstelm_b, __lsx_vstelm_d, __lsx_vstelm_h,
 	__lsx_vstelm_w): use imm type for the last argument.
 	(__lsx_vsigncov_b, __lsx_vsigncov_h, __lsx_vsigncov_w,
 	__lsx_vsigncov_d): remove duplicate definitions.
 ---
 gcc/doc/extend.texi | 90 ++++++++++++++++++++++-----------------------
 1 file changed, 43 insertions(+), 47 deletions(-)
 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
 index bb042ae78..ac8da4e80 100644
 --- a/gcc/doc/extend.texi
 +++ b/gcc/doc/extend.texi
@@ -16392,7 +16392,7 @@ int __lsx_bz_v (__m128i);
 int __lsx_bz_w (__m128i);
 __m128i __lsx_vabsd_b (__m128i, __m128i);
 __m128i __lsx_vabsd_bu (__m128i, __m128i);
 -__m128i __lsx_vabsd_di (__m128i, __m128i);
 +__m128i __lsx_vabsd_d (__m128i, __m128i);
 __m128i __lsx_vabsd_du (__m128i, __m128i);
 __m128i __lsx_vabsd_h (__m128i, __m128i);
 __m128i __lsx_vabsd_hu (__m128i, __m128i);
@@ -16598,14 +16598,14 @@ __m128 __lsx_vfnmsub_s (__m128, __m128, __m128);
 __m128d __lsx_vfrecip_d (__m128d);
 __m128 __lsx_vfrecip_s (__m128);
 __m128d __lsx_vfrint_d (__m128d);
 -__m128i __lsx_vfrintrm_d (__m128d);
 -__m128i __lsx_vfrintrm_s (__m128);
 -__m128i __lsx_vfrintrne_d (__m128d);
 -__m128i __lsx_vfrintrne_s (__m128);
 -__m128i __lsx_vfrintrp_d (__m128d);
 -__m128i __lsx_vfrintrp_s (__m128);
 -__m128i __lsx_vfrintrz_d (__m128d);
 -__m128i __lsx_vfrintrz_s (__m128);
 +__m128d __lsx_vfrintrm_d (__m128d);
 +__m128 __lsx_vfrintrm_s (__m128);
 +__m128d __lsx_vfrintrne_d (__m128d);
 +__m128 __lsx_vfrintrne_s (__m128);
 +__m128d __lsx_vfrintrp_d (__m128d);
 +__m128 __lsx_vfrintrp_s (__m128);
 +__m128d __lsx_vfrintrz_d (__m128d);
 +__m128 __lsx_vfrintrz_s (__m128);
 __m128 __lsx_vfrint_s (__m128);
 __m128d __lsx_vfrsqrt_d (__m128d);
 __m128 __lsx_vfrsqrt_s (__m128);
@@ -16674,12 +16674,12 @@ __m128i __lsx_vinsgr2vr_b (__m128i, int, imm0_15);
 __m128i __lsx_vinsgr2vr_d (__m128i, long int, imm0_1);
 __m128i __lsx_vinsgr2vr_h (__m128i, int, imm0_7);
 __m128i __lsx_vinsgr2vr_w (__m128i, int, imm0_3);
 -__m128i __lsx_vld (void *, imm_n2048_2047)
 -__m128i __lsx_vldi (imm_n1024_1023)
 -__m128i __lsx_vldrepl_b (void *, imm_n2048_2047)
 -__m128i __lsx_vldrepl_d (void *, imm_n256_255)
 -__m128i __lsx_vldrepl_h (void *, imm_n1024_1023)
 -__m128i __lsx_vldrepl_w (void *, imm_n512_511)
 +__m128i __lsx_vld (void *, imm_n2048_2047);
 +__m128i __lsx_vldi (imm_n1024_1023);
 +__m128i __lsx_vldrepl_b (void *, imm_n2048_2047);
 +__m128i __lsx_vldrepl_d (void *, imm_n256_255);
 +__m128i __lsx_vldrepl_h (void *, imm_n1024_1023);
 +__m128i __lsx_vldrepl_w (void *, imm_n512_511);
 __m128i __lsx_vldx (void *, long int);
 __m128i __lsx_vmadd_b (__m128i, __m128i, __m128i);
 __m128i __lsx_vmadd_d (__m128i, __m128i, __m128i);
@@ -16715,13 +16715,13 @@ __m128i __lsx_vmax_d (__m128i, __m128i);
 __m128i __lsx_vmax_du (__m128i, __m128i);
 __m128i __lsx_vmax_h (__m128i, __m128i);
 __m128i __lsx_vmax_hu (__m128i, __m128i);
 -__m128i __lsx_vmaxi_b (__m128i, imm_n16_15)
 +__m128i __lsx_vmaxi_b (__m128i, imm_n16_15);
 __m128i __lsx_vmaxi_bu (__m128i, imm0_31);
 -__m128i __lsx_vmaxi_d (__m128i, imm_n16_15)
 +__m128i __lsx_vmaxi_d (__m128i, imm_n16_15);
 __m128i __lsx_vmaxi_du (__m128i, imm0_31);
 -__m128i __lsx_vmaxi_h (__m128i, imm_n16_15)
 +__m128i __lsx_vmaxi_h (__m128i, imm_n16_15);
 __m128i __lsx_vmaxi_hu (__m128i, imm0_31);
 -__m128i __lsx_vmaxi_w (__m128i, imm_n16_15)
 +__m128i __lsx_vmaxi_w (__m128i, imm_n16_15);
 __m128i __lsx_vmaxi_wu (__m128i, imm0_31);
 __m128i __lsx_vmax_w (__m128i, __m128i);
 __m128i __lsx_vmax_wu (__m128i, __m128i);
@@ -16731,13 +16731,13 @@ __m128i __lsx_vmin_d (__m128i, __m128i);
 __m128i __lsx_vmin_du (__m128i, __m128i);
 __m128i __lsx_vmin_h (__m128i, __m128i);
 __m128i __lsx_vmin_hu (__m128i, __m128i);
 -__m128i __lsx_vmini_b (__m128i, imm_n16_15)
 +__m128i __lsx_vmini_b (__m128i, imm_n16_15);
 __m128i __lsx_vmini_bu (__m128i, imm0_31);
 -__m128i __lsx_vmini_d (__m128i, imm_n16_15)
 +__m128i __lsx_vmini_d (__m128i, imm_n16_15);
 __m128i __lsx_vmini_du (__m128i, imm0_31);
 -__m128i __lsx_vmini_h (__m128i, imm_n16_15)
 +__m128i __lsx_vmini_h (__m128i, imm_n16_15);
 __m128i __lsx_vmini_hu (__m128i, imm0_31);
 -__m128i __lsx_vmini_w (__m128i, imm_n16_15)
 +__m128i __lsx_vmini_w (__m128i, imm_n16_15);
 __m128i __lsx_vmini_wu (__m128i, imm0_31);
 __m128i __lsx_vmin_w (__m128i, __m128i);
 __m128i __lsx_vmin_wu (__m128i, __m128i);
@@ -16826,11 +16826,11 @@ __m128i __lsx_vpickod_d (__m128i, __m128i);
 __m128i __lsx_vpickod_h (__m128i, __m128i);
 __m128i __lsx_vpickod_w (__m128i, __m128i);
 int __lsx_vpickve2gr_b (__m128i, imm0_15);
 -unsinged int __lsx_vpickve2gr_bu (__m128i, imm0_15);
 +unsigned int __lsx_vpickve2gr_bu (__m128i, imm0_15);
 long int __lsx_vpickve2gr_d (__m128i, imm0_1);
 unsigned long int __lsx_vpickve2gr_du (__m128i, imm0_1);
 int __lsx_vpickve2gr_h (__m128i, imm0_7);
 -unsinged int __lsx_vpickve2gr_hu (__m128i, imm0_7);
 +unsigned int __lsx_vpickve2gr_hu (__m128i, imm0_7);
 int __lsx_vpickve2gr_w (__m128i, imm0_3);
 unsigned int __lsx_vpickve2gr_wu (__m128i, imm0_3);
 __m128i __lsx_vreplgr2vr_b (int);
@@ -16893,10 +16893,6 @@ __m128i __lsx_vsigncov_b (__m128i, __m128i);
 __m128i __lsx_vsigncov_d (__m128i, __m128i);
 __m128i __lsx_vsigncov_h (__m128i, __m128i);
 __m128i __lsx_vsigncov_w (__m128i, __m128i);
 -__m128i __lsx_vsigncov_b (__m128i, __m128i);
 -__m128i __lsx_vsigncov_d (__m128i, __m128i);
 -__m128i __lsx_vsigncov_h (__m128i, __m128i);
 -__m128i __lsx_vsigncov_w (__m128i, __m128i);
 __m128i __lsx_vsle_b (__m128i, __m128i);
 __m128i __lsx_vsle_bu (__m128i, __m128i);
 __m128i __lsx_vsle_d (__m128i, __m128i);
@@ -16953,7 +16949,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31);
 __m128i __lsx_vsran_b_h (__m128i, __m128i);
 __m128i __lsx_vsran_h_w (__m128i, __m128i);
 __m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63);
 __m128i __lsx_vsran_w_d (__m128i, __m128i);
@@ -16967,7 +16963,7 @@ __m128i __lsx_vsrari_w (__m128i, imm0_31);
 __m128i __lsx_vsrarn_b_h (__m128i, __m128i);
 __m128i __lsx_vsrarn_h_w (__m128i, __m128i);
 __m128i __lsx_vsrarni_b_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vsrarni_d_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vsrarni_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vsrarni_w_d (__m128i, __m128i, imm0_63);
 __m128i __lsx_vsrarn_w_d (__m128i, __m128i);
@@ -16983,7 +16979,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31);
 __m128i __lsx_vsrln_b_h (__m128i, __m128i);
 __m128i __lsx_vsrln_h_w (__m128i, __m128i);
 __m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63);
 __m128i __lsx_vsrln_w_d (__m128i, __m128i);
@@ -16997,7 +16993,7 @@ __m128i __lsx_vsrlri_w (__m128i, imm0_31);
 __m128i __lsx_vsrlrn_b_h (__m128i, __m128i);
 __m128i __lsx_vsrlrn_h_w (__m128i, __m128i);
 __m128i __lsx_vsrlrni_b_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vsrlrni_d_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vsrlrni_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vsrlrni_w_d (__m128i, __m128i, imm0_63);
 __m128i __lsx_vsrlrn_w_d (__m128i, __m128i);
@@ -17009,8 +17005,8 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i);
 __m128i __lsx_vssran_h_w (__m128i, __m128i);
 __m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15);
 __m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127)
 -__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127);
 +__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrani_w_d (__m128i, __m128i, imm0_63);
@@ -17023,8 +17019,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i);
 __m128i __lsx_vssrarn_h_w (__m128i, __m128i);
 __m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15);
 __m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127)
 -__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127);
 +__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63);
@@ -17037,8 +17033,8 @@ __m128i __lsx_vssrln_hu_w (__m128i, __m128i);
 __m128i __lsx_vssrln_h_w (__m128i, __m128i);
 __m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15);
 __m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127)
 -__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127);
 +__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrlni_w_d (__m128i, __m128i, imm0_63);
@@ -17051,8 +17047,8 @@ __m128i __lsx_vssrlrn_hu_w (__m128i, __m128i);
 __m128i __lsx_vssrlrn_h_w (__m128i, __m128i);
 __m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15);
 __m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15);
 -__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127)
 -__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127)
 +__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127);
 +__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127);
 __m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31);
 __m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63);
@@ -17067,12 +17063,12 @@ __m128i __lsx_vssub_h (__m128i, __m128i);
 __m128i __lsx_vssub_hu (__m128i, __m128i);
 __m128i __lsx_vssub_w (__m128i, __m128i);
 __m128i __lsx_vssub_wu (__m128i, __m128i);
 -void __lsx_vst (__m128i, void *, imm_n2048_2047)
 -void __lsx_vstelm_b (__m128i, void *, imm_n128_127, idx);
 -void __lsx_vstelm_d (__m128i, void *, imm_n128_127, idx);
 -void __lsx_vstelm_h (__m128i, void *, imm_n128_127, idx);
 -void __lsx_vstelm_w (__m128i, void *, imm_n128_127, idx);
 -void __lsx_vstx (__m128i, void *, long int)
 +void __lsx_vst (__m128i, void *, imm_n2048_2047);
 +void __lsx_vstelm_b (__m128i, void *, imm_n128_127, imm0_15);
 +void __lsx_vstelm_d (__m128i, void *, imm_n128_127, imm0_1);
 +void __lsx_vstelm_h (__m128i, void *, imm_n128_127, imm0_7);
 +void __lsx_vstelm_w (__m128i, void *, imm_n128_127, imm0_3);
 +void __lsx_vstx (__m128i, void *, long int);
 __m128i __lsx_vsub_b (__m128i, __m128i);
 __m128i __lsx_vsub_d (__m128i, __m128i);
 __m128i __lsx_vsub_h (__m128i, __m128i);
 -- 
 2.43.0
--- a/0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch
+++ b/0081-LoongArch-Fix-builtin-function-prototypes-for-LASX-i.patch
@ -0,0 +1,60 @@
 From d9965ed8d9f4244ac1948c6fb92c7c0f7d80b3a4 Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Tue, 19 Dec 2023 16:43:17 +0800
 Subject: [PATCH 081/188] LoongArch: Fix builtin function prototypes for LASX
 in doc.
 gcc/ChangeLog:
 	* doc/extend.texi: Fix two problems found in the documentation
 	submitted earlier: wrong function return types, and variable names
 	used where the actual parameter types should appear.
 ---
 gcc/doc/extend.texi | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)
 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
 index ac8da4e80..c793c9c5d 100644
 --- a/gcc/doc/extend.texi
 +++ b/gcc/doc/extend.texi
@@ -17438,14 +17438,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256);
 __m256d __lasx_xvfrecip_d (__m256d);
 __m256 __lasx_xvfrecip_s (__m256);
 __m256d __lasx_xvfrint_d (__m256d);
 -__m256i __lasx_xvfrintrm_d (__m256d);
 -__m256i __lasx_xvfrintrm_s (__m256);
 -__m256i __lasx_xvfrintrne_d (__m256d);
 -__m256i __lasx_xvfrintrne_s (__m256);
 -__m256i __lasx_xvfrintrp_d (__m256d);
 -__m256i __lasx_xvfrintrp_s (__m256);
 -__m256i __lasx_xvfrintrz_d (__m256d);
 -__m256i __lasx_xvfrintrz_s (__m256);
 +__m256d __lasx_xvfrintrm_d (__m256d);
 +__m256 __lasx_xvfrintrm_s (__m256);
 +__m256d __lasx_xvfrintrne_d (__m256d);
 +__m256 __lasx_xvfrintrne_s (__m256);
 +__m256d __lasx_xvfrintrp_d (__m256d);
 +__m256 __lasx_xvfrintrp_s (__m256);
 +__m256d __lasx_xvfrintrz_d (__m256d);
 +__m256 __lasx_xvfrintrz_s (__m256);
 __m256 __lasx_xvfrint_s (__m256);
 __m256d __lasx_xvfrsqrt_d (__m256d);
 __m256 __lasx_xvfrsqrt_s (__m256);
@@ -17912,10 +17912,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i);
 __m256i __lasx_xvssub_w (__m256i, __m256i);
 __m256i __lasx_xvssub_wu (__m256i, __m256i);
 void __lasx_xvst (__m256i, void *, imm_n2048_2047);
 -void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx);
 -void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx);
 -void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx);
 -void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx);
 +void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_31);
 +void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_3);
 +void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_15);
 +void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_7);
 void __lasx_xvstx (__m256i, void *, long int);
 __m256i __lasx_xvsub_b (__m256i, __m256i);
 __m256i __lasx_xvsub_d (__m256i, __m256i);
 -- 
 2.43.0
--- a/0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch
+++ b/0082-LoongArch-Add-asm-modifiers-to-the-LSX-and-LASX-dire.patch
@ -0,0 +1,92 @@
 From 48f0d47eb6dc2c799c845a25cfabd586bd176378 Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Tue, 5 Dec 2023 14:44:35 +0800
 Subject: [PATCH 082/188] LoongArch: Add asm modifiers to the LSX and LASX
 directives in the doc.
 gcc/ChangeLog:
 	* doc/extend.texi: Add the asm operand modifiers for vector registers to the doc.
 	* doc/md.texi: Refine the description of the modifier 'f' in the doc.
 ---
 gcc/doc/extend.texi | 46 +++++++++++++++++++++++++++++++++++++++++++++
 gcc/doc/md.texi     |  2 +-
 2 files changed, 47 insertions(+), 1 deletion(-)
 diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
 index c793c9c5d..bcb9329c2 100644
 --- a/gcc/doc/extend.texi
 +++ b/gcc/doc/extend.texi
@@ -11424,10 +11424,56 @@ The list below describes the supported modifiers and their effects for LoongArch
 @item @code{d} @tab Same as @code{c}.
 @item @code{i} @tab Print the character ''@code{i}'' if the operand is not a register.
 @item @code{m} @tab Same as @code{c}, but the printed value is @code{operand - 1}.
 +@item @code{u} @tab Print a LASX register.
 +@item @code{w} @tab Print a LSX register.
 @item @code{X} @tab Print a constant integer operand in hexadecimal.
 @item @code{z} @tab Print the operand in its unmodified form, followed by a comma.
 @end multitable
 +References to input and output operands in the assembler template of extended
 +asm statements can use modifiers to affect the way the operands are formatted
 +in the code output to the assembler.  For example, the following code uses the
 +'w' modifier for LoongArch:
 +
 +@example
 +test-asm.c:
 +
 +#include <lsxintrin.h>
 +
 +__m128i foo (void)
 +@{
 +__m128i  a,b,c;
 +__asm__ ("vadd.d %w0,%w1,%w2\n\t"
 +   :"=f" (c)
 +   :"f" (a),"f" (b));
 +
 +return c;
 +@}
 +
 +@end example
 +
 +@noindent
 +The compile command for the test case is as follows:
 +
 +@example
 +gcc test-asm.c -mlsx -S -o test-asm.s
 +@end example
 +
 +@noindent
 +The assembly statement produces the following assembly code:
 +
 +@example
 +vadd.d $vr0,$vr0,$vr1
 +@end example
 +
 +This is a 128-bit vector addition instruction, @code{c} (referred to in the
 +template string as %0) is the output, and @code{a} (%1) and @code{b} (%2) are
 +the inputs.  @code{__m128i} is a vector data type defined in the  file
 +@code{lsxintrin.h} (@xref{LoongArch SX Vector Intrinsics}).  The symbol '=f'
 +represents a constraint using a floating-point register as an output type, and
 +the 'f' in the input operand represents a constraint using a floating-point
 +register operand, which can refer to the definition of a constraint
 +(@xref{Constraints}) in gcc.
 @lowersections
 @include md.texi
 diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
 index b58da0787..a2e839073 100644
 --- a/gcc/doc/md.texi
 +++ b/gcc/doc/md.texi
@@ -2750,7 +2750,7 @@ $r1h
 @item LoongArch---@file{config/loongarch/constraints.md}
 @table @code
 @item f
 -A floating-point register (if available).
 +A floating-point or vector register (if available).
 @item k
 A memory operand whose address is formed by a base register and
 (optionally scaled) index register.
 -- 
 2.43.0
--- a/0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
+++ b/0083-LoongArch-Implement-FCCmode-reload-and-cstore-ANYF-m.patch
@ -0,0 +1,392 @@
 From b199de440fc877efdd1dde90b5c1c5111e060c1b Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Fri, 15 Dec 2023 01:49:40 +0800
 Subject: [PATCH 083/188] LoongArch: Implement FCCmode reload and
 cstore<ANYF:mode>4
 We used a branch to load floating-point comparison results into GPR.
 This is very slow when the branch is not predictable.
 Implement movfcc so we can reload FCCmode into GPRs, FPRs, and MEM.
 Then implement cstore<ANYF:mode>4.
 gcc/ChangeLog:
 	* config/loongarch/loongarch-tune.h
 	(loongarch_rtx_cost_data::movcf2gr): New field.
 	(loongarch_rtx_cost_data::movcf2gr_): New method.
 	(loongarch_rtx_cost_data::use_movcf2gr): New method.
 	* config/loongarch/loongarch-def.cc
 	(loongarch_rtx_cost_data::loongarch_rtx_cost_data): Set movcf2gr
 	to COSTS_N_INSNS (7) and movgr2cf to COSTS_N_INSNS (15), based
 	on timing on LA464.
 	(loongarch_cpu_rtx_cost_data): Set movcf2gr and movgr2cf to
 	COSTS_N_INSNS (1) for LA664.
 	(loongarch_rtx_cost_optimize_size): Set movcf2gr and movgr2cf to
 	COSTS_N_INSNS (1) + 1.
 	* config/loongarch/predicates.md (loongarch_fcmp_operator): New
 	predicate.
 	* config/loongarch/loongarch.md (movfcc): Change to
 	define_expand.
 	(movfcc_internal): New define_insn.
 	(fcc_to_<X:mode>): New define_insn.
 	(cstore<ANYF:mode>4): New define_expand.
 	* config/loongarch/loongarch.cc
 	(loongarch_hard_regno_mode_ok_uncached): Allow FCCmode in GPRs
 	and FPRs.
 	(loongarch_secondary_reload): Reload FCCmode via FPR and/or GPR.
 	(loongarch_emit_float_compare): Call gen_reg_rtx instead of
 	loongarch_allocate_fcc.
 	(loongarch_allocate_fcc): Remove.
 	(loongarch_move_to_gpr_cost): Handle FCC_REGS -> GR_REGS.
 	(loongarch_move_from_gpr_cost): Handle GR_REGS -> FCC_REGS.
 	(loongarch_register_move_cost): Handle FCC_REGS -> FCC_REGS,
 	FCC_REGS -> FP_REGS, and FP_REGS -> FCC_REGS.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/movcf2gr.c: New test.
 	* gcc.target/loongarch/movcf2gr-via-fr.c: New test.
 ---
 gcc/config/loongarch/loongarch-def.cc         | 13 +++-
 gcc/config/loongarch/loongarch-tune.h         | 15 +++-
 gcc/config/loongarch/loongarch.cc             | 70 ++++++++++++-------
 gcc/config/loongarch/loongarch.md             | 69 ++++++++++++++++--
 gcc/config/loongarch/predicates.md            |  4 ++
 .../gcc.target/loongarch/movcf2gr-via-fr.c    | 10 +++
 gcc/testsuite/gcc.target/loongarch/movcf2gr.c |  9 +++
 7 files changed, 157 insertions(+), 33 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/movcf2gr.c
 diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
 index 4a8885e83..843be78e4 100644
 --- a/gcc/config/loongarch/loongarch-def.cc
 +++ b/gcc/config/loongarch/loongarch-def.cc
@@ -101,15 +101,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
     int_mult_di (COSTS_N_INSNS (4)),
     int_div_si (COSTS_N_INSNS (5)),
     int_div_di (COSTS_N_INSNS (5)),
 +    movcf2gr (COSTS_N_INSNS (7)),
 +    movgr2cf (COSTS_N_INSNS (15)),
     branch_cost (6),
     memory_latency (4) {}
 /* The following properties cannot be looked up directly using "cpucfg".
  So it is necessary to provide a default value for "unknown native"
  tune targets (i.e. -mtune=native while PRID does not correspond to
 - any known "-mtune" type).  Currently all numbers are default.  */
 + any known "-mtune" type).  */
 array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
 -  array_tune<loongarch_rtx_cost_data> ();
 +  array_tune<loongarch_rtx_cost_data> ()
 +    .set (CPU_LA664,
 +	  loongarch_rtx_cost_data ()
 +	    .movcf2gr_ (COSTS_N_INSNS (1))
 +	    .movgr2cf_ (COSTS_N_INSNS (1)));
 /* RTX costs to use when optimizing for size.
    We use a value slightly larger than COSTS_N_INSNS (1) for all of them
@@ -125,7 +131,8 @@ const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
     .int_mult_si_ (COST_COMPLEX_INSN)
     .int_mult_di_ (COST_COMPLEX_INSN)
     .int_div_si_ (COST_COMPLEX_INSN)
 -    .int_div_di_ (COST_COMPLEX_INSN);
 +    .int_div_di_ (COST_COMPLEX_INSN)
 +    .movcf2gr_ (COST_COMPLEX_INSN);
 array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
   .set (CPU_NATIVE, 4)
 diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
 index 616b94e87..26f163f0a 100644
 --- a/gcc/config/loongarch/loongarch-tune.h
 +++ b/gcc/config/loongarch/loongarch-tune.h
@@ -35,6 +35,8 @@ struct loongarch_rtx_cost_data
   unsigned short int_mult_di;
   unsigned short int_div_si;
   unsigned short int_div_di;
 +  unsigned short movcf2gr;
 +  unsigned short movgr2cf;
   unsigned short branch_cost;
   unsigned short memory_latency;
@@ -95,6 +97,18 @@ struct loongarch_rtx_cost_data
     return *this;
   }
 +  loongarch_rtx_cost_data movcf2gr_ (unsigned short _movcf2gr)
 +  {
 +    movcf2gr = _movcf2gr;
 +    return *this;
 +  }
 +
 +  loongarch_rtx_cost_data movgr2cf_ (unsigned short _movgr2cf)
 +  {
 +    movgr2cf = _movgr2cf;
 +    return *this;
 +  }
 +
   loongarch_rtx_cost_data branch_cost_ (unsigned short _branch_cost)
   {
     branch_cost = _branch_cost;
@@ -106,7 +120,6 @@ struct loongarch_rtx_cost_data
     memory_latency = _memory_latency;
     return *this;
   }
 -
 };
 /* Costs to use when optimizing for size.  */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 3aeafeafd..56f631b1a 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -5119,29 +5119,6 @@ loongarch_zero_if_equal (rtx cmp0, rtx cmp1)
 		       OPTAB_DIRECT);
 }
 -/* Allocate a floating-point condition-code register of mode MODE.  */
 -
 -static rtx
 -loongarch_allocate_fcc (machine_mode mode)
 -{
 -  unsigned int regno, count;
 -
 -  gcc_assert (TARGET_HARD_FLOAT);
 -
 -  if (mode == FCCmode)
 -    count = 1;
 -  else
 -    gcc_unreachable ();
 -
 -  cfun->machine->next_fcc += -cfun->machine->next_fcc & (count - 1);
 -  if (cfun->machine->next_fcc > FCC_REG_LAST - FCC_REG_FIRST)
 -    cfun->machine->next_fcc = 0;
 -
 -  regno = FCC_REG_FIRST + cfun->machine->next_fcc;
 -  cfun->machine->next_fcc += count;
 -  return gen_rtx_REG (mode, regno);
 -}
 -
 /* Sign- or zero-extend OP0 and OP1 for integer comparisons.  */
 static void
@@ -5256,7 +5233,7 @@ loongarch_emit_float_compare (enum rtx_code *code, rtx *op0, rtx *op1)
      operands for FCMP.cond.fmt, instead a reversed condition code is
      required and a test for false.  */
   *code = NE;
 -  *op0 = loongarch_allocate_fcc (FCCmode);
 +  *op0 = gen_reg_rtx (FCCmode);
   *op1 = const0_rtx;
   loongarch_emit_binary (cmp_code, *op0, cmp_op0, cmp_op1);
@@ -6626,7 +6603,7 @@ loongarch_hard_regno_mode_ok_uncached (unsigned int regno, machine_mode mode)
   enum mode_class mclass;
   if (mode == FCCmode)
 -    return FCC_REG_P (regno);
 +    return FCC_REG_P (regno) || GP_REG_P (regno) || FP_REG_P (regno);
   size = GET_MODE_SIZE (mode);
   mclass = GET_MODE_CLASS (mode);
@@ -6841,6 +6818,9 @@ loongarch_move_to_gpr_cost (reg_class_t from)
       /* MOVFR2GR, etc.  */
       return 4;
 +    case FCC_REGS:
 +      return loongarch_cost->movcf2gr;
 +
     default:
       return 0;
     }
@@ -6863,6 +6843,9 @@ loongarch_move_from_gpr_cost (reg_class_t to)
       /* MOVGR2FR, etc.  */
       return 4;
 +    case FCC_REGS:
 +      return loongarch_cost->movgr2cf;
 +
     default:
       return 0;
     }
@@ -6897,6 +6880,10 @@ loongarch_register_move_cost (machine_mode mode, reg_class_t from,
   if (to == dregs)
     return loongarch_move_to_gpr_cost (from);
 +  /* fcc -> fcc, fcc -> fpr, or fpr -> fcc. */
 +  if (from == FCC_REGS || to == FCC_REGS)
 +    return COSTS_N_INSNS (from == to ? 2 : 1);
 +
   /* Handles cases that require a GPR temporary.  */
   cost1 = loongarch_move_to_gpr_cost (from);
   if (cost1 != 0)
@@ -6933,6 +6920,39 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
   regno = true_regnum (x);
 +  if (mode == FCCmode)
 +    {
 +      if (reg_class_subset_p (rclass, FCC_REGS) && !FP_REG_P (regno))
 +	{
 +	  if (FCC_REG_P (regno))
 +	    return FP_REGS;
 +
 +	  auto fn = in_p ? loongarch_move_from_gpr_cost
 +			 : loongarch_move_to_gpr_cost;
 +
 +	  if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
 +	    return FP_REGS;
 +
 +	  return GP_REG_P (regno) ? NO_REGS : GR_REGS;
 +	}
 +
 +      if (reg_class_subset_p (rclass, GR_REGS) && FCC_REG_P (regno))
 +	{
 +	  auto fn = in_p ? loongarch_move_to_gpr_cost
 +			 : loongarch_move_from_gpr_cost;
 +
 +	  if (fn (FCC_REGS) > fn (FP_REGS) + COSTS_N_INSNS (1))
 +	    return FP_REGS;
 +
 +	  return NO_REGS;
 +	}
 +
 +      if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x))
 +	return GR_REGS;
 +
 +      return NO_REGS;
 +    }
 +
   if (reg_class_subset_p (rclass, FP_REGS))
     {
       if (regno < 0
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 23368008e..6cf71d9e4 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2283,11 +2283,72 @@
 ;; Clear one FCC register
 -(define_insn "movfcc"
 -  [(set (match_operand:FCC 0 "register_operand" "=z")
 -	(const_int 0))]
 +(define_expand "movfcc"
 +  [(set (match_operand:FCC 0 "")
 +	(match_operand:FCC 1 ""))]
 +  "TARGET_HARD_FLOAT"
 +{
 +  if (memory_operand (operands[0], FCCmode)
 +      && memory_operand (operands[1], FCCmode))
 +    operands[1] = force_reg (FCCmode, operands[1]);
 +})
 +
 +(define_insn "movfcc_internal"
 +  [(set (match_operand:FCC 0 "nonimmediate_operand"
 +			     "=z,z,*f,*f,*r,*r,*m,*f,*r,z,*r")
 +	(match_operand:FCC 1 "reg_or_0_operand"
 +			     "J,*f,z,*f,J*r,*m,J*r,J*r,*f,*r,z"))]
 +  "TARGET_HARD_FLOAT"
 +  "@
 +   fcmp.caf.s\t%0,$f0,$f0
 +   movfr2cf\t%0,%1
 +   movcf2fr\t%0,%1
 +   fmov.s\t%0,%1
 +   or\t%0,%z1,$r0
 +   ld.b\t%0,%1
 +   st.b\t%z1,%0
 +   movgr2fr.w\t%0,%1
 +   movfr2gr.s\t%0,%1
 +   movgr2cf\t%0,%1
 +   movcf2gr\t%0,%1"
 +  [(set_attr "type" "move")
 +   (set_attr "mode" "FCC")])
 +
 +(define_insn "fcc_to_<X:mode>"
 +  [(set (match_operand:X 0 "register_operand" "=r")
 +	(if_then_else:X (ne (match_operand:FCC 1 "register_operand" "0")
 +			    (const_int 0))
 +			(const_int 1)
 +			(const_int 0)))]
 +  "TARGET_HARD_FLOAT"
   ""
 -  "fcmp.caf.s\t%0,$f0,$f0")
 +  [(set_attr "length" "0")
 +   (set_attr "type" "ghost")])
 +
 +(define_expand "cstore<ANYF:mode>4"
 +  [(set (match_operand:SI 0 "register_operand")
 +	(match_operator:SI 1 "loongarch_fcmp_operator"
 +	  [(match_operand:ANYF 2 "register_operand")
 +	   (match_operand:ANYF 3 "register_operand")]))]
 +  ""
 +  {
 +    rtx fcc = gen_reg_rtx (FCCmode);
 +    rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), FCCmode,
 +			      operands[2], operands[3]);
 +
 +    emit_insn (gen_rtx_SET (fcc, cmp));
 +    if (TARGET_64BIT)
 +      {
 +	rtx gpr = gen_reg_rtx (DImode);
 +	emit_insn (gen_fcc_to_di (gpr, fcc));
 +	emit_insn (gen_rtx_SET (operands[0],
 +				lowpart_subreg (SImode, gpr, DImode)));
 +      }
 +    else
 +      emit_insn (gen_fcc_to_si (operands[0], fcc));
 +
 +    DONE;
 +  })
 ;; Conditional move instructions.
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 88e54c915..58f9a7826 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -590,6 +590,10 @@
 (define_predicate "loongarch_cstore_operator"
   (match_code "ne,eq,gt,gtu,ge,geu,lt,ltu,le,leu"))
 +(define_predicate "loongarch_fcmp_operator"
 +  (match_code
 +    "unordered,uneq,unlt,unle,eq,lt,le,ordered,ltgt,ne,ge,gt,unge,ungt"))
 +
 (define_predicate "small_data_pattern"
   (and (match_code "set,parallel,unspec,unspec_volatile,prefetch")
        (match_test "loongarch_small_data_pattern_p (op)")))
 diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c
 new file mode 100644
 index 000000000..23334a3a3
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr-via-fr.c
@@ -0,0 +1,10 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mtune=la464 -mabi=lp64d" } */
 +/* { dg-final { scan-assembler "movcf2fr\t\\\$f\[0-9\]+,\\\$fcc" } } */
 +/* { dg-final { scan-assembler "movfr2gr\\.s\t\\\$r4" } } */
 +
 +int
 +t (float a, float b)
 +{
 +  return a > b;
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/movcf2gr.c b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c
 new file mode 100644
 index 000000000..d27c393b5
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/movcf2gr.c
@@ -0,0 +1,9 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mtune=la664 -mabi=lp64d" } */
 +/* { dg-final { scan-assembler "movcf2gr\t\\\$r4,\\\$fcc" } } */
 +
 +int
 +t (float a, float b)
 +{
 +  return a > b;
 +}
 -- 
 2.43.0
--- a/0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch
+++ b/0084-LoongArch-Add-sign_extend-pattern-for-32-bit-rotate-.patch
@ -0,0 +1,69 @@
 From 8da6a317bc3ad64da8590649b83a841391f20438 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 17 Dec 2023 04:26:23 +0800
 Subject: [PATCH 084/188] LoongArch: Add sign_extend pattern for 32-bit rotate
 shift
 Remove a redundant sign extension.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (rotrsi3_extend): New
 	define_insn.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/rotrw.c: New test.
 ---
 gcc/config/loongarch/loongarch.md          | 10 ++++++++++
 gcc/testsuite/gcc.target/loongarch/rotrw.c | 17 +++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotrw.c
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 6cf71d9e4..44e8d336a 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2893,6 +2893,16 @@
   [(set_attr "type" "shift,shift")
    (set_attr "mode" "<MODE>")])
 +(define_insn "rotrsi3_extend"
 +  [(set (match_operand:DI 0 "register_operand" "=r,r")
 +	(sign_extend:DI
 +	  (rotatert:SI (match_operand:SI 1 "register_operand" "r,r")
 +		       (match_operand:SI 2 "arith_operand" "r,I"))))]
 +  "TARGET_64BIT"
 +  "rotr%i2.w\t%0,%1,%2"
 +  [(set_attr "type" "shift,shift")
 +   (set_attr "mode" "SI")])
 +
 ;; The following templates were added to generate "bstrpick.d + alsl.d"
 ;; instruction pairs.
 ;; It is required that the values of const_immalsl_operand and
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotrw.c b/gcc/testsuite/gcc.target/loongarch/rotrw.c
 new file mode 100644
 index 000000000..6ed45e8b8
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotrw.c
@@ -0,0 +1,17 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2" } */
 +/* { dg-final { scan-assembler "rotr\\.w\t\\\$r4,\\\$r4,\\\$r5" } } */
 +/* { dg-final { scan-assembler "rotri\\.w\t\\\$r4,\\\$r4,5" } } */
 +/* { dg-final { scan-assembler-not "slli\\.w" } } */
 +
 +unsigned
 +rotr (unsigned a, unsigned b)
 +{
 +  return a >> b | a << 32 - b;
 +}
 +
 +unsigned
 +rotri (unsigned a)
 +{
 +  return a >> 5 | a << 27;
 +}
 -- 
 2.43.0
--- a/0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch
+++ b/0085-LoongArch-Fixed-bug-in-bstrins_-mode-_for_ior_mask-t.patch
@ -0,0 +1,37 @@
 From e56d6d9526e1565fffeb320e15796385eb1732b8 Mon Sep 17 00:00:00 2001
 From: Li Wei <liwei@loongson.cn>
 Date: Mon, 25 Dec 2023 11:20:23 +0800
 Subject: [PATCH 085/188] LoongArch: Fixed bug in *bstrins_<mode>_for_ior_mask
 template.
 We found that using the latest compiled gcc will cause a miscompare error
 when running spec2006 400.perlbench test with -flto turned on.  After testing,
 it was found that only the LoongArch architecture will report errors.
 The first error commit was located through the git bisect command as
 r14-3773-g5b857e87201335.  Through debugging, it was found that the problem
 was that the split condition of the *bstrins_<mode>_for_ior_mask template was
 empty, which should actually be consistent with the insn condition.
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md: Adjust.
 ---
 gcc/config/loongarch/loongarch.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 44e8d336a..3d5b75825 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -1489,7 +1489,7 @@
   "loongarch_pre_reload_split () && \
    loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
   "#"
 -  ""
 +  "&& true"
   [(set (match_dup 0) (match_dup 1))
    (set (zero_extract:GPR (match_dup 0) (match_dup 2) (match_dup 4))
 	(match_dup 3))]
 -- 
 2.43.0
--- a/0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
+++ b/0086-LoongArch-Fix-insn-output-of-vec_concat-templates-fo.patch
@ -0,0 +1,132 @@
 From b1947829a5949a37db09bc23681e44c8479bd404 Mon Sep 17 00:00:00 2001
 From: Chenghui Pan <panchenghui@loongson.cn>
 Date: Fri, 22 Dec 2023 16:22:03 +0800
 Subject: [PATCH 086/188] LoongArch: Fix insn output of vec_concat templates
 for LASX.
 When investigating the failure of gcc.dg/vect/slp-reduc-sad.c, the following
 instruction block is generated by vec_concatv32qi (which is
 generated by vec_initv32qiv16qi) at the entrance of the foo() function:
  vldx    $vr3,$r5,$r6
  vld     $vr2,$r5,0
  xvpermi.q       $xr2,$xr3,0x20
 This causes the high and low 128-bit parts of the vec_initv32qiv16qi
 operation's result to be swapped.
 According to the similar implementations of other targets and the LSX
 implementation of the following RTL representation, the current definition
 of "vec_concat<mode>" in lasx.md is wrong:
  (set (op0) (vec_concat (op1) (op2)))
 For correct behavior, the last argument of xvpermi.q should be 0x02
 instead of 0x20.  This patch fixes this issue and cleans up the vec_concat
 template implementation.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md (vec_concatv4di): Delete.
 	(vec_concatv8si): Delete.
 	(vec_concatv16hi): Delete.
 	(vec_concatv32qi): Delete.
 	(vec_concatv4df): Delete.
 	(vec_concatv8sf): Delete.
 	(vec_concat<mode>): New template with insn output fixed.
 ---
 gcc/config/loongarch/lasx.md | 74 ++++--------------------------------
 1 file changed, 7 insertions(+), 67 deletions(-)
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 9ca3f9278..46150f2fb 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -577,77 +577,17 @@
   [(set_attr "type" "simd_insert")
    (set_attr "mode" "<MODE>")])
 -(define_insn "vec_concatv4di"
 -  [(set (match_operand:V4DI 0 "register_operand" "=f")
 -	(vec_concat:V4DI
 -	  (match_operand:V2DI 1 "register_operand" "0")
 -	  (match_operand:V2DI 2 "register_operand" "f")))]
 -  "ISA_HAS_LASX"
 -{
 -  return "xvpermi.q\t%u0,%u2,0x20";
 -}
 -  [(set_attr "type" "simd_splat")
 -   (set_attr "mode" "V4DI")])
 -
 -(define_insn "vec_concatv8si"
 -  [(set (match_operand:V8SI 0 "register_operand" "=f")
 -	(vec_concat:V8SI
 -	  (match_operand:V4SI 1 "register_operand" "0")
 -	  (match_operand:V4SI 2 "register_operand" "f")))]
 -  "ISA_HAS_LASX"
 -{
 -  return "xvpermi.q\t%u0,%u2,0x20";
 -}
 -  [(set_attr "type" "simd_splat")
 -   (set_attr "mode" "V4DI")])
 -
 -(define_insn "vec_concatv16hi"
 -  [(set (match_operand:V16HI 0 "register_operand" "=f")
 -	(vec_concat:V16HI
 -	  (match_operand:V8HI 1 "register_operand" "0")
 -	  (match_operand:V8HI 2 "register_operand" "f")))]
 -  "ISA_HAS_LASX"
 -{
 -  return "xvpermi.q\t%u0,%u2,0x20";
 -}
 -  [(set_attr "type" "simd_splat")
 -   (set_attr "mode" "V4DI")])
 -
 -(define_insn "vec_concatv32qi"
 -  [(set (match_operand:V32QI 0 "register_operand" "=f")
 -	(vec_concat:V32QI
 -	  (match_operand:V16QI 1 "register_operand" "0")
 -	  (match_operand:V16QI 2 "register_operand" "f")))]
 -  "ISA_HAS_LASX"
 -{
 -  return "xvpermi.q\t%u0,%u2,0x20";
 -}
 -  [(set_attr "type" "simd_splat")
 -   (set_attr "mode" "V4DI")])
 -
 -(define_insn "vec_concatv4df"
 -  [(set (match_operand:V4DF 0 "register_operand" "=f")
 -	(vec_concat:V4DF
 -	  (match_operand:V2DF 1 "register_operand" "0")
 -	  (match_operand:V2DF 2 "register_operand" "f")))]
 -  "ISA_HAS_LASX"
 -{
 -  return "xvpermi.q\t%u0,%u2,0x20";
 -}
 -  [(set_attr "type" "simd_splat")
 -   (set_attr "mode" "V4DF")])
 -
 -(define_insn "vec_concatv8sf"
 -  [(set (match_operand:V8SF 0 "register_operand" "=f")
 -	(vec_concat:V8SF
 -	  (match_operand:V4SF 1 "register_operand" "0")
 -	  (match_operand:V4SF 2 "register_operand" "f")))]
 +(define_insn "vec_concat<mode>"
 +  [(set (match_operand:LASX 0 "register_operand" "=f")
 +	(vec_concat:LASX
 +	  (match_operand:<VHMODE256_ALL> 1 "register_operand" "0")
 +	  (match_operand:<VHMODE256_ALL> 2 "register_operand" "f")))]
   "ISA_HAS_LASX"
 {
 -  return "xvpermi.q\t%u0,%u2,0x20";
 +  return "xvpermi.q\t%u0,%u2,0x02";
 }
   [(set_attr "type" "simd_splat")
 -   (set_attr "mode" "V4DI")])
 +   (set_attr "mode" "<MODE>")])
 ;; xshuf.w
 (define_insn "lasx_xvperm_<lasxfmt_f_wd>"
 -- 
 2.43.0
--- a/0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
+++ b/0087-LoongArch-Fix-ICE-when-passing-two-same-vector-argum.patch
@ -0,0 +1,232 @@
 From 1096571509762846e2222f575bc981385b4e9fb7 Mon Sep 17 00:00:00 2001
 From: Chenghui Pan <panchenghui@loongson.cn>
 Date: Fri, 22 Dec 2023 16:18:44 +0800
 Subject: [PATCH 087/188] LoongArch: Fix ICE when passing two same vector
 argument consecutively
 Following code will cause ICE on LoongArch target:
  #include <lsxintrin.h>
  extern void bar (__m128i, __m128i);
  __m128i a;
  void
  foo ()
  {
    bar (a, a);
  }
 It is caused by a missing constraint alternative in mov<mode>_lsx.  This
 patch fixes the template and removes the unnecessary processing from the
 loongarch_split_move () function.
 This patch also cleans up the redundant wrappers around
 loongarch_split_move () and loongarch_split_move_p ().
 gcc/ChangeLog:
 	* config/loongarch/lasx.md: Use loongarch_split_move and
 	loongarch_split_move_p directly.
 	* config/loongarch/loongarch-protos.h
 	(loongarch_split_move): Remove unnecessary argument.
 	(loongarch_split_move_insn_p): Delete.
 	(loongarch_split_move_insn): Delete.
 	* config/loongarch/loongarch.cc
 	(loongarch_split_move_insn_p): Delete.
 	(loongarch_load_store_insns): Use loongarch_split_move_p
 	directly.
 	(loongarch_split_move): remove the unnecessary processing.
 	(loongarch_split_move_insn): Delete.
 	* config/loongarch/lsx.md: Use loongarch_split_move and
 	loongarch_split_move_p directly.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lsx/lsx-mov-1.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  |  4 +-
 gcc/config/loongarch/loongarch-protos.h       |  4 +-
 gcc/config/loongarch/loongarch.cc             | 49 +------------------
 gcc/config/loongarch/lsx.md                   | 10 ++--
 .../loongarch/vector/lsx/lsx-mov-1.c          | 14 ++++++
 5 files changed, 24 insertions(+), 57 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index 46150f2fb..dbbf5a136 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -839,10 +839,10 @@
   [(set (match_operand:LASX 0 "nonimmediate_operand")
 	(match_operand:LASX 1 "move_operand"))]
   "reload_completed && ISA_HAS_LASX
 -   && loongarch_split_move_insn_p (operands[0], operands[1])"
 +   && loongarch_split_move_p (operands[0], operands[1])"
   [(const_int 0)]
 {
 -  loongarch_split_move_insn (operands[0], operands[1], curr_insn);
 +  loongarch_split_move (operands[0], operands[1]);
   DONE;
 })
 diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
 index e5fcf3111..2067e50c3 100644
 --- a/gcc/config/loongarch/loongarch-protos.h
 +++ b/gcc/config/loongarch/loongarch-protos.h
@@ -82,11 +82,9 @@ extern rtx loongarch_legitimize_call_address (rtx);
 extern rtx loongarch_subword (rtx, bool);
 extern bool loongarch_split_move_p (rtx, rtx);
 -extern void loongarch_split_move (rtx, rtx, rtx);
 +extern void loongarch_split_move (rtx, rtx);
 extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
 extern void loongarch_split_plus_constant (rtx *, machine_mode);
 -extern bool loongarch_split_move_insn_p (rtx, rtx);
 -extern void loongarch_split_move_insn (rtx, rtx, rtx);
 extern void loongarch_split_128bit_move (rtx, rtx);
 extern bool loongarch_split_128bit_move_p (rtx, rtx);
 extern void loongarch_split_256bit_move (rtx, rtx);
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 56f631b1a..5c278386a 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -2558,7 +2558,6 @@ loongarch_split_const_insns (rtx x)
   return low + high;
 }
 -bool loongarch_split_move_insn_p (rtx dest, rtx src);
 /* Return one word of 128-bit value OP, taking into account the fixed
    endianness of certain registers.  BYTE selects from the byte address.  */
@@ -2598,7 +2597,7 @@ loongarch_load_store_insns (rtx mem, rtx_insn *insn)
     {
       set = single_set (insn);
       if (set
 -	  && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set)))
 +	  && !loongarch_split_move_p (SET_DEST (set), SET_SRC (set)))
 	might_split_p = false;
     }
@@ -4216,7 +4215,7 @@ loongarch_split_move_p (rtx dest, rtx src)
    SPLIT_TYPE describes the split condition.  */
 void
 -loongarch_split_move (rtx dest, rtx src, rtx insn_)
 +loongarch_split_move (rtx dest, rtx src)
 {
   rtx low_dest;
@@ -4254,33 +4253,6 @@ loongarch_split_move (rtx dest, rtx src, rtx insn_)
 			       loongarch_subword (src, true));
 	}
     }
 -
 -  /* This is a hack.  See if the next insn uses DEST and if so, see if we
 -     can forward SRC for DEST.  This is most useful if the next insn is a
 -     simple store.  */
 -  rtx_insn *insn = (rtx_insn *) insn_;
 -  struct loongarch_address_info addr = {};
 -  if (insn)
 -    {
 -      rtx_insn *next = next_nonnote_nondebug_insn_bb (insn);
 -      if (next)
 -	{
 -	  rtx set = single_set (next);
 -	  if (set && SET_SRC (set) == dest)
 -	    {
 -	      if (MEM_P (src))
 -		{
 -		  rtx tmp = XEXP (src, 0);
 -		  loongarch_classify_address (&addr, tmp, GET_MODE (tmp),
 -					      true);
 -		  if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg))
 -		    validate_change (next, &SET_SRC (set), src, false);
 -		}
 -	      else
 -		validate_change (next, &SET_SRC (set), src, false);
 -	    }
 -	}
 -    }
 }
 /* Check if adding an integer constant value for a specific mode can be
@@ -4327,23 +4299,6 @@ loongarch_split_plus_constant (rtx *op, machine_mode mode)
   op[2] = gen_int_mode (v, mode);
 }
 -/* Return true if a move from SRC to DEST in INSN should be split.  */
 -
 -bool
 -loongarch_split_move_insn_p (rtx dest, rtx src)
 -{
 -  return loongarch_split_move_p (dest, src);
 -}
 -
 -/* Split a move from SRC to DEST in INSN, given that
 -   loongarch_split_move_insn_p holds.  */
 -
 -void
 -loongarch_split_move_insn (rtx dest, rtx src, rtx insn)
 -{
 -  loongarch_split_move (dest, src, insn);
 -}
 -
 /* Implement TARGET_CONSTANT_ALIGNMENT.  */
 static HOST_WIDE_INT
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 7f5fff40a..3e3248ef4 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -794,21 +794,21 @@
 })
 (define_insn "mov<mode>_lsx"
 -  [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f")
 -	(match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r"))]
 +  [(set (match_operand:LSX 0 "nonimmediate_operand" "=f,f,R,*r,*f,*r")
 +	(match_operand:LSX 1 "move_operand" "fYGYI,R,f,*f,*r,*r"))]
   "ISA_HAS_LSX"
 { return loongarch_output_move (operands[0], operands[1]); }
 -  [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert")
 +  [(set_attr "type" "simd_move,simd_load,simd_store,simd_copy,simd_insert,simd_copy")
    (set_attr "mode" "<MODE>")])
 (define_split
   [(set (match_operand:LSX 0 "nonimmediate_operand")
 	(match_operand:LSX 1 "move_operand"))]
   "reload_completed && ISA_HAS_LSX
 -   && loongarch_split_move_insn_p (operands[0], operands[1])"
 +   && loongarch_split_move_p (operands[0], operands[1])"
   [(const_int 0)]
 {
 -  loongarch_split_move_insn (operands[0], operands[1], curr_insn);
 +  loongarch_split_move (operands[0], operands[1]);
   DONE;
 })
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c
 new file mode 100644
 index 000000000..7f9d792eb
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-mov-1.c
@@ -0,0 +1,14 @@
 +/* { dg-do compile } */
 +/* { dg-options "-mlsx -O2" } */
 +
 +#include <lsxintrin.h>
 +
 +extern void bar (__m128i, __m128i);
 +
 +__m128i a;
 +
 +void
 +foo ()
 +{
 +  bar (a, a);
 +}
 -- 
 2.43.0
--- a/0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
+++ b/0088-LoongArch-Expand-left-rotate-to-right-rotate-with-ne.patch
@ -0,0 +1,253 @@
 From a2cc86c9b5e44c3dcdb8c52d6ae5f535442ec1d4 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sun, 17 Dec 2023 05:38:20 +0800
 Subject: [PATCH 088/188] LoongArch: Expand left rotate to right rotate with
 negated amount
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (rotl<mode>3):
 	New define_expand.
 	* config/loongarch/simd.md (vrotl<mode>3): Likewise.
 	(rotl<mode>3): Likewise.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/rotl-with-rotr.c: New test.
 	* gcc.target/loongarch/rotl-with-vrotr-b.c: New test.
 	* gcc.target/loongarch/rotl-with-vrotr-h.c: New test.
 	* gcc.target/loongarch/rotl-with-vrotr-w.c: New test.
 	* gcc.target/loongarch/rotl-with-vrotr-d.c: New test.
 	* gcc.target/loongarch/rotl-with-xvrotr-b.c: New test.
 	* gcc.target/loongarch/rotl-with-xvrotr-h.c: New test.
 	* gcc.target/loongarch/rotl-with-xvrotr-w.c: New test.
 	* gcc.target/loongarch/rotl-with-xvrotr-d.c: New test.
 ---
 gcc/config/loongarch/loongarch.md             | 12 ++++++++
 gcc/config/loongarch/simd.md                  | 29 +++++++++++++++++++
 .../gcc.target/loongarch/rotl-with-rotr.c     |  9 ++++++
 .../gcc.target/loongarch/rotl-with-vrotr-b.c  |  7 +++++
 .../gcc.target/loongarch/rotl-with-vrotr-d.c  |  7 +++++
 .../gcc.target/loongarch/rotl-with-vrotr-h.c  |  7 +++++
 .../gcc.target/loongarch/rotl-with-vrotr-w.c  | 28 ++++++++++++++++++
 .../gcc.target/loongarch/rotl-with-xvrotr-b.c |  7 +++++
 .../gcc.target/loongarch/rotl-with-xvrotr-d.c |  7 +++++
 .../gcc.target/loongarch/rotl-with-xvrotr-h.c |  7 +++++
 .../gcc.target/loongarch/rotl-with-xvrotr-w.c |  7 +++++
 11 files changed, 127 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 3d5b75825..ed4d4b906 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -2903,6 +2903,18 @@
   [(set_attr "type" "shift,shift")
    (set_attr "mode" "SI")])
 +;; Expand left rotate to right rotate.
 +(define_expand "rotl<mode>3"
 +  [(set (match_dup 3)
 +	(neg:SI (match_operand:SI 2 "register_operand")))
 +   (set (match_operand:GPR 0 "register_operand")
 +	(rotatert:GPR (match_operand:GPR 1 "register_operand")
 +		      (match_dup 3)))]
 +  ""
 +  {
 +    operands[3] = gen_reg_rtx (SImode);
 +  });
 +
 ;; The following templates were added to generate "bstrpick.d + alsl.d"
 ;; instruction pairs.
 ;; It is required that the values of const_immalsl_operand and
 diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
 index 13202f79b..93fb39abc 100644
 --- a/gcc/config/loongarch/simd.md
 +++ b/gcc/config/loongarch/simd.md
@@ -268,6 +268,35 @@
   [(set_attr "type" "simd_int_arith")
    (set_attr "mode" "<MODE>")])
 +;; Expand left rotate to right rotate.
 +(define_expand "vrotl<mode>3"
 +  [(set (match_dup 3)
 +	(neg:IVEC (match_operand:IVEC 2 "register_operand")))
 +   (set (match_operand:IVEC 0 "register_operand")
 +	(rotatert:IVEC (match_operand:IVEC 1 "register_operand")
 +		       (match_dup 3)))]
 +  ""
 +  {
 +    operands[3] = gen_reg_rtx (<MODE>mode);
 +  });
 +
 +;; Expand left rotate with a scalar amount to right rotate: negate the
 +;; scalar before broadcasting it because scalar negation is cheaper than
 +;; vector negation.
 +(define_expand "rotl<mode>3"
 +  [(set (match_dup 3)
 +	(neg:SI (match_operand:SI 2 "register_operand")))
 +   (set (match_dup 4)
 +	(vec_duplicate:IVEC (subreg:<IVEC:UNITMODE> (match_dup 3) 0)))
 +   (set (match_operand:IVEC 0 "register_operand")
 +	(rotatert:IVEC (match_operand:IVEC 1 "register_operand")
 +		       (match_dup 4)))]
 +  ""
 +  {
 +    operands[3] = gen_reg_rtx (SImode);
 +    operands[4] = gen_reg_rtx (<MODE>mode);
 +  });
 +
 ;; <x>vrotri.{b/h/w/d}
 (define_insn "rotr<mode>3"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
 new file mode 100644
 index 000000000..84cc53cec
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-rotr.c
@@ -0,0 +1,9 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2" } */
 +/* { dg-final { scan-assembler "rotr\\.w" } } */
 +
 +unsigned
 +t (unsigned a, unsigned b)
 +{
 +  return a << b | a >> (32 - b);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
 new file mode 100644
 index 000000000..14298bf9e
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-b.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "vrotr\\.b" 2 } } */
 +/* { dg-final { scan-assembler-times "vneg\\.b" 1 } } */
 +
 +#define TYPE char
 +#include "rotl-with-vrotr-w.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
 new file mode 100644
 index 000000000..0e971b323
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-d.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "vrotr\\.d" 2 } } */
 +/* { dg-final { scan-assembler-times "vneg\\.d" 1 } } */
 +
 +#define TYPE long long
 +#include "rotl-with-vrotr-w.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
 new file mode 100644
 index 000000000..93216ebc2
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-h.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "vrotr\\.h" 2 } } */
 +/* { dg-final { scan-assembler-times "vneg\\.h" 1 } } */
 +
 +#define TYPE short
 +#include "rotl-with-vrotr-w.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
 new file mode 100644
 index 000000000..d05b86f47
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-vrotr-w.c
@@ -0,0 +1,28 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlsx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "vrotr\\.w" 2 } } */
 +/* { dg-final { scan-assembler-times "vneg\\.w" 1 } } */
 +
 +#ifndef VLEN
 +#define VLEN 16
 +#endif
 +
 +#ifndef TYPE
 +#define TYPE int
 +#endif
 +
 +typedef unsigned TYPE V __attribute__ ((vector_size (VLEN)));
 +V a, b, c;
 +
 +void
 +test (int x)
 +{
 +  b = a << x | a >> ((int)sizeof (TYPE) * __CHAR_BIT__ - x);
 +}
 +
 +void
 +test2 (void)
 +{
 +  for (int i = 0; i < VLEN / sizeof (TYPE); i++)
 +    c[i] = a[i] << b[i] | a[i] >> ((int)sizeof (TYPE) * __CHAR_BIT__ - b[i]);
 +}
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
 new file mode 100644
 index 000000000..2674b1b61
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-b.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "xvrotr\\.b" 2 } } */
 +/* { dg-final { scan-assembler-times "xvneg\\.b" 1 } } */
 +
 +#define VLEN 32
 +#include "rotl-with-vrotr-b.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
 new file mode 100644
 index 000000000..e94403315
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-d.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "xvrotr\\.d" 2 } } */
 +/* { dg-final { scan-assembler-times "xvneg\\.d" 1 } } */
 +
 +#define VLEN 32
 +#include "rotl-with-vrotr-d.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
 new file mode 100644
 index 000000000..3d998941f
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-h.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "xvrotr\\.h" 2 } } */
 +/* { dg-final { scan-assembler-times "xvneg\\.h" 1 } } */
 +
 +#define VLEN 32
 +#include "rotl-with-vrotr-h.c"
 diff --git a/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
 new file mode 100644
 index 000000000..ca6aa7bae
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/rotl-with-xvrotr-w.c
@@ -0,0 +1,7 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mlasx -fno-vect-cost-model" } */
 +/* { dg-final { scan-assembler-times "xvrotr\\.w" 2 } } */
 +/* { dg-final { scan-assembler-times "xvneg\\.w" 1 } } */
 +
 +#define VLEN 32
 +#include "rotl-with-vrotr-w.c"
 -- 
 2.43.0
--- a/0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
+++ b/0089-LoongArch-Fix-infinite-secondary-reloading-of-FCCmod.patch
@ -0,0 +1,104 @@
 From 1e389ec3bad94888fadd153f191fe8862448f258 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Wed, 27 Dec 2023 04:28:56 +0800
 Subject: [PATCH 089/188] LoongArch: Fix infinite secondary reloading of
 FCCmode [PR113148]
 The GCC internal doc says:
     X might be a pseudo-register or a 'subreg' of a pseudo-register,
     which could either be in a hard register or in memory.  Use
     'true_regnum' to find out; it will return -1 if the pseudo is in
     memory and the hard register number if it is in a register.
 So "MEM_P (x)" is not enough for checking if we are reloading from/to
 memory.  This bug has caused the reload pass to stall and finally ICE,
 complaining "maximum number of generated reload insns per insn
 achieved", since r14-6814.
 Check if "true_regnum (x)" is -1 besides "MEM_P (x)" to fix the issue.
 gcc/ChangeLog:
 	PR target/113148
 	* config/loongarch/loongarch.cc (loongarch_secondary_reload):
 	Check if regno == -1 besides MEM_P (x) for reloading FCCmode
 	from/to FPR to/from memory.
 gcc/testsuite/ChangeLog:
 	PR target/113148
 	* gcc.target/loongarch/pr113148.c: New test.
 ---
 gcc/config/loongarch/loongarch.cc             |  3 +-
 gcc/testsuite/gcc.target/loongarch/pr113148.c | 44 +++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/pr113148.c
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 5c278386a..2e305f940 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -6902,7 +6902,8 @@ loongarch_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
 	  return NO_REGS;
 	}
 -      if (reg_class_subset_p (rclass, FP_REGS) && MEM_P (x))
 +      if (reg_class_subset_p (rclass, FP_REGS)
 +	  && (regno == -1 || MEM_P (x)))
 	return GR_REGS;
       return NO_REGS;
 diff --git a/gcc/testsuite/gcc.target/loongarch/pr113148.c b/gcc/testsuite/gcc.target/loongarch/pr113148.c
 new file mode 100644
 index 000000000..cf48e5520
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/pr113148.c
@@ -0,0 +1,44 @@
 +/* PR 113148: ICE caused by infinite reloading */
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=la464 -mfpu=64 -mabi=lp64d" } */
 +
 +struct bound
 +{
 +  double max;
 +} drawQuadrant_bound;
 +double w4, innerXfromXY_y, computeBound_right_0;
 +struct arc_def
 +{
 +  double w, h;
 +  double a0, a1;
 +};
 +static void drawQuadrant (struct arc_def *);
 +static void
 +computeBound (struct arc_def *def, struct bound *bound)
 +{
 +  double ellipsex_1, ellipsex_0;
 +  bound->max = def->a1 ?: __builtin_sin (w4) * def->h;
 +  if (def->a0 == 5 && def->w == def->h)
 +    ;
 +  else
 +    ellipsex_0 = def->a0 == 0.0 ?: __builtin_cos (w4);
 +  if (def->a1 == 5 && def->w == def->h)
 +    ellipsex_1 = bound->max;
 +  __builtin_sqrt (ellipsex_1 * innerXfromXY_y * innerXfromXY_y * w4);
 +  computeBound_right_0 = ellipsex_0;
 +}
 +void
 +drawArc ()
 +{
 +  struct arc_def foo;
 +  for (;;)
 +    drawQuadrant (&foo);
 +}
 +void
 +drawQuadrant (struct arc_def *def)
 +{
 +  int y, miny;
 +  computeBound (def, &drawQuadrant_bound);
 +  while (y >= miny)
 +    ;
 +}
 -- 
 2.43.0
--- a/0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
+++ b/0090-LoongArch-Replace-mexplicit-relocs-auto-simple-used-.patch
@ -0,0 +1,305 @@
 From 294893b352898328d804f2d07981f6bf1e54f8b6 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Tue, 12 Dec 2023 04:54:21 +0800
 Subject: [PATCH 090/188] LoongArch: Replace -mexplicit-relocs=auto simple-used
 address peephole2 with combine
 The problem with peephole2 is it uses a naive sliding-window algorithm
 and misses many cases.  For example:
    float a[10000];
    float t() { return a[0] + a[8000]; }
 is compiled to:
    la.local    $r13,a
    la.local    $r12,a+32768
    fld.s       $f1,$r13,0
    fld.s       $f0,$r12,-768
    fadd.s      $f0,$f1,$f0
 by trunk.  But as we've explained in r14-4851, the following would be
 better with -mexplicit-relocs=auto:
    pcalau12i   $r13,%pc_hi20(a)
    pcalau12i   $r12,%pc_hi20(a+32000)
    fld.s       $f1,$r13,%pc_lo12(a)
    fld.s       $f0,$r12,%pc_lo12(a+32000)
    fadd.s      $f0,$f1,$f0
 However the sliding-window algorithm just won't detect the pcalau12i/fld
 pair to be optimized.  Using a define_insn_and_rewrite in the combine
 pass works around the issue.
 gcc/ChangeLog:
 	* config/loongarch/predicates.md
 	(symbolic_pcrel_offset_operand): New define_predicate.
 	(mem_simple_ldst_operand): Likewise.
 	* config/loongarch/loongarch-protos.h
 	(loongarch_rewrite_mem_for_simple_ldst): Declare.
 	* config/loongarch/loongarch.cc
 	(loongarch_rewrite_mem_for_simple_ldst): Implement.
 	* config/loongarch/loongarch.md (simple_load<mode>): New
 	define_insn_and_rewrite.
 	(simple_load_<su>ext<SUBDI:mode><GPR:mode>): Likewise.
 	(simple_store<mode>): Likewise.
 	(define_peephole2): Remove la.local/[f]ld peepholes.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c:
 	New test.
 	* gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c:
 	New test.
 ---
 gcc/config/loongarch/loongarch-protos.h       |   1 +
 gcc/config/loongarch/loongarch.cc             |  16 +++
 gcc/config/loongarch/loongarch.md             | 114 +++++-------------
 gcc/config/loongarch/predicates.md            |  13 ++
 ...explicit-relocs-auto-single-load-store-2.c |  11 ++
 ...explicit-relocs-auto-single-load-store-3.c |  18 +++
 6 files changed, 86 insertions(+), 87 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
 create mode 100644 gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
 diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
 index 2067e50c3..5060efbb6 100644
 --- a/gcc/config/loongarch/loongarch-protos.h
 +++ b/gcc/config/loongarch/loongarch-protos.h
@@ -163,6 +163,7 @@ extern bool loongarch_use_ins_ext_p (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
 extern bool loongarch_check_zero_div_p (void);
 extern bool loongarch_pre_reload_split (void);
 extern int loongarch_use_bstrins_for_ior_with_mask (machine_mode, rtx *);
 +extern rtx loongarch_rewrite_mem_for_simple_ldst (rtx);
 union loongarch_gen_fn_ptrs
 {
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 2e305f940..c6318bee9 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -5713,6 +5713,22 @@ loongarch_use_bstrins_for_ior_with_mask (machine_mode mode, rtx *op)
   return 0;
 }
 +/* Rewrite a MEM for simple load/store under -mexplicit-relocs=auto
 +   -mcmodel={normal/medium}.  */
 +rtx
 +loongarch_rewrite_mem_for_simple_ldst (rtx mem)
 +{
 +  rtx addr = XEXP (mem, 0);
 +  rtx hi = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
 +			   UNSPEC_PCALAU12I_GR);
 +  rtx new_mem;
 +
 +  addr = gen_rtx_LO_SUM (Pmode, force_reg (Pmode, hi), addr);
 +  new_mem = gen_rtx_MEM (GET_MODE (mem), addr);
 +  MEM_COPY_ATTRIBUTES (new_mem, mem);
 +  return new_mem;
 +}
 +
 /* Print the text for PRINT_OPERAND punctation character CH to FILE.
    The punctuation characters are:
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index ed4d4b906..3c61a0cf4 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -4135,101 +4135,41 @@
 ;;
 ;; And if the pseudo op cannot be relaxed, we'll get a worse result (with
 ;; 3 instructions).
 -(define_peephole2
 -  [(set (match_operand:P 0 "register_operand")
 -	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
 -	(mem:LD_AT_LEAST_32_BIT (match_dup 0)))]
 -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 -   && (peep2_reg_dead_p (2, operands[0]) \
 -       || REGNO (operands[0]) == REGNO (operands[2]))"
 -  [(set (match_dup 2)
 -	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
 -  {
 -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 -  })
 -
 -(define_peephole2
 -  [(set (match_operand:P 0 "register_operand")
 -	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (match_operand:LD_AT_LEAST_32_BIT 2 "register_operand")
 -	(mem:LD_AT_LEAST_32_BIT (plus (match_dup 0)
 -				(match_operand 3 "const_int_operand"))))]
 -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 -   && (peep2_reg_dead_p (2, operands[0]) \
 -       || REGNO (operands[0]) == REGNO (operands[2]))"
 -  [(set (match_dup 2)
 -	(mem:LD_AT_LEAST_32_BIT (lo_sum:P (match_dup 0) (match_dup 1))))]
 -  {
 -    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
 -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 -  })
 -
 -(define_peephole2
 -  [(set (match_operand:P 0 "register_operand")
 -	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (match_operand:GPR 2 "register_operand")
 -	(any_extend:GPR (mem:SUBDI (match_dup 0))))]
 -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 -   && (peep2_reg_dead_p (2, operands[0]) \
 -       || REGNO (operands[0]) == REGNO (operands[2]))"
 -  [(set (match_dup 2)
 -	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
 -					     (match_dup 1)))))]
 +(define_insn_and_rewrite "simple_load<mode>"
 +  [(set (match_operand:LD_AT_LEAST_32_BIT 0 "register_operand" "=r,f")
 +	(match_operand:LD_AT_LEAST_32_BIT 1 "mem_simple_ldst_operand" ""))]
 +  "loongarch_pre_reload_split ()
 +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
 +  "#"
 +  "&& true"
   {
 -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +    operands[1] = loongarch_rewrite_mem_for_simple_ldst (operands[1]);
   })
 -(define_peephole2
 -  [(set (match_operand:P 0 "register_operand")
 -	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (match_operand:GPR 2 "register_operand")
 +(define_insn_and_rewrite "simple_load_<su>ext<SUBDI:mode><GPR:mode>"
 +  [(set (match_operand:GPR 0 "register_operand" "=r")
 	(any_extend:GPR
 -	  (mem:SUBDI (plus (match_dup 0)
 -			   (match_operand 3 "const_int_operand")))))]
 -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 -   && (peep2_reg_dead_p (2, operands[0]) \
 -       || REGNO (operands[0]) == REGNO (operands[2]))"
 -  [(set (match_dup 2)
 -	(any_extend:GPR (mem:SUBDI (lo_sum:P (match_dup 0)
 -					     (match_dup 1)))))]
 -  {
 -    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
 -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 -  })
 -
 -(define_peephole2
 -  [(set (match_operand:P 0 "register_operand")
 -	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (mem:ST_ANY (match_dup 0))
 -	(match_operand:ST_ANY 2 "register_operand"))]
 -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 -   && (peep2_reg_dead_p (2, operands[0])) \
 -   && REGNO (operands[0]) != REGNO (operands[2])"
 -  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
 +	  (match_operand:SUBDI 1 "mem_simple_ldst_operand" "")))]
 +  "loongarch_pre_reload_split ()
 +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
 +  "#"
 +  "&& true"
   {
 -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +    operands[1] = loongarch_rewrite_mem_for_simple_ldst (operands[1]);
   })
 -(define_peephole2
 -  [(set (match_operand:P 0 "register_operand")
 -	(match_operand:P 1 "symbolic_pcrel_operand"))
 -   (set (mem:ST_ANY (plus (match_dup 0)
 -			  (match_operand 3 "const_int_operand")))
 -	(match_operand:ST_ANY 2 "register_operand"))]
 -  "la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO \
 -   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM) \
 -   && (peep2_reg_dead_p (2, operands[0])) \
 -   && REGNO (operands[0]) != REGNO (operands[2])"
 -  [(set (mem:ST_ANY (lo_sum:P (match_dup 0) (match_dup 1))) (match_dup 2))]
 +(define_insn_and_rewrite "simple_store<mode>"
 +  [(set (match_operand:ST_ANY 0 "mem_simple_ldst_operand" "")
 +	(match_operand:ST_ANY 1 "reg_or_0_operand" "r,f"))]
 +  "loongarch_pre_reload_split ()
 +   && la_opt_explicit_relocs == EXPLICIT_RELOCS_AUTO
 +   && (TARGET_CMODEL_NORMAL || TARGET_CMODEL_MEDIUM)"
 +  "#"
 +  "&& true"
   {
 -    operands[1] = plus_constant (Pmode, operands[1], INTVAL (operands[3]));
 -    emit_insn (gen_pcalau12i_gr<P:mode> (operands[0], operands[1]));
 +    operands[0] = loongarch_rewrite_mem_for_simple_ldst (operands[0]);
   })
 ;; Synchronization instructions.
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 58f9a7826..3698b9103 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -579,6 +579,19 @@
   return loongarch_symbolic_constant_p (op, &type) && type == SYMBOL_PCREL;
 })
 +(define_predicate "symbolic_pcrel_offset_operand"
 +  (and (match_code "plus")
 +       (match_operand 0 "symbolic_pcrel_operand")
 +       (match_operand 1 "const_int_operand")))
 +
 +(define_predicate "mem_simple_ldst_operand"
 +  (match_code "mem")
 +{
 +  op = XEXP (op, 0);
 +  return (symbolic_pcrel_operand (op, Pmode)
 +	  || symbolic_pcrel_offset_operand (op, Pmode));
 +})
 +
 (define_predicate "equality_operator"
   (match_code "eq,ne"))
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
 new file mode 100644
 index 000000000..42cb966d1
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-2.c
@@ -0,0 +1,11 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -march=loongarch64 -mabi=lp64d -mexplicit-relocs=auto" } */
 +
 +float a[8001];
 +float
 +t (void)
 +{
 +  return a[0] + a[8000];
 +}
 +
 +/* { dg-final { scan-assembler-not "la.local" } } */
 diff --git a/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
 new file mode 100644
 index 000000000..32aa5383d
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/explicit-relocs-auto-single-load-store-3.c
@@ -0,0 +1,18 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mexplicit-relocs=auto -fdump-rtl-final" } */
 +/* { dg-final { scan-rtl-dump-times "mem/v/c" 2 "final" } } */
 +/* { dg-final { scan-assembler-not "la\\.local" } } */
 +
 +volatile unsigned long counter;
 +
 +unsigned long
 +read (void)
 +{
 +  return counter;
 +}
 +
 +void
 +clear (void)
 +{
 +  counter = 0;
 +}
 -- 
 2.43.0
--- a/0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch
+++ b/0091-LoongArch-Fix-the-format-of-bstrins_-mode-_for_ior_m.patch
@ -0,0 +1,33 @@
 From 4d569c5fde85ca426eecf57119048ec25f048758 Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Fri, 29 Dec 2023 20:04:34 +0800
 Subject: [PATCH 091/188] LoongArch: Fix the format of
 bstrins_<mode>_for_ior_mask condition (NFC)
 gcc/ChangeLog:
 	* config/loongarch/loongarch.md (bstrins_<mode>_for_ior_mask):
 	For the condition, remove unneeded trailing "\" and move "&&" to
 	follow GNU coding style.  NFC.
 ---
 gcc/config/loongarch/loongarch.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 3c61a0cf4..996df66e8 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -1486,8 +1486,8 @@
                           (match_operand:GPR 2 "const_int_operand"))
 		 (and:GPR (match_operand:GPR 3 "register_operand")
 			  (match_operand:GPR 4 "const_int_operand"))))]
 -  "loongarch_pre_reload_split () && \
 -   loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
 +  "loongarch_pre_reload_split ()
 +   && loongarch_use_bstrins_for_ior_with_mask (<MODE>mode, operands)"
   "#"
   "&& true"
   [(set (match_dup 0) (match_dup 1))
 -- 
 2.43.0
--- a/0092-LoongArch-Added-TLS-Le-Relax-support.patch
+++ b/0092-LoongArch-Added-TLS-Le-Relax-support.patch
@ -0,0 +1,280 @@
 From 58d41ffad306a359ecd2902ec19d582506f14b10 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Tue, 12 Dec 2023 16:32:31 +0800
 Subject: [PATCH 092/188] LoongArch: Added TLS Le Relax support.
 Check whether the assembler supports TLS LE relaxation.  If it does, the
 TLS LE relax instruction sequence is generated by default.
 The original way to obtain the tls le symbol address:
    lu12i.w $rd, %le_hi20(sym)
    ori $rd, $rd, %le_lo12(sym)
    add.{w/d} $rd, $rd, $tp
 If the assembler supports tls le relax, the following sequence is generated:
    lu12i.w $rd, %le_hi20_r(sym)
    add.{w/d} $rd,$rd,$tp,%le_add_r(sym)
    addi.{w/d} $rd,$rd,%le_lo12_r(sym)
 gcc/ChangeLog:
 	* config.in: Regenerate.
 	* config/loongarch/loongarch-opts.h (HAVE_AS_TLS_LE_RELAXATION): Define.
 	* config/loongarch/loongarch.cc (loongarch_legitimize_tls_address):
 	Added TLS Le Relax support.
 	(loongarch_print_operand_reloc): Add the output string of TLS Le Relax.
 	* config/loongarch/loongarch.md (@add_tls_le_relax<mode>): New template.
 	* configure: Regenerate.
 	* configure.ac: Check if binutils supports TLS le relax.
 gcc/testsuite/ChangeLog:
 	* lib/target-supports.exp: Add a function to check whether binutils supports
 	TLS Le Relax.
 	* gcc.target/loongarch/tls-le-relax.c: New test.
 ---
 gcc/config.in                                 |  6 +++
 gcc/config/loongarch/loongarch-opts.h         |  4 ++
 gcc/config/loongarch/loongarch.cc             | 46 +++++++++++++++++--
 gcc/config/loongarch/loongarch.md             | 12 +++++
 gcc/configure                                 | 31 +++++++++++++
 gcc/configure.ac                              |  5 ++
 .../gcc.target/loongarch/tls-le-relax.c       | 12 +++++
 gcc/testsuite/lib/target-supports.exp         | 12 +++++
 8 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/tls-le-relax.c
 diff --git a/gcc/config.in b/gcc/config.in
 index 033cfb98b..7220b2b2b 100644
 --- a/gcc/config.in
 +++ b/gcc/config.in
@@ -771,6 +771,12 @@
 #endif
 +/* Define if your assembler supports tls le relocation. */
 +#ifndef USED_FOR_TARGET
 +#undef HAVE_AS_TLS_LE_RELAXATION
 +#endif
 +
 +
 /* Define if your assembler supports vl/vst/vlm/vstm with an optional
    alignment hint argument. */
 #ifndef USED_FOR_TARGET
 diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
 index 639ed50bd..8491bee0d 100644
 --- a/gcc/config/loongarch/loongarch-opts.h
 +++ b/gcc/config/loongarch/loongarch-opts.h
@@ -114,4 +114,8 @@ struct loongarch_flags {
 #define HAVE_AS_TLS 0
 #endif
 +#ifndef HAVE_AS_TLS_LE_RELAXATION
 +#define HAVE_AS_TLS_LE_RELAXATION 0
 +#endif
 +
 #endif /* LOONGARCH_OPTS_H */
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index c6318bee9..d1b1950dc 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -2993,7 +2993,29 @@ loongarch_legitimize_tls_address (rtx loc)
     case TLS_MODEL_LOCAL_EXEC:
 	{
 -	  /* la.tls.le; tp-relative add.  */
 +	  /* la.tls.le; tp-relative add.
 +
 +	     normal:
 +	      lu12i.w $rd, %le_hi20(sym)
 +	      ori $rd, $rd, %le_lo12(sym)
 +	      add.{w/d} $rd, $rd, $tp
 +	      (st.{w/d}/ld.{w/d} $rs, $rd, 0)
 +
 +	     tls le relax:
 +	      lu12i.w $rd, %le_hi20_r(sym)
 +	      add.{w/d} $rd,$rd,$tp
 +	      addi.{w/d} $rd,$rd,%le_lo12_r(sym)
 +	      (st.{w/d}/ld.{w/d} $rs, $rd, 0)
 +
 +	     extreme (When the code model is set to extreme, the TLS le Relax
 +	     instruction sequence is not generated):
 +	      lu12i.w $rd, %le_hi20(sym)
 +	      ori $rd, $rd, %le_lo12(sym)
 +	      lu32i.d $rd, %le64_lo20(sym)
 +	      lu52i.d $rd, $rd, %le64_hi12(sym)
 +	      add.d $rd, $rd, $tp
 +	      (st.{w/d}/ld.{w/d} $rs, $rd, 0)  */
 +
 	  tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
 	  tmp1 = gen_reg_rtx (Pmode);
 	  dest = gen_reg_rtx (Pmode);
@@ -3004,7 +3026,20 @@ loongarch_legitimize_tls_address (rtx loc)
 	      tmp3 = gen_reg_rtx (Pmode);
 	      rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
 	      high = loongarch_force_temporary (tmp3, high);
 -	      emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
 +
 +	      /* The assembler does not implement tls le relax support when the
 +		 code model is extreme, so when the code model is extreme, the
 +		 old symbol address acquisition method is still used.  */
 +	      if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
 +		{
 +		  emit_insn (gen_add_tls_le_relax (Pmode, dest, high,
 +						   tp, loc));
 +		  loongarch_emit_move (dest,
 +				       gen_rtx_LO_SUM (Pmode, dest, tmp2));
 +		  return dest;
 +		}
 +	      else
 +		emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
 	      if (TARGET_CMODEL_EXTREME)
 		{
@@ -5936,7 +5971,12 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
 	    gcc_unreachable ();
 	}
       else
 -	reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
 +	{
 +	  if (HAVE_AS_TLS_LE_RELAXATION && !TARGET_CMODEL_EXTREME)
 +	    reloc = hi_reloc ? "%le_hi20_r" : "%le_lo12_r";
 +	  else
 +	    reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
 +	}
       break;
     case SYMBOL_TLSGD:
 diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
 index 996df66e8..02c537d4c 100644
 --- a/gcc/config/loongarch/loongarch.md
 +++ b/gcc/config/loongarch/loongarch.md
@@ -73,6 +73,7 @@
   UNSPEC_LOAD_FROM_GOT
   UNSPEC_PCALAU12I
   UNSPEC_PCALAU12I_GR
 +  UNSPEC_ADD_TLS_LE_RELAX
   UNSPEC_ORI_L_LO12
   UNSPEC_LUI_L_HI20
   UNSPEC_LUI_H_LO20
@@ -2503,6 +2504,17 @@
   "pcalau12i\t%0,%%pc_hi20(%1)"
   [(set_attr "type" "move")])
 +(define_insn "@add_tls_le_relax<mode>"
 +  [(set (match_operand:P 0 "register_operand" "=r")
 +	(unspec:P [(match_operand:P 1 "register_operand" "r")
 +		   (match_operand:P 2 "register_operand" "r")
 +		   (match_operand:P 3 "symbolic_operand")]
 +	  UNSPEC_ADD_TLS_LE_RELAX))]
 +  "HAVE_AS_TLS_LE_RELAXATION"
 +  "add.<d>\t%0,%1,%2,%%le_add_r(%3)"
 +  [(set_attr "type" "move")]
 +)
 +
 (define_insn "@ori_l_lo12<mode>"
   [(set (match_operand:P 0 "register_operand" "=r")
 	(unspec:P [(match_operand:P 1 "register_operand" "r")
 diff --git a/gcc/configure b/gcc/configure
 index 5842e7a18..eecfe60d6 100755
 --- a/gcc/configure
 +++ b/gcc/configure
@@ -28968,6 +28968,37 @@ if test $gcc_cv_as_loongarch_cond_branch_relax = yes; then
 $as_echo "#define HAVE_AS_COND_BRANCH_RELAXATION 1" >>confdefs.h
 +fi
 +
 +    { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for tls le relaxation support" >&5
 +$as_echo_n "checking assembler for tls le relaxation support... " >&6; }
 +if ${gcc_cv_as_loongarch_tls_le_relaxation_support+:} false; then :
 +  $as_echo_n "(cached) " >&6
 +else
 +  gcc_cv_as_loongarch_tls_le_relaxation_support=no
 +  if test x$gcc_cv_as != x; then
 +    $as_echo 'lu12i.w $t0,%le_hi20_r(a)' > conftest.s
 +    if { ac_try='$gcc_cv_as $gcc_cv_as_flags  -o conftest.o conftest.s >&5'
 +  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
 +  (eval $ac_try) 2>&5
 +  ac_status=$?
 +  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
 +  test $ac_status = 0; }; }
 +    then
 +	gcc_cv_as_loongarch_tls_le_relaxation_support=yes
 +    else
 +      echo "configure: failed program was" >&5
 +      cat conftest.s >&5
 +    fi
 +    rm -f conftest.o conftest.s
 +  fi
 +fi
 +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_loongarch_tls_le_relaxation_support" >&5
 +$as_echo "$gcc_cv_as_loongarch_tls_le_relaxation_support" >&6; }
 +if test $gcc_cv_as_loongarch_tls_le_relaxation_support = yes; then
 +
 +$as_echo "#define HAVE_AS_TLS_LE_RELAXATION 1" >>confdefs.h
 +
 fi
     ;;
 diff --git a/gcc/configure.ac b/gcc/configure.ac
 index 9c3fd3ad6..d1032440d 100644
 --- a/gcc/configure.ac
 +++ b/gcc/configure.ac
@@ -5357,6 +5357,11 @@ x:
        beq $a0,$a1,a],,
       [AC_DEFINE(HAVE_AS_COND_BRANCH_RELAXATION, 1,
 		[Define if your assembler supports conditional branch relaxation.])])
 +    gcc_GAS_CHECK_FEATURE([tls le relaxation support],
 +      gcc_cv_as_loongarch_tls_le_relaxation_support,,
 +      [lu12i.w $t0,%le_hi20_r(a)],,
 +      [AC_DEFINE(HAVE_AS_TLS_LE_RELAXATION, 1,
 +	  [Define if your assembler supports tls le relocation.])])
     ;;
     s390*-*-*)
     gcc_GAS_CHECK_FEATURE([.gnu_attribute support],
 diff --git a/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c
 new file mode 100644
 index 000000000..a9a404fc7
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/tls-le-relax.c
@@ -0,0 +1,12 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mcmodel=normal -mexplicit-relocs" } */
 +/* { dg-final { scan-assembler "%le_add_r" { target tls_le_relax } } } */
 +
 +__attribute__ ((tls_model ("local-exec"))) __thread int a;
 +
 +void
 +test (void)
 +{
 +  a = 10;
 +}
 +
 diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
 index b8bff1a31..20fbd43ee 100644
 --- a/gcc/testsuite/lib/target-supports.exp
 +++ b/gcc/testsuite/lib/target-supports.exp
@@ -10582,6 +10582,18 @@ proc check_effective_target_loongarch_call36_support { } {
   } ""]
 }
 +# Returns 1 if binutils supports TLS le Relax, 0 otherwise.
 +proc check_effective_target_tls_le_relax { } {
 +  if [check_effective_target_tls_native] {
 +    return [check_no_compiler_messages loongarch_tls_le_relax object {
 +        /* Assembly code */
 +   lu12i.w $r12, %le_hi20_r(a)
 +    }]
 +  }
 +
 +  return 0;
 +}
 +
 # Return 1 if the target does *not* require strict alignment.
 proc check_effective_target_non_strict_align {} {
 -- 
 2.43.0
--- a/0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
+++ b/0093-LoongArch-Provide-fmin-fmax-RTL-pattern-for-vectors.patch
@ -0,0 +1,112 @@
 From 97081ba053424e35b1869a00d6ac0e84362d09ea Mon Sep 17 00:00:00 2001
 From: Xi Ruoyao <xry111@xry111.site>
 Date: Sat, 30 Dec 2023 21:40:11 +0800
 Subject: [PATCH 093/188] LoongArch: Provide fmin/fmax RTL pattern for vectors
 We already had smin/smax RTL pattern using vfmin/vfmax instructions.
 But for smin/smax, it's unspecified what will happen if either operand
 contains any NaN operands.  So we would not vectorize the loop with
 -fno-finite-math-only (the default for all optimization levels except
 -Ofast).
 But, LoongArch vfmin/vfmax instruction is IEEE-754-2008 conformant so we
 can also use them and vectorize the loop.
 gcc/ChangeLog:
 	* config/loongarch/simd.md (fmax<mode>3): New define_insn.
 	(fmin<mode>3): Likewise.
 	(reduc_fmax_scal_<mode>3): New define_expand.
 	(reduc_fmin_scal_<mode>3): Likewise.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vfmax-vfmin.c: New test.
 ---
 gcc/config/loongarch/simd.md                  | 31 +++++++++++++++++++
 .../gcc.target/loongarch/vfmax-vfmin.c        | 31 +++++++++++++++++++
 2 files changed, 62 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
 diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md
 index 93fb39abc..8ac1d75a8 100644
 --- a/gcc/config/loongarch/simd.md
 +++ b/gcc/config/loongarch/simd.md
@@ -426,6 +426,37 @@
   [(set_attr "type" "simd_fcmp")
    (set_attr "mode" "<MODE>")])
 +; [x]vf{min/max} instructions are IEEE-754-2008 conforming, use them for
 +; the corresponding IEEE-754-2008 operations.  We must use UNSPEC instead
 +; of smin/smax though, see PR105414 and PR107013.
 +
 +(define_int_iterator UNSPEC_FMAXMIN [UNSPEC_FMAX UNSPEC_FMIN])
 +(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax") (UNSPEC_FMIN "fmin")])
 +
 +(define_insn "<fmaxmin><mode>3"
 +  [(set (match_operand:FVEC 0 "register_operand" "=f")
 +	(unspec:FVEC [(match_operand:FVEC 1 "register_operand" "f")
 +		      (match_operand:FVEC 2 "register_operand" "f")]
 +		     UNSPEC_FMAXMIN))]
 +  ""
 +  "<x>v<fmaxmin>.<simdfmt>\t%<wu>0,%<wu>1,%<wu>2"
 +  [(set_attr "type" "simd_fminmax")
 +   (set_attr "mode" "<MODE>")])
 +
 +;; ... and also reduc operations.
 +(define_expand "reduc_<fmaxmin>_scal_<mode>"
 +  [(match_operand:<UNITMODE> 0 "register_operand")
 +   (match_operand:FVEC 1 "register_operand")
 +   (const_int UNSPEC_FMAXMIN)]
 +  ""
 +{
 +  rtx tmp = gen_reg_rtx (<MODE>mode);
 +  loongarch_expand_vector_reduc (gen_<fmaxmin><mode>3, tmp, operands[1]);
 +  emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
 +					      const0_rtx));
 +  DONE;
 +})
 +
 ; The LoongArch SX Instructions.
 (include "lsx.md")
 diff --git a/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
 new file mode 100644
 index 000000000..811fee361
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vfmax-vfmin.c
@@ -0,0 +1,31 @@
 +/* { dg-do compile } */
 +/* { dg-options "-O2 -mtune=la464 -mlasx" } */
 +/* { dg-final { scan-assembler "\tvfmin\\.d" } } */
 +/* { dg-final { scan-assembler "\tvfmax\\.d" } } */
 +/* { dg-final { scan-assembler "\txvfmin\\.d" } } */
 +/* { dg-final { scan-assembler "\txvfmax\\.d" } } */
 +/* { dg-final { scan-assembler "\tvfmin\\.s" } } */
 +/* { dg-final { scan-assembler "\tvfmax\\.s" } } */
 +/* { dg-final { scan-assembler "\txvfmin\\.s" } } */
 +/* { dg-final { scan-assembler "\txvfmax\\.s" } } */
 +
 +#define T(OP) __typeof__ (__builtin_##OP (0, 0))
 +
 +#define TEST(OP, LEN) \
 +void \
 +test_##OP##LEN (T (OP) *restrict dest, \
 +		const T (OP) *restrict src1, \
 +		const T (OP) *restrict src2) \
 +{ \
 +  for (int i = 0; i < LEN / sizeof (T(OP)); i++) \
 +    dest[i] = __builtin_##OP (src1[i], src2[i]); \
 +}
 +
 +TEST(fmin, 16)
 +TEST(fmax, 16)
 +TEST(fmin, 32)
 +TEST(fmax, 32)
 +TEST(fminf, 16)
 +TEST(fmaxf, 16)
 +TEST(fminf, 32)
 +TEST(fmaxf, 32)
 -- 
 2.43.0
--- a/0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
+++ b/0094-LoongArch-Merge-constant-vector-permuatation-impleme.patch
--- a/0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch
+++ b/0095-LoongArch-testsuite-Fix-FAIL-in-lasx-xvstelm.c-file.patch
@ -0,0 +1,34 @@
 From 6263acd411b9685ebc7b16d19b91aad39cb7e184 Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Fri, 29 Dec 2023 09:45:15 +0800
 Subject: [PATCH 095/188] LoongArch: testsuite:Fix FAIL in lasx-xvstelm.c file.
 After implementing the cost model on the LoongArch architecture, the GCC
 compiler code has this feature turned on by default, which causes the
 lasx-xvstelm.c file test to fail. Through analysis, this test case can
 generate vectorization instructions required for detection only after
 disabling the functionality of the cost model with the "-fno-vect-cost-model"
 compilation option.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: Add compile
 	option "-fno-vect-cost-model" to dg-options.
 ---
 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
 diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
 index 1a7b0e86f..4b846204a 100644
 --- a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
 +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
 -/* { dg-options "-O3 -mlasx" } */
 +/* { dg-options "-O3 -mlasx -fno-vect-cost-model" } */
 /* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
 #define LEN 256
 -- 
 2.43.0
--- a/0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch
+++ b/0096-LoongArch-testsuite-Modify-the-test-behavior-of-the-.patch
@ -0,0 +1,47 @@
 From c21f2c7e6c2385a3783977bbca79ebe178d0d141 Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Fri, 5 Jan 2024 11:43:24 +0800
 Subject: [PATCH 096/188] LoongArch: testsuite:Modify the test behavior of the
 vect-bic-bitmask-{12, 23}.c file.
 Before modifying the test behavior of the program, dg-do is set to assemble in
 vect-bic-bitmask-{12,23}.c. However, when the binutils library does not support
 the vector instruction set, it will fail to recognize the vector instructions
 and a FAIL item will appear in the assembly stage. So set the program's dg-do to
 compile.
 gcc/testsuite/ChangeLog:
 	* gcc.dg/vect/vect-bic-bitmask-12.c: Change the default
 	setting of assembly to compile.
 	* gcc.dg/vect/vect-bic-bitmask-23.c: Ditto.
 ---
 gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c | 2 +-
 gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
 index 36ec5a8b1..213e4c2a4 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-12.c
@@ -1,5 +1,5 @@
 /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
 -/* { dg-do assemble } */
 +/* { dg-do compile } */
 /* { dg-additional-options "-O3 -fdump-tree-dce -w" } */
 #include <stdint.h>
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
 index 5b4c3b6e1..5dceb4bbc 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-bic-bitmask-23.c
@@ -1,5 +1,5 @@
 /* { dg-skip-if "missing optab for vectorization" { sparc*-*-* } } */
 -/* { dg-do assemble } */
 +/* { dg-do compile } */
 /* { dg-additional-options "-O1 -fdump-tree-dce -w" } */
 #include <stdint.h>
 -- 
 2.43.0
--- a/0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch
+++ b/0097-LoongArch-testsuite-Delete-the-default-run-behavior-.patch
@ -0,0 +1,31 @@
 From cdee2d1e7391d95bf6fd471fddcb86ee81247929 Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Fri, 5 Jan 2024 11:43:27 +0800
 Subject: [PATCH 097/188] LoongArch: testsuite:Delete the default run behavior
 in pr60510.f.
 When binutils does not support vector instruction sets, the test program fails
 because it does not recognize vectorization at the assembly stage. Therefore,
 the default run behavior of the program is deleted, so that the behavior of
 the program depends on whether the software supports vectorization.
 gcc/testsuite/ChangeLog:
 	* gfortran.dg/vect/pr60510.f: Delete the default behavior of the
 	program.
 ---
 gcc/testsuite/gfortran.dg/vect/pr60510.f | 1 -
 1 file changed, 1 deletion(-)
 diff --git a/gcc/testsuite/gfortran.dg/vect/pr60510.f b/gcc/testsuite/gfortran.dg/vect/pr60510.f
 index ecd50dd55..c1e11b27d 100644
 --- a/gcc/testsuite/gfortran.dg/vect/pr60510.f
 +++ b/gcc/testsuite/gfortran.dg/vect/pr60510.f
@@ -1,4 +1,3 @@
 -! { dg-do run }
 ! { dg-require-effective-target vect_double }
 ! { dg-require-effective-target vect_intdouble_cvt }
 ! { dg-additional-options "-fno-inline -ffast-math" }
 -- 
 2.43.0
--- a/0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
+++ b/0098-LoongArch-testsuite-Added-additional-vectorization-m.patch
@ -0,0 +1,157 @@
 From c8fa8efa3297ebced55da8a69cf44f314573be7c Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Fri, 5 Jan 2024 11:43:28 +0800
 Subject: [PATCH 098/188] LoongArch: testsuite:Added additional vectorization
 "-mlasx" compilation option.
 In the LoongArch architecture, the reason for not adding the 128-bit
 vector-width-*hi* instruction template in the GCC back end is that it causes
 program performance loss, so we can only add the "-mlasx" compilation option
 to use 256-bit vectorization functions in test files.
 gcc/testsuite/ChangeLog:
 	* gcc.dg/vect/bb-slp-pattern-1.c: If you are testing on the
 	LoongArch architecture, you need to add the "-mlasx" compilation
 	option to generate vectorized code.
 	* gcc.dg/vect/slp-widen-mult-half.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-const-s16.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-const-u16.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-half-u8.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-half.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-u16.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-u8-s16-s32.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-u8-u32.c: Ditto.
 	* gcc.dg/vect/vect-widen-mult-u8.c: Ditto.
 ---
 gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c           | 1 +
 gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c        | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c  | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c  | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c    | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c       | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c        | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c     | 1 +
 gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c         | 1 +
 10 files changed, 10 insertions(+)
 diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
 index 47b1a4366..52ffca82a 100644
 --- a/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
 +++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pattern-1.c
@@ -1,4 +1,5 @@
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
 #include <stdarg.h>
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
 index e3bfee333..cd44e551f 100644
 --- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
 +++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c
@@ -1,6 +1,7 @@
 /* Disabling epilogues until we find a better way to deal with scans.  */
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
 index 4c95dd201..082c758cb 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c
@@ -2,6 +2,7 @@
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-additional-options "-fno-ipa-icf" } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
 index 4075f815c..a95e617ad 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c
@@ -2,6 +2,7 @@
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-additional-options "-fno-ipa-icf" } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
 index c4ac88e18..14d96645a 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c
@@ -2,6 +2,7 @@
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 /* { dg-additional-options "-fno-ipa-icf" } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
 index ebbf4f5e8..7901dae85 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c
@@ -1,6 +1,7 @@
 /* Disabling epilogues until we find a better way to deal with scans.  */
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
 index 2e28baae0..21b39953e 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
@@ -1,6 +1,7 @@
 /* Disabling epilogues until we find a better way to deal with scans.  */
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include <stdarg.h>
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
 index d277f0b2b..4827e11b2 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c
@@ -1,6 +1,7 @@
 /* Disabling epilogues until we find a better way to deal with scans.  */
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include <stdarg.h>
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
 index f50358802..87eb9e0cb 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-u32.c
@@ -1,5 +1,6 @@
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-* } } */
 #include <stdarg.h>
 #include "tree-vect.h"
 diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
 index 03d137941..507d30c35 100644
 --- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
 +++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
@@ -1,5 +1,6 @@
 /* { dg-additional-options "--param vect-epilogues-nomask=0" } */
 /* { dg-require-effective-target vect_int } */
 +/* { dg-additional-options "-mlasx" { target loongarch*-*-*} } */
 #include <stdarg.h>
 #include "tree-vect.h"
 -- 
 2.43.0
--- a/0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch
+++ b/0099-LoongArch-testsuite-Give-up-the-detection-of-the-gcc.patch
@ -0,0 +1,80 @@
 From df18d0c85049402b8f2f44c3c4e013a0b6d91cee Mon Sep 17 00:00:00 2001
 From: chenxiaolong <chenxiaolong@loongson.cn>
 Date: Fri, 5 Jan 2024 11:43:29 +0800
 Subject: [PATCH 099/188] LoongArch: testsuite:Give up the detection of the
 gcc.dg/fma-{3, 4, 6, 7}.c file.
 On the LoongArch architecture, the above four test cases need to be waived
 during testing. There are two situations:
 1. The function of fma-{3,6}.c test is to find the value of c-a*b, but on
 the LoongArch architecture, the function of the existing fnmsub instruction
 is to find the value of -(a*b - c);
 2. The function of fma-{4,7}.c test is to find the value of -(a*b)-c, but on
 the LoongArch architecture, the function of the existing fnmadd instruction
 is to find the value of -(a*b + c);
 In both cases the two expressions are not equivalent, because their results
 can differ in the sign of zero (+0 versus -0).
 gcc/testsuite/ChangeLog:
 	* gcc.dg/fma-3.c: The intermediate file corresponding to the
 	function does not produce the corresponding FNMA symbol, so the test
 	rules should be skipped when testing.
 	* gcc.dg/fma-4.c: The intermediate file corresponding to the
 	function does not produce the corresponding FNMS symbol, so skip the
 	test rules when testing.
 	* gcc.dg/fma-6.c: The cause is the same as fma-3.c.
 	* gcc.dg/fma-7.c: The cause is the same as fma-4.c
 ---
 gcc/testsuite/gcc.dg/fma-3.c | 2 +-
 gcc/testsuite/gcc.dg/fma-4.c | 2 +-
 gcc/testsuite/gcc.dg/fma-6.c | 2 +-
 gcc/testsuite/gcc.dg/fma-7.c | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)
 diff --git a/gcc/testsuite/gcc.dg/fma-3.c b/gcc/testsuite/gcc.dg/fma-3.c
 index 699aa2c95..6649b54b6 100644
 --- a/gcc/testsuite/gcc.dg/fma-3.c
 +++ b/gcc/testsuite/gcc.dg/fma-3.c
@@ -12,4 +12,4 @@ f2 (double a, double b, double c)
   return c - a * b;
 }
 -/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target scalar_all_fma } } } */
 +/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
 diff --git a/gcc/testsuite/gcc.dg/fma-4.c b/gcc/testsuite/gcc.dg/fma-4.c
 index bff928f1f..f1701c196 100644
 --- a/gcc/testsuite/gcc.dg/fma-4.c
 +++ b/gcc/testsuite/gcc.dg/fma-4.c
@@ -12,4 +12,4 @@ f2 (double a, double b, double c)
   return -(a * b) - c;
 }
 -/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target scalar_all_fma } } } */
 +/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 2 "widening_mul" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
 diff --git a/gcc/testsuite/gcc.dg/fma-6.c b/gcc/testsuite/gcc.dg/fma-6.c
 index 87258cec4..9e49b62b6 100644
 --- a/gcc/testsuite/gcc.dg/fma-6.c
 +++ b/gcc/testsuite/gcc.dg/fma-6.c
@@ -64,4 +64,4 @@ f10 (double a, double b, double c)
   return -__builtin_fma (a, b, -c);
 }
 -/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target scalar_all_fma } } } */
 +/* { dg-final { scan-tree-dump-times { = \.FNMA \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
 diff --git a/gcc/testsuite/gcc.dg/fma-7.c b/gcc/testsuite/gcc.dg/fma-7.c
 index f409cc8ee..86aacad7b 100644
 --- a/gcc/testsuite/gcc.dg/fma-7.c
 +++ b/gcc/testsuite/gcc.dg/fma-7.c
@@ -64,4 +64,4 @@ f10 (double a, double b, double c)
   return -__builtin_fma (a, b, c);
 }
 -/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target scalar_all_fma } } } */
 +/* { dg-final { scan-tree-dump-times { = \.FNMS \(} 14 "optimized" { target { scalar_all_fma && { ! loongarch*-*-* } } } } } */
 -- 
 2.43.0
--- a/0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
+++ b/0100-LoongArch-Fixed-the-problem-of-incorrect-judgment-of.patch
@ -0,0 +1,206 @@
 From 90db6906a92b685403d9220e94f779737d2dd100 Mon Sep 17 00:00:00 2001
 From: Lulu Cheng <chenglulu@loongson.cn>
 Date: Thu, 4 Jan 2024 10:37:53 +0800
 Subject: [PATCH 100/188] LoongArch: Fixed the problem of incorrect judgment of
 the immediate field of the [x]vld/[x]vst instruction.
 The [x]vld/[x]vst instruction is defined as follows:
  [x]vld/[x]vst {x/v}d, rj, si12
 Before this change, the immediate field of [x]vld/[x]vst was treated as being
 between 10 and 14 bits wide depending on the type.  Because
 loongarch_valid_offset_p restricts the immediate field first, this caused no
 error, but in some cases redundant instructions were generated; see the test
 case.  Now modify it according to the description in the instruction manual.
 gcc/ChangeLog:
 	* config/loongarch/lasx.md (lasx_mxld_<lasxfmt_f>):
 	Modify the method of determining the memory offset of [x]vld/[x]vst.
 	(lasx_mxst_<lasxfmt_f>): Likewise.
 	* config/loongarch/loongarch.cc (loongarch_valid_offset_p): Delete.
 	(loongarch_address_insns): Likewise.
 	* config/loongarch/lsx.md (lsx_ld_<lsxfmt_f>): Likewise.
 	(lsx_st_<lsxfmt_f>): Likewise.
 	* config/loongarch/predicates.md (aq10b_operand): Likewise.
 	(aq10h_operand): Likewise.
 	(aq10w_operand): Likewise.
 	(aq10d_operand): Likewise.
 gcc/testsuite/ChangeLog:
 	* gcc.target/loongarch/vect-ld-st-imm12.c: New test.
 ---
 gcc/config/loongarch/lasx.md                  | 26 -------------------
 gcc/config/loongarch/loongarch.cc             | 19 +++-----------
 gcc/config/loongarch/lsx.md                   | 26 -------------------
 gcc/config/loongarch/predicates.md            | 16 ------------
 .../gcc.target/loongarch/vect-ld-st-imm12.c   | 15 +++++++++++
 5 files changed, 19 insertions(+), 83 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c
 diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md
 index dbbf5a136..95c6bae20 100644
 --- a/gcc/config/loongarch/lasx.md
 +++ b/gcc/config/loongarch/lasx.md
@@ -846,32 +846,6 @@
   DONE;
 })
 -;; Offset load
 -(define_expand "lasx_mxld_<lasxfmt_f>"
 -  [(match_operand:LASX 0 "register_operand")
 -   (match_operand 1 "pmode_register_operand")
 -   (match_operand 2 "aq10<lasxfmt>_operand")]
 -  "ISA_HAS_LASX"
 -{
 -  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
 -				      INTVAL (operands[2]));
 -  loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr));
 -  DONE;
 -})
 -
 -;; Offset store
 -(define_expand "lasx_mxst_<lasxfmt_f>"
 -  [(match_operand:LASX 0 "register_operand")
 -   (match_operand 1 "pmode_register_operand")
 -   (match_operand 2 "aq10<lasxfmt>_operand")]
 -  "ISA_HAS_LASX"
 -{
 -  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
 -			    INTVAL (operands[2]));
 -  loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]);
 -  DONE;
 -})
 -
 ;; LASX
 (define_insn "add<mode>3"
   [(set (match_operand:ILASX 0 "register_operand" "=f,f,f")
 diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
 index 9d2374a46..ddb32cea2 100644
 --- a/gcc/config/loongarch/loongarch.cc
 +++ b/gcc/config/loongarch/loongarch.cc
@@ -2123,21 +2123,11 @@ loongarch_valid_offset_p (rtx x, machine_mode mode)
   /* We may need to split multiword moves, so make sure that every word
      is accessible.  */
 -  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
 +  if (!(LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
 +      && GET_MODE_SIZE (mode) > UNITS_PER_WORD
       && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD))
     return false;
 -  /* LSX LD.* and ST.* supports 10-bit signed offsets.  */
 -  if (LSX_SUPPORTED_MODE_P (mode)
 -      && !loongarch_signed_immediate_p (INTVAL (x), 10,
 -					loongarch_ldst_scaled_shift (mode)))
 -    return false;
 -
 -  /* LASX XVLD.B and XVST.B supports 10-bit signed offsets without shift.  */
 -  if (LASX_SUPPORTED_MODE_P (mode)
 -      && !loongarch_signed_immediate_p (INTVAL (x), 10, 0))
 -    return false;
 -
   return true;
 }
@@ -2372,9 +2362,8 @@ loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p)
       case ADDRESS_REG:
 	if (lsx_p)
 	  {
 -	    /* LSX LD.* and ST.* supports 10-bit signed offsets.  */
 -	    if (loongarch_signed_immediate_p (INTVAL (addr.offset), 10,
 -					      loongarch_ldst_scaled_shift (mode)))
 +	    /* LSX LD.* and ST.* supports 12-bit signed offsets.  */
 +	    if (IMM12_OPERAND (INTVAL (addr.offset)))
 	      return 1;
 	    else
 	      return 0;
 diff --git a/gcc/config/loongarch/lsx.md b/gcc/config/loongarch/lsx.md
 index 3e3248ef4..02e89247b 100644
 --- a/gcc/config/loongarch/lsx.md
 +++ b/gcc/config/loongarch/lsx.md
@@ -812,32 +812,6 @@
   DONE;
 })
 -;; Offset load
 -(define_expand "lsx_ld_<lsxfmt_f>"
 -  [(match_operand:LSX 0 "register_operand")
 -   (match_operand 1 "pmode_register_operand")
 -   (match_operand 2 "aq10<lsxfmt>_operand")]
 -  "ISA_HAS_LSX"
 -{
 -  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
 -			    INTVAL (operands[2]));
 -  loongarch_emit_move (operands[0], gen_rtx_MEM (<MODE>mode, addr));
 -  DONE;
 -})
 -
 -;; Offset store
 -(define_expand "lsx_st_<lsxfmt_f>"
 -  [(match_operand:LSX 0 "register_operand")
 -   (match_operand 1 "pmode_register_operand")
 -   (match_operand 2 "aq10<lsxfmt>_operand")]
 -  "ISA_HAS_LSX"
 -{
 -  rtx addr = plus_constant (GET_MODE (operands[1]), operands[1],
 -			    INTVAL (operands[2]));
 -  loongarch_emit_move (gen_rtx_MEM (<MODE>mode, addr), operands[0]);
 -  DONE;
 -})
 -
 ;; Integer operations
 (define_insn "add<mode>3"
   [(set (match_operand:ILSX 0 "register_operand" "=f,f,f")
 diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
 index 3698b9103..824a85b36 100644
 --- a/gcc/config/loongarch/predicates.md
 +++ b/gcc/config/loongarch/predicates.md
@@ -167,22 +167,6 @@
   (and (match_code "const_int")
        (match_test "loongarch_signed_immediate_p (INTVAL (op), 8, 3)")))
 -(define_predicate "aq10b_operand"
 -  (and (match_code "const_int")
 -       (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 0)")))
 -
 -(define_predicate "aq10h_operand"
 -  (and (match_code "const_int")
 -       (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 1)")))
 -
 -(define_predicate "aq10w_operand"
 -  (and (match_code "const_int")
 -       (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 2)")))
 -
 -(define_predicate "aq10d_operand"
 -  (and (match_code "const_int")
 -       (match_test "loongarch_signed_immediate_p (INTVAL (op), 10, 3)")))
 -
 (define_predicate "aq12b_operand"
   (and (match_code "const_int")
        (match_test "loongarch_signed_immediate_p (INTVAL (op), 12, 0)")))
 diff --git a/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c
 new file mode 100644
 index 000000000..bfc208e4f
 --- /dev/null
 +++ b/gcc/testsuite/gcc.target/loongarch/vect-ld-st-imm12.c
@@ -0,0 +1,15 @@
 +/* { dg-do compile } */
 +/* { dg-options "-march=loongarch64 -mabi=lp64d -mlasx -O2" } */
 +/* { dg-final { scan-assembler-not "addi.d" } } */
 +
 +extern short a[1000];
 +extern short b[1000];
 +extern short c[1000];
 +
 +void
 +test (void)
 +{
 +  for (int i = 501; i < 517; i++)
 +    ((int *)(c + 1))[i] = ((int *)(a + 1))[i] + ((int *)(b + 1))[i];
 +}
 +
 -- 
 2.43.0
--- a/Show More
+++ b/Show More