!436 [Sync] Sync from openEuler-24.03-LTS

From: @jubo-run 
Reviewed-by: @huang-xiaoquan 
Signed-off-by: @huang-xiaoquan
This commit is contained in:
openeuler-ci-bot 2024-05-31 02:56:24 +00:00 committed by Gitee
commit 14054239e6
170 changed files with 325236 additions and 9 deletions


@@ -0,0 +1,550 @@
From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Tue, 5 Dec 2023 15:33:08 +0800
Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment
1/3
---
gcc/common.opt | 8 +
gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
gcc/opts.cc | 61 ++++++++
3 files changed, 473 insertions(+), 1 deletion(-)
diff --git a/gcc/common.opt b/gcc/common.opt
index b01df919e..e69947fc2 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2546,6 +2546,14 @@ freorder-functions
Common Var(flag_reorder_functions) Optimization
Reorder functions to improve code placement.
+fauto-bolt
+Common Var(flag_auto_bolt)
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
+
+fauto-bolt=
+Common Joined RejectNegative
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
+
frerun-cse-after-loop
Common Var(flag_rerun_cse_after_loop) Optimization
Add a common subexpression elimination pass after loop optimizations.
diff --git a/gcc/final.cc b/gcc/final.cc
index a9868861b..d4c4fa08f 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
#include "rtl-iter.h"
#include "print-rtl.h"
#include "function-abi.h"
+#include "insn-codes.h"
#include "common/common-target.h"
#ifdef XCOFF_DEBUGGING_INFO
@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
}
}
#endif
-
+
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
+
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
+
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
+
+/* Return the relative offset address of the start instruction of BB,
+   or -1 if the block has no real instruction. */
+
+static int
+get_bb_start_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ /* The jump target of call is not in this function, so
+ it should be excluded. */
+ if (CALL_P (insn))
+ {
+ return -1;
+ }
+
+ int insn_code = recog_memoized (insn);
+
+ /* The instruction NOP in llvm-bolt belongs to the previous
+ BB, so it needs to be skipped. */
+ if (insn_code != CODE_FOR_nop)
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ }
+ return -1;
+}
+
+/* Return the relative offset address of the end instruction of BB,
+   or -1 if the block is empty or ends in a call instruction. */
+
+static int
+get_bb_end_addr (basic_block bb)
+{
+ rtx_insn *insn;
+ int num_succs = EDGE_COUNT (bb->succs);
+ FOR_BB_INSNS_REVERSE (bb, insn)
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ /* The jump target of call is not in this function, so
+ it should be excluded. */
+ if (CALL_P (insn))
+ {
+ return -1;
+ }
+ if ((num_succs == 1)
+ || ((num_succs == 2) && any_condjump_p (insn)))
+ {
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+ else
+ {
+ return -1;
+ }
+ }
+ return -1;
+}
+
+/* Return the end address of cfun. */
+
+static int
+get_function_end_addr ()
+{
+ rtx_insn *insn = get_last_insn ();
+ for (; insn != get_insns (); insn = PREV_INSN (insn))
+ {
+ if (!INSN_P (insn))
+ {
+ continue;
+ }
+ return INSN_ADDRESSES (INSN_UID (insn));
+ }
+
+ return -1;
+}
+
+/* Return the function profile status string. */
+
+static const char *
+get_function_profile_status ()
+{
+ const char *profile_status[] = {
+ "PROFILE_ABSENT",
+ "PROFILE_GUESSED",
+ "PROFILE_READ",
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
+ };
+
+ return profile_status[profile_status_for_fn (cfun)];
+}
+
+/* Return the count from the feedback data, such as PGO or AFDO. */
+
+inline static gcov_type
+get_fdo_count (profile_count count)
+{
+ return count.quality () >= GUESSED
+ ? count.to_gcov_type () : 0;
+}
+
+/* Return the profile quality string. */
+
+static const char *
+get_fdo_count_quality (profile_count count)
+{
+ const char *profile_quality[] = {
+ "UNINITIALIZED_PROFILE",
+ "GUESSED_LOCAL",
+ "GUESSED_GLOBAL0",
+ "GUESSED_GLOBAL0_ADJUSTED",
+ "GUESSED",
+ "AFDO",
+ "ADJUSTED",
+ "PRECISE"
+ };
+
+ return profile_quality[count.quality ()];
+}
+
+static const char *
+alias_local_functions (const char *fnname)
+{
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ return fnname;
+ }
+ return concat (fnname, "/", lbasename (dump_base_name), NULL);
+}
+
+/* Return function bind type string. */
+
+static const char *
+simple_get_function_bind ()
+{
+ const char *function_bind[] = {
+ "GLOBAL",
+ "WEAK",
+ "LOCAL",
+ "UNKNOWN"
+ };
+
+ if (TREE_PUBLIC (cfun->decl))
+ {
+ if (!(DECL_WEAK (cfun->decl)))
+ {
+ return function_bind[0];
+ }
+ else
+ {
+ return function_bind[1];
+ }
+ }
+ else
+ {
+ return function_bind[2];
+ }
+
+ return function_bind[3];
+}
+
+/* Dump the callee info for each call insn in BB, found via CALL_P (insn). */
+
+static void
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
+{
+ rtx_insn *insn;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (insn && CALL_P (insn))
+ {
+ tree callee = get_call_fndecl (insn);
+
+ if (callee)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
+ INSN_ADDRESSES (INSN_UID (insn)));
+
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLEE_FLAG,
+ alias_local_functions (get_fnname_from_decl (callee)));
+
+ fprintf (asm_out_file,
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ call_count);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "call: %x --> %s \n",
+ INSN_ADDRESSES (INSN_UID (insn)),
+ alias_local_functions
+ (get_fnname_from_decl (callee)));
+ }
+ }
+ }
+ }
+}
+
+/* Dump the edge info into asm. */
+static void
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ edge e;
+ edge_iterator ei;
+ gcov_type edge_total_count = 0;
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type edge_count = get_fdo_count (e->count ());
+ edge_total_count += edge_count;
+
+ int edge_start_addr = get_bb_end_addr (e->src);
+ int edge_end_addr = get_bb_start_addr (e->dest);
+
+ if (edge_start_addr == -1 || edge_end_addr == -1)
+ {
+ continue;
+ }
+
+ /* This is a reserved assert from the original design. If this
+ assert fires, use the address of the previous instruction
+ as edge_start_addr. */
+ gcc_assert (edge_start_addr != edge_end_addr);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "edge: %x --> %x = (%ld)\n",
+ edge_start_addr, edge_end_addr, edge_count);
+ }
+
+ if (edge_count > 0)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+ fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ edge_count);
+ }
+ }
+
+ gcov_type call_count = MAX (edge_total_count, bb_count);
+ if (call_count > 0)
+ {
+ dump_direct_callee_info_to_asm (bb, call_count);
+ }
+}
+
+/* Dump the bb info into asm. */
+
+static void
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+ int bb_start_addr = get_bb_start_addr (bb);
+ if (bb_start_addr != -1)
+ {
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+ bb_count);
+ }
+}
+
+/* Dump the function info into asm. */
+
+static void
+dump_function_info_to_asm (const char *fnname)
+{
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n",
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
+ alias_local_functions (fnname));
+ fprintf (dump_file, " file: %s\n",
+ dump_base_name);
+ fprintf (dump_file, "profile_status: %s\n",
+ get_function_profile_status ());
+ fprintf (dump_file, " size: %x\n",
+ get_function_end_addr ());
+ fprintf (dump_file, " function_bind: %s\n",
+ simple_get_function_bind ());
+ }
+}
+
+/* Dump the function profile from AutoFDO or PGO to asm. */
+
+static void
+dump_fdo_info_to_asm (const char *fnname)
+{
+ basic_block bb;
+
+ dump_function_info_to_asm (fnname);
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ gcov_type bb_count = get_fdo_count (bb->count);
+ if (bb_count == 0)
+ {
+ continue;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n",
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
+ bb_count, get_fdo_count_quality (bb->count));
+ }
+
+ if (flag_profile_use)
+ {
+ dump_edge_jump_info_to_asm (bb, bb_count);
+ }
+ else if (flag_auto_profile)
+ {
+ dump_bb_info_to_asm (bb, bb_count);
+ }
+ }
+}
+
+/* When the -fauto-bolt option is turned on, the .text.fdo section
+   will be generated in the *.s file if there is feedback information
+   from PGO or AutoFDO. This section is parsed by the BOLT plugin. */
+
+static void
+dump_profile_to_elf_sections ()
+{
+ if (!flag_function_sections)
+ {
+ error ("-fauto-bolt should work with -ffunction-section");
+ return;
+ }
+ if (!flag_ipa_ra)
+ {
+ error ("-fauto-bolt should work with -fipa-ra");
+ return;
+ }
+ if (flag_align_jumps)
+ {
+ error ("-fauto-bolt is not supported with -falign-jumps");
+ return;
+ }
+ if (flag_align_labels)
+ {
+ error ("-fauto-bolt is not spported with -falign-loops");
+ return;
+ }
+ if (flag_align_loops)
+ {
+ error ("-fauto-bolt is not supported with -falign-loops");
+ return;
+ }
+
+ /* Return if no feedback data. */
+ if (!flag_profile_use && !flag_auto_profile)
+ {
+ error ("-fauto-bolt should use with -profile-use or -fauto-profile");
+ return;
+ }
+
+ /* Avoid empty functions. */
+ if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+ {
+ return;
+ }
+ int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+ const char *fnname = get_fnname_from_decl (current_function_decl);
+ char *profile_fnname = NULL;
+
+ asprintf (&profile_fnname, "%s%s", ASM_FDO_SECTION_PREFIX, fnname);
+ switch_to_section (get_section (profile_fnname, flags, NULL));
+ dump_fdo_info_to_asm (fnname);
+
+ if (profile_fnname)
+ {
+ free (profile_fnname);
+ profile_fnname = NULL;
+ }
+}
+
/* Turn the RTL into assembly. */
static unsigned int
rest_of_handle_final (void)
@@ -4334,6 +4731,12 @@ rest_of_handle_final (void)
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
decl_fini_priority_lookup
(current_function_decl));
+
+ if (flag_auto_bolt)
+ {
+ dump_profile_to_elf_sections ();
+ }
+
return 0;
}
diff --git a/gcc/opts.cc b/gcc/opts.cc
index b868d189e..6d57e7d69 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
sorry ("vtable verification is not supported with LTO");
+ /* Currently -fauto-bolt is not supported for LTO. */
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
+
/* Control IPA optimizations based on different -flive-patching level. */
if (opts->x_flag_live_patching)
control_options_for_live_patching (opts, opts_set,
@@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
= (opts->x_flag_unroll_loops
|| opts->x_flag_peel_loops
|| opts->x_optimize >= 3);
+
+ if (opts->x_flag_auto_bolt)
+ {
+ /* Record the function section to facilitate the feedback
+ data storage. */
+ if (!opts->x_flag_function_sections)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
+ " enabling %<-ffunction-sections%>");
+ opts->x_flag_function_sections = true;
+ }
+
+ /* Cancel the internal alignment of the function. The binary
+ optimizer bolt will cancel the internal alignment optimization
+ of the function, so the alignment is meaningless at this time,
+ and if not, it will bring trouble to the calculation of the
+ offset address of the instruction. */
+ if (opts->x_flag_align_jumps)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
+ " disabling %<-falign-jumps%>");
+ opts->x_flag_align_jumps = false;
+ }
+
+ if (opts->x_flag_align_labels)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
+ " disabling %<-falign-labels%>");
+ opts->x_flag_align_labels = false;
+ }
+
+ if (opts->x_flag_align_loops)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
+ " disabling %<-falign-loops%>");
+ opts->x_flag_align_loops = false;
+ }
+
+ /* When parsing instructions in RTL phase, we need to know
+ the call information of instructions to avoid being optimized. */
+ if (!opts->x_flag_ipa_ra)
+ {
+ inform (loc,
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
+ " enabling %<-fipa-ra%>");
+ opts->x_flag_ipa_ra = true;
+ }
+ }
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
if (opts->x_flag_cx_limited_range)
@@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts,
&opts->x_flag_align_functions,
&opts->x_str_align_functions);
break;
+
+ case OPT_fauto_bolt_:
+ case OPT_fauto_bolt:
+ /* Deferred. */
+ break;
case OPT_ftabstop_:
/* It is documented that we silently ignore silly values. */
--
2.33.0
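
For illustration, the pass above emits one excluded section per function, holding the profile as a flat list of .string literals. A hand-written sketch of what the generated assembly could look like for a function main (addresses and counts are invented for the example):

        .section .text.fdo.main,"e"
        .string ".fdo.caller main"          /* function name (ASM_FDO_CALLER_FLAG) */
        .string ".fdo.caller.size 612"      /* function end address (ASM_FDO_CALLER_SIZE_FLAG) */
        .string ".fdo.caller.bindGLOBAL"    /* binding (ASM_FDO_CALLER_BIND_FLAG, no separator) */
        .string "14"                        /* edge start address, hex */
        .string "28"                        /* edge end address, hex */
        .string "100000"                    /* edge count, decimal */

Because the section is created with SECTION_EXCLUDE, it never reaches the final binary; it exists only for the BOLT plugin to read at link time.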


@@ -0,0 +1,345 @@
From 94242286383a80e6ab83d824a4d7ea23ea311f75 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Mon, 22 Jan 2024 15:38:24 +0800
Subject: [PATCH] [AutoBOLT] Enable BOLT linker plugin on aarch64 3/3
---
Makefile.def | 10 ++++++++++
configure | 27 ++++++++++++++++++++++++++-
configure.ac | 22 +++++++++++++++++++++-
gcc/config.host | 1 +
gcc/config.in | 13 +++++++++++++
gcc/configure | 10 ++++++++--
gcc/configure.ac | 4 ++++
gcc/gcc.cc | 23 +++++++++++++++++++++++
8 files changed, 106 insertions(+), 4 deletions(-)
diff --git a/Makefile.def b/Makefile.def
index 72d585496..0ba868890 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -145,6 +145,9 @@ host_modules= { module= gnattools; };
host_modules= { module= lto-plugin; bootstrap=true;
extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
extra_make_flags='@extra_linker_plugin_flags@'; };
+host_modules= { module= bolt-plugin; bootstrap=true;
+ extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
+ extra_make_flags='@extra_linker_plugin_flags@'; };
host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
host_modules= { module= gotools; };
host_modules= { module= libctf; bootstrap=true; };
@@ -349,6 +352,7 @@ dependencies = { module=configure-gcc; on=all-mpfr; };
dependencies = { module=configure-gcc; on=all-mpc; };
dependencies = { module=configure-gcc; on=all-isl; };
dependencies = { module=configure-gcc; on=all-lto-plugin; };
+dependencies = { module=configure-gcc; on=all-bolt-plugin; };
dependencies = { module=configure-gcc; on=all-binutils; };
dependencies = { module=configure-gcc; on=all-gas; };
dependencies = { module=configure-gcc; on=all-ld; };
@@ -374,6 +378,7 @@ dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; };
dependencies = { module=all-gcc; on=all-libiberty; };
dependencies = { module=all-gcc; on=all-fixincludes; };
dependencies = { module=all-gcc; on=all-lto-plugin; };
+dependencies = { module=all-gcc; on=all-bolt-plugin; };
dependencies = { module=all-gcc; on=all-libiconv; };
dependencies = { module=info-gcc; on=all-build-libiberty; };
dependencies = { module=dvi-gcc; on=all-build-libiberty; };
@@ -381,8 +386,10 @@ dependencies = { module=pdf-gcc; on=all-build-libiberty; };
dependencies = { module=html-gcc; on=all-build-libiberty; };
dependencies = { module=install-gcc ; on=install-fixincludes; };
dependencies = { module=install-gcc ; on=install-lto-plugin; };
+dependencies = { module=install-gcc ; on=install-bolt-plugin; };
dependencies = { module=install-strip-gcc ; on=install-strip-fixincludes; };
dependencies = { module=install-strip-gcc ; on=install-strip-lto-plugin; };
+dependencies = { module=install-strip-gcc ; on=install-strip-bolt-plugin; };
dependencies = { module=configure-libcpp; on=configure-libiberty; hard=true; };
dependencies = { module=configure-libcpp; on=configure-intl; };
@@ -401,6 +408,9 @@ dependencies = { module=all-gnattools; on=all-target-libstdc++-v3; };
dependencies = { module=all-lto-plugin; on=all-libiberty; };
dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; };
+dependencies = { module=all-bolt-plugin; on=all-libiberty; };
+dependencies = { module=all-bolt-plugin; on=all-libiberty-linker-plugin; };
+
dependencies = { module=configure-libcc1; on=configure-gcc; };
dependencies = { module=all-libcc1; on=all-gcc; };
diff --git a/configure b/configure
index 5dcaab14a..aff62c464 100755
--- a/configure
+++ b/configure
@@ -826,6 +826,7 @@ with_isl
with_isl_include
with_isl_lib
enable_isl_version_check
+enable_bolt
enable_lto
enable_linker_plugin_configure_flags
enable_linker_plugin_flags
@@ -1550,6 +1551,7 @@ Optional Features:
enable the PGO build
--disable-isl-version-check
disable check for isl version
+ --enable-bolt enable bolt optimization support
--enable-lto enable link time optimization support
--enable-linker-plugin-configure-flags=FLAGS
additional flags for configuring linker plugins
@@ -8564,6 +8566,15 @@ fi
+# Check for BOLT support.
+# Check whether --enable-bolt was given.
+if test "${enable_bolt+set}" = set; then :
+ enableval=$enable_bolt; enable_bolt=$enableval
+else
+ enable_bolt=no; default_enable_bolt=no
+fi
+
+
# Check for LTO support.
# Check whether --enable-lto was given.
if test "${enable_lto+set}" = set; then :
@@ -8593,6 +8604,16 @@ if test $target_elf = yes; then :
# ELF platforms build the lto-plugin always.
build_lto_plugin=yes
+ # ELF platforms can build the bolt-plugin.
+ # BOLT is NOT built by default.
+ case $target in
+ aarch64*-*-linux*)
+ if test $enable_bolt = yes; then :
+ build_bolt_plugin=yes
+ fi
+ ;;
+ esac
+
else
if test x"$default_enable_lto" = x"yes" ; then
case $target in
@@ -8780,6 +8801,10 @@ if test -d ${srcdir}/gcc; then
fi
fi
+ if test "${build_bolt_plugin}" = "yes" ; then
+ configdirs="$configdirs bolt-plugin"
+ fi
+
# If we're building an offloading compiler, add the LTO front end.
if test x"$enable_as_accelerator_for" != x ; then
case ,${enable_languages}, in
@@ -9202,7 +9227,7 @@ fi
extra_host_libiberty_configure_flags=
extra_host_zlib_configure_flags=
case " $configdirs " in
- *" lto-plugin "* | *" libcc1 "*)
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
# When these are to be built as shared libraries, the same applies to
# libiberty.
extra_host_libiberty_configure_flags=--enable-shared
diff --git a/configure.ac b/configure.ac
index 85977482a..f310d75ca 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1863,6 +1863,12 @@ fi
AC_SUBST(isllibs)
AC_SUBST(islinc)
+# Check for BOLT support.
+AC_ARG_ENABLE(bolt,
+[AS_HELP_STRING([--enable-bolt], [enable bolt optimization support])],
+enable_bolt=$enableval,
+enable_bolt=no; default_enable_bolt=no)
+
# Check for LTO support.
AC_ARG_ENABLE(lto,
[AS_HELP_STRING([--enable-lto], [enable link time optimization support])],
@@ -1871,6 +1877,16 @@ enable_lto=yes; default_enable_lto=yes)
ACX_ELF_TARGET_IFELSE([# ELF platforms build the lto-plugin always.
build_lto_plugin=yes
+
+ # ELF platforms can build the bolt-plugin.
+ # BOLT is NOT built by default.
+ case $target in
+ aarch64*-*-linux*)
+ if test $enable_bolt = yes; then :
+ build_bolt_plugin=yes
+ fi
+ ;;
+ esac
],[if test x"$default_enable_lto" = x"yes" ; then
case $target in
*-apple-darwin[[912]]* | *-cygwin* | *-mingw* | *djgpp*) ;;
@@ -2049,6 +2065,10 @@ if test -d ${srcdir}/gcc; then
fi
fi
+ if test "${build_bolt_plugin}" = "yes" ; then
+ configdirs="$configdirs bolt-plugin"
+ fi
+
# If we're building an offloading compiler, add the LTO front end.
if test x"$enable_as_accelerator_for" != x ; then
case ,${enable_languages}, in
@@ -2457,7 +2477,7 @@ fi
extra_host_libiberty_configure_flags=
extra_host_zlib_configure_flags=
case " $configdirs " in
- *" lto-plugin "* | *" libcc1 "*)
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
# When these are to be built as shared libraries, the same applies to
# libiberty.
extra_host_libiberty_configure_flags=--enable-shared
diff --git a/gcc/config.host b/gcc/config.host
index 4ca300f11..bf7dcb4cc 100644
--- a/gcc/config.host
+++ b/gcc/config.host
@@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o
host_can_use_collect2=yes
use_long_long_for_widest_fast_int=no
host_lto_plugin_soname=liblto_plugin.so
+host_bolt_plugin_soname=libbolt_plugin.so
# Unsupported hosts list. Generally, only include hosts known to fail here,
# since we allow hosts not listed to be supported generically.
diff --git a/gcc/config.in b/gcc/config.in
index 64c27c9cf..6bb25b25b 100644
--- a/gcc/config.in
+++ b/gcc/config.in
@@ -24,6 +24,13 @@
#endif
+/* Define to the name of the BOLT plugin DSO that must be passed to the
+ linker's -plugin=LIB option. */
+#ifndef USED_FOR_TARGET
+#undef BOLTPLUGINSONAME
+#endif
+
+
/* Define to the root for URLs about GCC changes. */
#ifndef USED_FOR_TARGET
#undef CHANGES_ROOT_URL
@@ -2208,6 +2215,12 @@
#endif
+/* Define which stat syscall is able to handle 64bit inodes. */
+#ifndef USED_FOR_TARGET
+#undef HOST_STAT_FOR_64BIT_INODES
+#endif
+
+
/* Define as const if the declaration of iconv() needs const. */
#ifndef USED_FOR_TARGET
#undef ICONV_CONST
diff --git a/gcc/configure b/gcc/configure
index 98bbf0f85..30f386789 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -13578,6 +13578,12 @@ case $use_collect2 in
esac
+cat >>confdefs.h <<_ACEOF
+#define BOLTPLUGINSONAME "${host_bolt_plugin_soname}"
+_ACEOF
+
+
+
cat >>confdefs.h <<_ACEOF
#define LTOPLUGINSONAME "${host_lto_plugin_soname}"
_ACEOF
@@ -19668,7 +19674,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 19671 "configure"
+#line 19677 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -19774,7 +19780,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<_LT_EOF
-#line 19777 "configure"
+#line 19783 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
diff --git a/gcc/configure.ac b/gcc/configure.ac
index c74f4b555..dd6cd60f8 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -2531,6 +2531,10 @@ case $use_collect2 in
;;
esac
+AC_DEFINE_UNQUOTED(BOLTPLUGINSONAME,"${host_bolt_plugin_soname}",
+[Define to the name of the BOLT plugin DSO that must be
+ passed to the linker's -plugin=LIB option.])
+
AC_DEFINE_UNQUOTED(LTOPLUGINSONAME,"${host_lto_plugin_soname}",
[Define to the name of the LTO plugin DSO that must be
passed to the linker's -plugin=LIB option.])
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
index fbcc9d033..b0d03430e 100644
--- a/gcc/gcc.cc
+++ b/gcc/gcc.cc
@@ -1156,6 +1156,8 @@ proper position among the other output files. */
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
%(linker) " \
LINK_PLUGIN_SPEC \
+ "%{fauto-bolt|fauto-bolt=*|fbolt-use|fbolt-use=*: \
+ -plugin %(linker_auto_bolt_plugin_file) }"\
"%{flto|flto=*:%<fcompare-debug*} \
%{flto} %{fno-lto} %{flto=*} %l " LINK_PIE_SPEC \
"%{fuse-ld=*:-fuse-ld=%*} " LINK_COMPRESS_DEBUG_SPEC \
@@ -1210,6 +1212,7 @@ static const char *endfile_spec = ENDFILE_SPEC;
static const char *startfile_spec = STARTFILE_SPEC;
static const char *linker_name_spec = LINKER_NAME;
static const char *linker_plugin_file_spec = "";
+static const char *linker_auto_bolt_plugin_file_spec = "";
static const char *lto_wrapper_spec = "";
static const char *lto_gcc_spec = "";
static const char *post_link_spec = POST_LINK_SPEC;
@@ -1723,6 +1726,8 @@ static struct spec_list static_specs[] =
INIT_STATIC_SPEC ("multilib_reuse", &multilib_reuse),
INIT_STATIC_SPEC ("linker", &linker_name_spec),
INIT_STATIC_SPEC ("linker_plugin_file", &linker_plugin_file_spec),
+ INIT_STATIC_SPEC ("linker_auto_bolt_plugin_file",
+ &linker_auto_bolt_plugin_file_spec),
INIT_STATIC_SPEC ("lto_wrapper", &lto_wrapper_spec),
INIT_STATIC_SPEC ("lto_gcc", &lto_gcc_spec),
INIT_STATIC_SPEC ("post_link", &post_link_spec),
@@ -9118,6 +9123,24 @@ driver::maybe_run_linker (const char *argv0) const
}
#endif
set_static_spec_shared (&lto_gcc_spec, argv0);
+
+ /* Set bolt-plugin. */
+ const char *fauto_bolt = "fauto-bolt";
+ const char *fbolt_use = "fbolt-use";
+ if (switch_matches (fauto_bolt, fauto_bolt + strlen (fauto_bolt), 1)
+ || switch_matches (fbolt_use, fbolt_use + strlen (fbolt_use), 1))
+ {
+ linker_auto_bolt_plugin_file_spec = find_a_file (&exec_prefixes,
+ BOLTPLUGINSONAME, X_OK, false);
+ if (!linker_auto_bolt_plugin_file_spec)
+ {
+ fatal_error (input_location,
+ "-fauto-bolt or -fbolt-use is used, but %s is not found",
+ BOLTPLUGINSONAME);
+
+ }
+ }
+
}
/* Rebuild the COMPILER_PATH and LIBRARY_PATH environment variables
--
2.33.0
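
A hedged end-to-end sketch of how this plumbing is exercised (the source file name and optimization flags are assumptions; --enable-bolt, -fauto-bolt, and the plugin soname come from the patch itself):

        # Build GCC with the BOLT plugin (aarch64-linux hosts only, off by default).
        ../gcc/configure --enable-bolt
        make && make install

        # The driver now appends "-plugin libbolt_plugin.so" to the link line.
        gcc -O2 -fauto-bolt app.c -o app

If libbolt_plugin.so cannot be found in the exec prefixes, the driver stops with the fatal_error added to gcc.cc above instead of silently linking without the plugin.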


@@ -0,0 +1,312 @@
From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
Date: Sat, 23 Mar 2024 22:56:09 +0800
Subject: [PATCH] [Autofdo] Enable discriminator and MCF algorithm on Autofdo
---
gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
gcc/cfghooks.cc | 7 ++
gcc/opts.cc | 5 +-
gcc/tree-inline.cc | 14 ++++
4 files changed, 193 insertions(+), 4 deletions(-)
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
index 2b34b80b8..f45f0ec66 100644
--- a/gcc/auto-profile.cc
+++ b/gcc/auto-profile.cc
@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
if (name == NULL)
return -1;
string_index_map::const_iterator iter = map_.find (name);
+ /* Function names may be duplicated. Try to distinguish them by the
+ #file_name#function_name form defined by the autofdo tool chain. */
+ if (iter == map_.end ())
+ {
+ char* file_name = get_original_name (lbasename (dump_base_name));
+ char* file_func_name
+ = concat ("#", file_name, "#", name, NULL);
+ iter = map_.find (file_func_name);
+ free (file_name);
+ free (file_func_name);
+ }
if (iter == map_.end ())
return -1;
@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
for (unsigned i = 0; i < num_pos_counts; i++)
{
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
+ unsigned offset = gcov_read_unsigned ();
unsigned num_targets = gcov_read_unsigned ();
gcov_type count = gcov_read_counter ();
s->pos_counts[offset].count = count;
@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
function_instance *s = get_function_instance_by_inline_stack (stack);
if (s == NULL)
return false;
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
+ {
+ return true;
+ }
return s->get_count_info (stack[0].second, info);
}
@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
}
}
+/* Handle the scenario where afdo_propagate () inverts the branch
+   probabilities. E.g.
+   BB_NUM (sample count)
+        BB1 (1000)
+        /       \
+   BB2 (10)   BB3 (0)
+        \       /
+          BB4
+   In afdo_propagate (), the count of BB3 is calculated as
+   COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10).
+   In fact, BB3 may be colder than BB2 by sample count.
+   This function allocates the source BB count to each succ BB by
+   sample rate, e.g.
+   BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
+
+static void
+afdo_preprocess_bb_count ()
+{
+ basic_block bb;
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
+ && bb->count > profile_count::zero ().afdo ())
+ {
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
+ if (single_succ_edge (bb1) && single_succ_edge (bb2)
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
+ {
+ gcov_type max_count = 0;
+ gcov_type total_count = 0;
+ edge e;
+ edge_iterator ei;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (!e->dest->count.ipa_p ())
+ {
+ continue;
+ }
+ max_count = MAX (max_count, e->dest->count.to_gcov_type ());
+ total_count += e->dest->count.to_gcov_type ();
+ }
+ /* Only when bb_count > max_count * 2 will the branch probability
+ be inverted. */
+ if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
+ {
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ gcov_type target_count = bb->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ e->dest->count
+ = profile_count::from_gcov_type
+ (target_count).afdo ();
+ }
+ }
+ }
+ }
+ }
+}
+
/* Propagate counts on control flow graph and calculate branch
probabilities. */
@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
}
afdo_find_equiv_class (annotated_bb);
+ afdo_preprocess_bb_count ();
afdo_propagate (annotated_bb);
FOR_EACH_BB_FN (bb, cfun)
@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
return false;
}
+/* Preparation before executing MCF algorithm. */
+
+static void
+afdo_init_mcf ()
+{
+ basic_block bb;
+ edge e;
+ edge_iterator ei;
+
+ if (dump_file)
+ {
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
+ }
+
+ /* Step 1: when using MCF, BB ids must be contiguous,
+ so we need compact_blocks (). */
+ compact_blocks ();
+
+ /* Step 2: allocate memory for the MCF input data. */
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
+
+ /* Step 3: initialize the MCF input data from the CFG. */
+ FOR_ALL_BB_FN (bb, cfun)
+ {
+ /* Init BB count for MCF. */
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
+
+ gcov_type total_count = 0;
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ total_count += e->dest->count.to_gcov_type ();
+ }
+
+ /* If there are no samples in the successor blocks, the source
+ BB samples are allocated to each edge by static branch probability. */
+
+ FOR_EACH_EDGE (e, ei, bb->succs)
+ {
+ if (total_count == 0)
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
+ }
+ else
+ {
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
+ * e->dest->count.to_gcov_type () / total_count;
+ }
+ }
+ }
+}
+
+
+/* Free the resources used by MCF and reset BB counts from the MCF result.
+   Branch probabilities have been updated in mcf_smooth_cfg (). */
+
+static void
+afdo_process_after_mcf ()
+{
+ basic_block bb;
+ /* Reset BB count from MCF result. */
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ if (bb_gcov_count (bb))
+ {
+ bb->count
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
+ }
+ }
+
+ /* Clean up MCF resource. */
+ bb_gcov_counts.release ();
+ delete edge_gcov_counts;
+ edge_gcov_counts = NULL;
+}
+
/* Annotate auto profile to the control flow graph. Do not annotate value
profile for stmts in PROMOTED_STMTS. */
@@ -1574,8 +1727,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
afdo_source_profile->mark_annotated (cfun->function_end_locus);
if (max_count > profile_count::zero ())
{
- /* Calculate, propagate count and probability information on CFG. */
- afdo_calculate_branch_prob (&annotated_bb);
+ /* 1 means -fprofile-correction is enabled manually, and the MCF
+ algorithm will be used to calculate counts and probabilities.
+ Otherwise, use the default calculation algorithm. */
+ if (flag_profile_correction == 1)
+ {
+ afdo_init_mcf ();
+ mcf_smooth_cfg ();
+ afdo_process_after_mcf ();
+ }
+ else
+ {
+ /* Calculate, propagate count and probability information on CFG. */
+ afdo_calculate_branch_prob (&annotated_bb);
+ }
}
update_max_bb_count ();
profile_status_for_fn (cfun) = PROFILE_READ;
diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc
index c0b7bdcd9..323663010 100644
--- a/gcc/cfghooks.cc
+++ b/gcc/cfghooks.cc
@@ -542,6 +542,9 @@ split_block_1 (basic_block bb, void *i)
return NULL;
new_bb->count = bb->count;
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
new_bb->discriminator = bb->discriminator;
if (dom_info_available_p (CDI_DOMINATORS))
@@ -1113,6 +1116,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
move_block_after (new_bb, after);
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ new_bb->discriminator = bb->discriminator;
FOR_EACH_EDGE (s, ei, bb->succs)
{
/* Since we are creating edges from a new block to successors
diff --git a/gcc/opts.cc b/gcc/opts.cc
index 2bba88140..4b4925331 100644
--- a/gcc/opts.cc
+++ b/gcc/opts.cc
@@ -3014,7 +3014,10 @@ common_handle_option (struct gcc_options *opts,
/* FALLTHRU */
case OPT_fauto_profile:
enable_fdo_optimizations (opts, opts_set, value);
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
+ /* 2 is special and means flag_profile_correction was turned on
+ by -fauto-profile. */
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
+ (value ? 2 : 0));
break;
case OPT_fipa_struct_reorg_:
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
index f892cee3f..f50dbbc52 100644
--- a/gcc/tree-inline.cc
+++ b/gcc/tree-inline.cc
@@ -2038,6 +2038,10 @@ copy_bb (copy_body_data *id, basic_block bb,
basic_block_info automatically. */
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
copy_basic_block->count = bb->count.apply_scale (num, den);
+ /* Copy the discriminator from the original bb to distinguish among
+ several basic blocks that share a common locus, allowing for
+ more accurate autofdo. */
+ copy_basic_block->discriminator = bb->discriminator;
copy_gsi = gsi_start_bb (copy_basic_block);
@@ -3058,6 +3062,16 @@ copy_cfg_body (copy_body_data * id,
den += e->count ();
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
}
+ /* When autofdo uses a PMU event as the sampling unit, the count of
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
+ be zero. Using it in adjust_for_ipa_scaling would cause the
+ inlined BB count to be incorrectly overestimated. So set den equal
+ to num, which is the count of the source inline BB, to avoid the
+ overestimate. */
+ if (den == profile_count::zero ().afdo ())
+ {
+ den = num;
+ }
profile_count::adjust_for_ipa_scaling (&num, &den);
--
2.33.0
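
With the opts.cc change, flag_profile_correction effectively becomes a tri-state: 0 means off, 1 means the user passed -fprofile-correction explicitly (selecting the MCF smoothing path in afdo_annotate_cfg), and 2 means it was implied by -fauto-profile (keeping the default propagation). A hypothetical pair of invocations exercising both paths (the profile file name is invented):

        gcc -O2 -fauto-profile=app.afdo app.c                        # default propagation (flag == 2)
        gcc -O2 -fauto-profile=app.afdo -fprofile-correction app.c   # MCF smoothing (flag == 1)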


@@ -0,0 +1,194 @@
From aa39a66f6029fe16a656d7c6339908b953fb1e04 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Thu, 22 Feb 2024 11:27:43 +0300
Subject: [PATCH 01/18] Add insn defs and correct costs for cmlt generation
---
gcc/config/aarch64/aarch64-simd.md | 48 +++++++++++++++++++++++++++++
gcc/config/aarch64/aarch64.cc | 15 +++++++++
gcc/config/aarch64/aarch64.opt | 4 +++
gcc/config/aarch64/iterators.md | 3 +-
gcc/config/aarch64/predicates.md | 25 +++++++++++++++
gcc/testsuite/gcc.dg/combine-cmlt.c | 20 ++++++++++++
6 files changed, 114 insertions(+), 1 deletion(-)
create mode 100755 gcc/testsuite/gcc.dg/combine-cmlt.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index ee7f0b89c..82f73805f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6454,6 +6454,54 @@
[(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)
+;; Use cmlt to replace vector arithmetic operations like this (SImode example):
+;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
+;; TODO: maybe extend to scalar operations or other cm** instructions.
+
+(define_insn "*aarch64_cmlt_as_arith<mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
+ (minus:<V_INT_EQUIV>
+ (ashift:<V_INT_EQUIV>
+ (and:<V_INT_EQUIV>
+ (lshiftrt:<V_INT_EQUIV>
+ (match_operand:VDQHSD 1 "register_operand" "w")
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
+ (match_operand:VDQHSD 4 "half_size_operand"))
+ (and:<V_INT_EQUIV>
+ (lshiftrt:<V_INT_EQUIV>
+ (match_dup 1)
+ (match_dup 2))
+ (match_dup 3))))]
+ "TARGET_SIMD && flag_cmlt_arith"
+ "cmlt\t%<v>0.<V2ntype>, %<v>1.<V2ntype>, #0"
+ [(set_attr "type" "neon_compare_zero")]
+)
+
+;; The helper definition that allows combiner to use the previous pattern.
+
+(define_insn_and_split "*arch64_cmlt_tmp<mode>"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
+ (and:<V_INT_EQUIV>
+ (lshiftrt:<V_INT_EQUIV>
+ (match_operand:VDQHSD 1 "register_operand" "w")
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
+ "TARGET_SIMD && flag_cmlt_arith"
+ "#"
+ "&& reload_completed"
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
+ (lshiftrt:<V_INT_EQUIV>
+ (match_operand:VDQHSD 1 "register_operand")
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand")))
+ (set (match_dup 0)
+ (and:<V_INT_EQUIV>
+ (match_dup 0)
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
+ ""
+ [(set_attr "type" "neon_compare_zero")]
+)
+
(define_insn_and_split "aarch64_cm<optab>di"
[(set (match_operand:DI 0 "register_operand" "=w,w,r")
(neg:DI
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index a3da4ca30..04072ca25 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -14064,6 +14064,21 @@ cost_minus:
return true;
}
+ /* Detect the aarch64_cmlt_as_arith instruction. Currently only this
+ pattern matches the condition. The costs of the cmlt and sub
+ instructions are comparable, so we do not increase the cost here. */
+ if (flag_cmlt_arith && GET_CODE (op0) == ASHIFT
+ && GET_CODE (op1) == AND)
+ {
+ rtx op0_subop0 = XEXP (op0, 0);
+ if (rtx_equal_p (op0_subop0, op1))
+ {
+ rtx lshrt_op = XEXP (op0_subop0, 0);
+ if (GET_CODE (lshrt_op) == LSHIFTRT)
+ return true;
+ }
+ }
+
/* Look for SUB (extended register). */
if (is_a <scalar_int_mode> (mode)
&& aarch64_rtx_arith_op_extract_p (op1))
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index a64b927e9..101664c7c 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -262,6 +262,10 @@ Use an immediate to offset from the stack protector guard register, sp_el0.
This option is for use with fstack-protector-strong and not for use in
user-land code.
+mcmlt-arith
+Target Var(flag_cmlt_arith) Optimization Init(0)
+Use SIMD cmlt instruction to perform some arithmetic/logic calculations.
+
TargetVariable
long aarch64_stack_protector_guard_offset = 0
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 26a840d7f..967e6b0b1 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1485,7 +1485,8 @@
(V2DI "2s")])
;; Register suffix narrowed modes for VQN.
-(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
+(define_mode_attr V2ntype [(V4HI "8b") (V2SI "4h")
+ (V8HI "16b") (V4SI "8h")
(V2DI "4s")])
;; Widened modes of vector modes.
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index c308015ac..07c14aacb 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -49,6 +49,31 @@
return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3);
})
+(define_predicate "half_size_minus_one_operand"
+ (match_code "const_vector")
+{
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
+ return CONST_INT_P (op) && (UINTVAL (op) == size - 1);
+})
+
+(define_predicate "half_size_operand"
+ (match_code "const_vector")
+{
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
+})
+
+(define_predicate "cmlt_arith_mask_operand"
+ (match_code "const_vector")
+{
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
+ unsigned long long mask = ((unsigned long long) 1 << size) | 1;
+ return CONST_INT_P (op) && (UINTVAL (op) == mask);
+})
+
(define_predicate "subreg_lowpart_operator"
(ior (match_code "truncate")
(and (match_code "subreg")
diff --git a/gcc/testsuite/gcc.dg/combine-cmlt.c b/gcc/testsuite/gcc.dg/combine-cmlt.c
new file mode 100755
index 000000000..b4c9a37ff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-cmlt.c
@@ -0,0 +1,20 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -mcmlt-arith" } */
+
+/* The test checks usage of cmlt insns for arithmetic/logic calculations
+ * in foo (). It's inspired by sources of x264 codec. */
+
+typedef unsigned short int uint16_t;
+typedef unsigned int uint32_t;
+
+void foo( uint32_t *a, uint32_t *b)
+{
+ for (unsigned i = 0; i < 4; i++)
+ {
+ uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
+ &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
+ b[i] = (a[i]+s)^s;
+ }
+}
+
+/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */
--
2.33.0
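
To see why the combine pattern is safe, note that the replaced arithmetic computes, per 16-bit half of each 32-bit element, an all-ones mask exactly when that half is negative, which is what cmlt ..., #0 returns. A scalar C sketch of the identity for one 32-bit lane (a hand-written illustration, not part of the patch):

        #include <stdint.h>

        /* Arithmetic form matched by *aarch64_cmlt_as_arith: one 32-bit lane
           holding two 16-bit elements.  */
        static uint32_t cmlt_as_arith (uint32_t a)
        {
          uint32_t t = (a >> 15) & 0x00010001u; /* sign bit of each 16-bit half */
          return (t << 16) - t;                 /* 0xFFFF for each negative half */
        }
        /* Equivalent to: for each int16_t element e, e < 0 ? 0xFFFF : 0,
           i.e. a per-element cmlt #0.  */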


@@ -0,0 +1,560 @@
From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001
From: vchernon <chernonog.vyacheslav@huawei.com>
Date: Wed, 28 Feb 2024 23:05:12 +0800
Subject: [PATCH 02/18] [rtl-ifcvt] introduce rtl ifcvt enhancements
New option: -fifcvt-allow-complicated-cmps allows ifcvt to deal
with complicated cmps like
cmp reg1 (reg2 + reg3)
(can increase compilation time)
New param: -param=ifcvt-allow-register-renaming=[0,1,2]
1 : allows ifcvt to rename registers in the then and else bbs
2 : also allows renaming registers in the condition and else/then bbs
(can increase compilation time and register pressure)
---
gcc/common.opt | 4 +
gcc/ifcvt.cc | 291 +++++++++++++++---
gcc/params.opt | 4 +
.../gcc.c-torture/execute/ifcvt-renaming-1.c | 35 +++
gcc/testsuite/gcc.dg/ifcvt-6.c | 27 ++
5 files changed, 311 insertions(+), 50 deletions(-)
create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
diff --git a/gcc/common.opt b/gcc/common.opt
index c7c6bc256..aa00fb7b0 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3691,4 +3691,8 @@ fipa-ra
Common Var(flag_ipa_ra) Optimization
Use caller save register across calls if possible.
+fifcvt-allow-complicated-cmps
+Common Var(flag_ifcvt_allow_complicated_cmps) Optimization
+Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
+
; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
index 2c1eba312..584db7b55 100644
--- a/gcc/ifcvt.cc
+++ b/gcc/ifcvt.cc
@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
}
/* Don't even try if the comparison operands or the mode of X are weird. */
- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
+ if (!flag_ifcvt_allow_complicated_cmps
+ && (cond_complex
+ || !SCALAR_INT_MODE_P (GET_MODE (x))))
return NULL_RTX;
return emit_store_flag (x, code, XEXP (cond, 0),
@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
/* Currently support only simple single sets in test_bb. */
if (!sset
|| !noce_operand_ok (SET_DEST (sset))
- || contains_ccmode_rtx_p (SET_DEST (sset))
+ || (!flag_ifcvt_allow_complicated_cmps
+ && contains_ccmode_rtx_p (SET_DEST (sset)))
|| !noce_operand_ok (SET_SRC (sset)))
return false;
@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
in this function. */
static bool
-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
+bbs_ok_for_cmove_arith (basic_block bb_a,
+ basic_block bb_b,
+ rtx to_rename,
+ bitmap conflict_regs)
{
rtx_insn *a_insn;
bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
-
+ bitmap intersections = BITMAP_ALLOC (&reg_obstack);
df_ref def;
df_ref use;
+ rtx_insn *last_a = last_active_insn (bb_a, FALSE);
FOR_BB_INSNS (bb_a, a_insn)
{
@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
rtx sset_a = single_set (a_insn);
if (!sset_a)
- {
- BITMAP_FREE (bba_sets);
- return false;
- }
+ goto end_cmove_arith_check_and_fail;
/* Record all registers that BB_A sets. */
FOR_EACH_INSN_DEF (def, a_insn)
- if (!(to_rename && DF_REF_REG (def) == to_rename))
+ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
}
+ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
rtx_insn *b_insn;
-
FOR_BB_INSNS (bb_b, b_insn)
{
if (!active_insn_p (b_insn))
@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
rtx sset_b = single_set (b_insn);
if (!sset_b)
- {
- BITMAP_FREE (bba_sets);
- return false;
- }
+ goto end_cmove_arith_check_and_fail;
/* Make sure this is a REG and not some instance
of ZERO_EXTRACT or SUBREG or other dangerous stuff.
@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
if (MEM_P (SET_DEST (sset_b)))
gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
else if (!REG_P (SET_DEST (sset_b)))
- {
- BITMAP_FREE (bba_sets);
- return false;
- }
+ goto end_cmove_arith_check_and_fail;
- /* If the insn uses a reg set in BB_A return false. */
+ /* If the insn uses a reg set in BB_A, return false
+ or try to collect the register list for renaming. */
FOR_EACH_INSN_USE (use, b_insn)
{
- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
+ if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
{
- BITMAP_FREE (bba_sets);
- return false;
+ if (param_ifcvt_allow_register_renaming < 1)
+ goto end_cmove_arith_check_and_fail;
+
+ /* Those regs should be renamed. We can't rename CC reg, but
+ possibly we can provide combined comparison in the future. */
+ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
+ goto end_cmove_arith_check_and_fail;
+ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
}
}
-
}
BITMAP_FREE (bba_sets);
+ BITMAP_FREE (intersections);
return true;
+
+end_cmove_arith_check_and_fail:
+ BITMAP_FREE (bba_sets);
+ BITMAP_FREE (intersections);
+ return false;
}
/* Emit copies of all the active instructions in BB except the last.
@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
return true;
}
+/* This function tries to rename the regs of the condition expression
+   that intersect with the considered bb. The condition expression
+   will be moved down if the optimization is applied, so it is
+   essential that all intersecting registers are renamed; otherwise
+   the transformation can't be applied. Returns true if renaming was
+   successful and the optimization can proceed further. */
+
+static bool
+noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
+{
+ bool success = true;
+ if (bitmap_empty_p (cond_rename_regs))
+ return true;
+ if (param_ifcvt_allow_register_renaming < 2)
+ return false;
+ df_ref use;
+ rtx_insn *cmp_insn = if_info->cond_earliest;
+ /* A jump instruction as the condition is currently unsupported. */
+ if (JUMP_P (cmp_insn))
+ return false;
+ rtx_insn *before_cmp = PREV_INSN (cmp_insn);
+ start_sequence ();
+ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
+ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
+ FOR_EACH_INSN_USE (use, cmp_insn)
+ {
+ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
+ {
+ rtx use_reg = DF_REF_REG (use);
+ rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
+ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
+ {
+ end_sequence ();
+ return false;
+ }
+ noce_emit_move_insn (tmp, use_reg);
+ }
+ }
+
+ emit_insn (PATTERN (copy_of_cmp));
+ rtx_insn *seq = get_insns ();
+ unshare_all_rtl_in_chain (seq);
+ end_sequence ();
+
+ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn));
+ delete_insn_and_edges (cmp_insn);
+ rtx_insn *insn;
+ FOR_BB_INSNS (cmp_block, insn)
+ df_insn_rescan (insn);
+
+ if_info->cond = noce_get_condition (if_info->jump,
+ &copy_of_cmp,
+ if_info->then_else_reversed);
+ if_info->cond_earliest = copy_of_cmp;
+ if_info->rev_cond = NULL_RTX;
+
+ return success;
+}
+
+/* This function tries to rename regs that intersect with the considered
+   bb. Returns true if the renaming was successful and the optimization
+   can proceed further, false otherwise. */
+static bool
+noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
+{
+ if (bitmap_empty_p (rename_regs))
+ return true;
+ rtx_insn *insn;
+ rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
+ bool res = true;
+ start_sequence ();
+ FOR_BB_INSNS (test_bb, insn)
+ {
+ if (!active_insn_p (insn))
+ continue;
+ /* Only ssets are supported for now. */
+ rtx sset = single_set (insn);
+ gcc_assert (sset);
+ rtx x = SET_DEST (sset);
+ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
+ continue;
+ /* No need to rename the dest in the last instruction;
+ it will be renamed anyway. */
+ if (insn == last_insn)
+ continue;
+ machine_mode mode = GET_MODE (x);
+ rtx tmp = gen_reg_rtx (mode);
+ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
+ {
+ gcc_assert (insn != last_insn);
+ /* We could generate an additional move for such a case,
+ but it would increase register pressure.
+ For now just stop the transformation. */
+ rtx result_rtx = SET_DEST (single_set (last_insn));
+ if (REG_P (result_rtx) && (x != result_rtx))
+ {
+ res = false;
+ break;
+ }
+ if (!validate_replace_rtx (x, tmp, insn))
+ gcc_unreachable ();
+ noce_emit_move_insn (tmp, x);
+ }
+ set_used_flags (insn);
+ rtx_insn *rename_candidate;
+ for (rename_candidate = NEXT_INSN (insn);
+ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb));
+ rename_candidate = NEXT_INSN (rename_candidate))
+ {
+ if (!reg_overlap_mentioned_p (x, rename_candidate))
+ continue;
+
+ int replace_res = TRUE;
+ if (rename_candidate == last_insn)
+ {
+ validate_replace_src_group (x, tmp, rename_candidate);
+ replace_res = apply_change_group ();
+ }
+ else
+ replace_res = validate_replace_rtx (x, tmp, rename_candidate);
+ gcc_assert (replace_res);
+ set_used_flags (rename_candidate);
+ }
+ set_used_flags (x);
+ set_used_flags (tmp);
+ }
+ rtx_insn *seq = get_insns ();
+ unshare_all_rtl_in_chain (seq);
+ end_sequence ();
+ emit_insn_before_setloc (seq, first_active_insn (test_bb),
+ INSN_LOCATION (first_active_insn (test_bb)));
+ FOR_BB_INSNS (test_bb, insn)
+ df_insn_rescan (insn);
+ return res;
+}
+
/* Try more complex cases involving conditional_move. */
static int
@@ -2185,11 +2331,30 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
std::swap (then_bb, else_bb);
}
}
-
+ bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
+ bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
if (then_bb && else_bb
- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
- return FALSE;
+ && (!bbs_ok_for_cmove_arith (then_bb, else_bb,
+ if_info->orig_x,
+ then_bb_rename_regs)
+ || !bbs_ok_for_cmove_arith (else_bb, then_bb,
+ if_info->orig_x,
+ else_bb_rename_regs)))
+ {
+ BITMAP_FREE (then_bb_rename_regs);
+ BITMAP_FREE (else_bb_rename_regs);
+ return FALSE;
+ }
+ bool prepass_renaming = noce_rename_regs_in_bb (then_bb,
+ then_bb_rename_regs)
+ && noce_rename_regs_in_bb (else_bb,
+ else_bb_rename_regs);
+
+ BITMAP_FREE (then_bb_rename_regs);
+ BITMAP_FREE (else_bb_rename_regs);
+
+ if (!prepass_renaming)
+ return FALSE;
start_sequence ();
@@ -3072,7 +3237,8 @@ noce_operand_ok (const_rtx op)
static bool
bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
- unsigned int *cost, bool *simple_p)
+ unsigned int *cost, bool *simple_p,
+ bitmap cond_rename_regs)
{
if (!test_bb)
return false;
@@ -3112,8 +3278,9 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
rtx_insn *prev_last_insn = PREV_INSN (last_insn);
gcc_assert (prev_last_insn);
- /* For now, disallow setting x multiple times in test_bb. */
- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn))
+ if (REG_P (x)
+ && reg_set_between_p (x, first_insn, prev_last_insn)
+ && param_ifcvt_allow_register_renaming < 1)
return false;
bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack);
@@ -3125,25 +3292,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
rtx_insn *insn;
FOR_BB_INSNS (test_bb, insn)
{
- if (insn != last_insn)
- {
- if (!active_insn_p (insn))
- continue;
+ if (insn == last_insn)
+ continue;
+ if (!active_insn_p (insn))
+ continue;
- if (!insn_valid_noce_process_p (insn, cc))
- goto free_bitmap_and_fail;
+ if (!insn_valid_noce_process_p (insn, cc))
+ goto free_bitmap_and_fail;
- rtx sset = single_set (insn);
- gcc_assert (sset);
+ rtx sset = single_set (insn);
+ gcc_assert (sset);
- if (contains_mem_rtx_p (SET_SRC (sset))
- || !REG_P (SET_DEST (sset))
- || reg_overlap_mentioned_p (SET_DEST (sset), cond))
- goto free_bitmap_and_fail;
+ if (contains_mem_rtx_p (SET_SRC (sset))
+ || !REG_P (SET_DEST (sset)))
+ goto free_bitmap_and_fail;
- potential_cost += pattern_cost (sset, speed_p);
- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
+ if (reg_overlap_mentioned_p (SET_DEST (sset), cond))
+ {
+ if (param_ifcvt_allow_register_renaming < 1)
+ goto free_bitmap_and_fail;
+ rtx sset_dest = SET_DEST (sset);
+ if (REG_P (sset_dest)
+ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC))
+ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest));
+ else
+ goto free_bitmap_and_fail;
}
+ potential_cost += pattern_cost (sset, speed_p);
+ if (SET_DEST (sset) != SET_DEST (last_set))
+ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
}
/* If any of the intermediate results in test_bb are live after test_bb
@@ -3777,15 +3954,29 @@ noce_process_if_block (struct noce_if_info *if_info)
bool speed_p = optimize_bb_for_speed_p (test_bb);
unsigned int then_cost = 0, else_cost = 0;
+ bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack);
if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
- &if_info->then_simple))
- return false;
+ &if_info->then_simple, cond_rename_regs))
+ {
+ BITMAP_FREE (cond_rename_regs);
+ return false;
+ }
if (else_bb
&& !bb_valid_for_noce_process_p (else_bb, cond, &else_cost,
- &if_info->else_simple))
- return false;
+ &if_info->else_simple, cond_rename_regs))
+ {
+ BITMAP_FREE (cond_rename_regs);
+ return false;
+ }
+ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs))
+ {
+ BITMAP_FREE (cond_rename_regs);
+ return false;
+ }
+ BITMAP_FREE (cond_rename_regs);
+ cond = if_info->cond;
if (speed_p)
if_info->original_cost += average_cost (then_cost, else_cost,
find_edge (test_bb, then_bb));
@@ -5823,12 +6014,13 @@ if_convert (bool after_combine)
{
basic_block bb;
int pass;
-
if (optimize == 1)
{
df_live_add_problem ();
df_live_set_all_dirty ();
}
+ free_dominance_info (CDI_DOMINATORS);
+ cleanup_cfg (CLEANUP_EXPENSIVE);
/* Record whether we are after combine pass. */
ifcvt_after_combine = after_combine;
@@ -5933,7 +6125,6 @@ rest_of_handle_if_conversion (void)
dump_reg_info (dump_file);
dump_flow_info (dump_file, dump_flags);
}
- cleanup_cfg (CLEANUP_EXPENSIVE);
if_convert (false);
if (num_updated_if_blocks)
/* Get rid of any dead CC-related instructions. */
diff --git a/gcc/params.opt b/gcc/params.opt
index d2196dc68..ba87f820b 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -669,6 +669,10 @@ Maximum permissible cost for the sequence that would be generated by the RTL if-
Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization
Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable.
+-param=ifcvt-allow-register-renaming=
+Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization
+Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created.
+
-param=max-sched-extend-regions-iters=
Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization
The maximum number of iterations through CFG to extend regions.
diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
new file mode 100644
index 000000000..65c4d4140
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
@@ -0,0 +1,35 @@
+
+extern void abort(void);
+
+__attribute__ ((noinline))
+int foo (int x, int y, int z, int a, int b)
+{
+ if (a < 2) {
+ if (a == 0) {
+ if (x - y < 0)
+ x = x - y + z;
+ else
+ x = x - y;
+ }
+ else {
+ if (x + y >= z)
+ x = x + y - z;
+ else
+ x = x + y;
+ }
+ }
+ return x;
+}
+
+int main(void) {
+ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2
+ abort ();
+ if (foo (50,10,7,0,1) != 40) // x - y = 40
+ abort ();
+ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8
+ abort ();
+ if (foo (5,10,70,1,1) != 15) // x + y = 15
+ abort ();
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c
new file mode 100644
index 000000000..be9a67b3f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ifcvt-6.c
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { aarch64*-*-* } } } */
+/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 -fifcvt-allow-complicated-cmps" } */
+
+typedef unsigned int uint16_t;
+
+uint16_t
+foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a,
+ uint16_t b, uint16_t c, uint16_t d) {
+ int i = 1;
+ int j = 1;
+ if (a > b) {
+ j = x;
+ if (b > c)
+ i = y;
+ else
+ i = z;
+ }
+ else {
+ j = y;
+ if (c > d)
+ i = z;
+ }
+ return i * j;
+}
+
+/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */
+
--
2.33.0
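For reference, a minimal sketch (not part of the patch) of the branchless
form that RTL if-conversion aims for once register renaming lets both arms
of foo () in ifcvt-renaming-1.c qualify; all names here are illustrative.

int foo_branchless (int x, int y, int z, int a, int b)
{
  int sub = x - y;
  int add = x + y;
  int r0 = (sub < 0) ? sub + z : sub;    /* the a == 0 arm */
  int r1 = (add >= z) ? add - z : add;   /* the a != 0 arm */
  return (a < 2) ? (a == 0 ? r0 : r1) : x;
}

Each ternary can become a conditional select (csel on AArch64) instead of a
branch, which is what the "7 true changes made" scan in ifcvt-6.c counts.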

View File

@@ -0,0 +1,109 @@
From 310eade1450995b55d9f8120561022fbf164b2ec Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Thu, 12 Jan 2023 14:52:49 +0300
Subject: [PATCH 03/18] Perform early if-conversion of simple arithmetic
---
gcc/common.opt | 4 ++++
gcc/match.pd | 25 +++++++++++++++++++
gcc/testsuite/gcc.dg/ifcvt-gimple.c | 37 +++++++++++++++++++++++++++++
3 files changed, 66 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple.c
diff --git a/gcc/common.opt b/gcc/common.opt
index aa00fb7b0..dac477c04 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1821,6 +1821,10 @@ fif-conversion2
Common Var(flag_if_conversion2) Optimization
Perform conversion of conditional jumps to conditional execution.
+fif-conversion-gimple
+Common Var(flag_if_conversion_gimple) Optimization
+Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
+
fstack-reuse=
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
diff --git a/gcc/match.pd b/gcc/match.pd
index 6f24d5079..3cbaf2a5b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4278,6 +4278,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
)
)
)
+
+(if (flag_if_conversion_gimple)
+ (for simple_op (plus minus bit_and bit_ior bit_xor)
+ (simplify
+ (cond @0 (simple_op @1 INTEGER_CST@2) @1)
+ (switch
+ /* a = cond ? a + 1 : a -> a = a + ((int) cond) */
+ (if (integer_onep (@2))
+ (simple_op @1 (convert (convert:boolean_type_node @0))))
+ /* a = cond ? a + powerof2cst : a ->
+ a = a + ((int) cond) << log2 (powerof2cst) */
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
+ (with
+ {
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
+ }
+ (simple_op @1 (lshift (convert (convert:boolean_type_node @0))
+ { shift; })
+ )
+ )
+ )
+ )
+ )
+ )
+)
#endif
#if GIMPLE
diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
new file mode 100644
index 000000000..0f7c87e5c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
+
+int test_int (int optimizable_int) {
+ if (optimizable_int > 5)
+ ++optimizable_int;
+ return optimizable_int;
+}
+
+int test_int_pow2 (int optimizable_int_pow2) {
+ if (optimizable_int_pow2 <= 4)
+ optimizable_int_pow2 += 1024;
+ return optimizable_int_pow2;
+}
+
+int test_int_non_pow2 (int not_optimizable_int_non_pow2) {
+ if (not_optimizable_int_non_pow2 == 1)
+ not_optimizable_int_non_pow2 += 513;
+ return not_optimizable_int_non_pow2;
+}
+
+float test_float (float not_optimizable_float) {
+ if (not_optimizable_float > 5)
+ not_optimizable_float += 1;
+ return not_optimizable_float;
+}
+
+/* Expecting if-else block in test_float and test_int_non_pow2 only. */
+/* { dg-final { scan-tree-dump-not "if \\(optimizable" "optimized" } } */
+/* { dg-final { scan-tree-dump "if \\(not_optimizable_int_non_pow2" "optimized" } } */
+/* { dg-final { scan-tree-dump "if \\(not_optimizable_float" "optimized" } } */
+/* { dg-final { scan-tree-dump-times "if " 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "else" 2 "optimized" } } */
+
+/* Expecting shifted result only for optimizable_int_pow2. */
+/* { dg-final { scan-tree-dump-times " << " 1 "optimized" } } */
+/* { dg-final { scan-tree-dump " << 10;" "optimized" } } */
--
2.33.0
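A hedged illustration of the match.pd rule above: for a power-of-two
constant, the conditional increment is rewritten into an unconditional add
of the condition bit shifted into place, so no branch remains. The function
name is illustrative only.

int test_int_pow2_branchless (int v)
{
  /* if (v <= 4) v += 1024;  becomes the form below (1024 == 1 << 10),
     which matches the " << 10;" the testcase scans for.  */
  return v + ((int) (v <= 4) << 10);
}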

View File

@@ -0,0 +1,252 @@
From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Tue, 24 Jan 2023 16:43:40 +0300
Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen
ops too.
---
gcc/common.opt | 5 ++
gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
gcc/tree-ssa-math-opts.cc | 43 ++++++++--
3 files changed, 184 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
diff --git a/gcc/common.opt b/gcc/common.opt
index dac477c04..39c90604e 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3106,6 +3106,11 @@ freciprocal-math
Common Var(flag_reciprocal_math) SetByCombined Optimization
Same as -fassociative-math for expressions which include division.
+fuaddsub-overflow-match-all
+Common Var(flag_uaddsub_overflow_match_all)
+Match unsigned add/sub overflow even if the target does not support
+the corresponding instruction.
+
; Nonzero means that unsafe floating-point math optimizations are allowed
; for the sake of speed. IEEE compliance is not guaranteed, and operations
; are allowed to assume that their arguments and results are "normal"
diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
new file mode 100644
index 000000000..96c26d308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/uaddsub.c
@@ -0,0 +1,143 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
+#include <stdint.h>
+
+typedef unsigned __int128 uint128_t;
+typedef struct uint256_t
+{
+ uint128_t lo;
+ uint128_t hi;
+} uint256_t;
+
+uint16_t add16 (uint8_t a, uint8_t b)
+{
+ uint8_t tmp = a + b;
+ uint8_t overflow = 0;
+ if (tmp < a)
+ overflow = 1;
+
+ uint16_t res = overflow;
+ res <<= 8;
+ res += tmp;
+ return res;
+}
+
+uint32_t add32 (uint16_t a, uint16_t b)
+{
+ uint16_t tmp = a + b;
+ uint16_t overflow = 0;
+ if (tmp < a)
+ overflow = 1;
+
+ uint32_t res = overflow;
+ res <<= 16;
+ res += tmp;
+ return res;
+}
+
+uint64_t add64 (uint32_t a, uint32_t b)
+{
+ uint32_t tmp = a + b;
+ uint32_t overflow = 0;
+ if (tmp < a)
+ overflow = 1;
+
+ uint64_t res = overflow;
+ res <<= 32;
+ res += tmp;
+ return res;
+}
+
+uint128_t add128 (uint64_t a, uint64_t b)
+{
+ uint64_t tmp = a + b;
+ uint64_t overflow = 0;
+ if (tmp < a)
+ overflow = 1;
+
+ uint128_t res = overflow;
+ res <<= 64;
+ res += tmp;
+ return res;
+}
+
+uint256_t add256 (uint128_t a, uint128_t b)
+{
+ uint128_t tmp = a + b;
+ uint128_t overflow = 0;
+ if (tmp < a)
+ overflow = 1;
+
+ uint256_t res;
+ res.hi = overflow;
+ res.lo = tmp;
+ return res;
+}
+
+uint16_t sub16 (uint8_t a, uint8_t b)
+{
+ uint8_t tmp = a - b;
+ uint8_t overflow = 0;
+ if (tmp > a)
+ overflow = -1;
+
+ uint16_t res = overflow;
+ res <<= 8;
+ res += tmp;
+ return res;
+}
+
+uint32_t sub32 (uint16_t a, uint16_t b)
+{
+ uint16_t tmp = a - b;
+ uint16_t overflow = 0;
+ if (tmp > a)
+ overflow = -1;
+
+ uint32_t res = overflow;
+ res <<= 16;
+ res += tmp;
+ return res;
+}
+
+uint64_t sub64 (uint32_t a, uint32_t b)
+{
+ uint32_t tmp = a - b;
+ uint32_t overflow = 0;
+ if (tmp > a)
+ overflow = -1;
+
+ uint64_t res = overflow;
+ res <<= 32;
+ res += tmp;
+ return res;
+}
+
+uint128_t sub128 (uint64_t a, uint64_t b)
+{
+ uint64_t tmp = a - b;
+ uint64_t overflow = 0;
+ if (tmp > a)
+ overflow = -1;
+
+ uint128_t res = overflow;
+ res <<= 64;
+ res += tmp;
+ return res;
+}
+
+uint256_t sub256 (uint128_t a, uint128_t b)
+{
+ uint128_t tmp = a - b;
+ uint128_t overflow = 0;
+ if (tmp > a)
+ overflow = -1;
+
+ uint256_t res;
+ res.hi = overflow;
+ res.lo = tmp;
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 232e903b0..55d6ee8ae 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
}
}
+/* Check if the corresponding operation has a wider equivalent on the target. */
+
+static bool
+wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
+{
+ machine_mode wider_mode;
+ FOR_EACH_WIDER_MODE (wider_mode, mode)
+ {
+ machine_mode next_mode;
+ if (optab_handler (op, wider_mode) != CODE_FOR_nothing
+ || (op == smul_optab
+ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
+ && (find_widening_optab_handler ((unsignedp
+ ? umul_widen_optab
+ : smul_widen_optab),
+ next_mode, mode))))
+ return true;
+ }
+
+ return false;
+}
/* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
a check for non-zero like:
@@ -3903,15 +3924,22 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|| code == MINUS_EXPR
|| code == MULT_EXPR
|| code == BIT_NOT_EXPR);
+ int unsignedp = TYPE_UNSIGNED (type);
if (!INTEGRAL_TYPE_P (type)
- || !TYPE_UNSIGNED (type)
- || has_zero_uses (lhs)
- || (code != PLUS_EXPR
- && code != MULT_EXPR
- && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
- TYPE_MODE (type)) == CODE_FOR_nothing))
+ || !unsignedp
+ || has_zero_uses (lhs))
return false;
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
+ {
+ machine_mode mode = TYPE_MODE (type);
+ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab;
+ if (optab_handler (op, mode) == CODE_FOR_nothing
+ && (!flag_uaddsub_overflow_match_all
+ || !wider_optab_check_p (op, mode, unsignedp)))
+ return false;
+ }
+
tree rhs1 = gimple_assign_rhs1 (stmt);
tree rhs2 = gimple_assign_rhs2 (stmt);
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
@@ -3986,7 +4014,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|| (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
|| (code == PLUS_EXPR
&& optab_handler (uaddv4_optab,
- TYPE_MODE (type)) == CODE_FOR_nothing)
+ TYPE_MODE (type)) == CODE_FOR_nothing
+ && !flag_uaddsub_overflow_match_all)
|| (code == MULT_EXPR
&& optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
TYPE_MODE (type)) == CODE_FOR_nothing))
--
2.33.0
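A hedged sketch of what the matcher recognizes: the open-coded carry test
"tmp = a + b; if (tmp < a) overflow = 1;" in add64 () above is the same
computation as the builtin overflow form below, which GCC lowers to the
internal .ADD_OVERFLOW call the testcase scans for.

#include <stdint.h>

uint64_t add64_builtin (uint32_t a, uint32_t b)
{
  uint32_t tmp;
  uint32_t overflow = __builtin_add_overflow (a, b, &tmp) ? 1 : 0;
  return ((uint64_t) overflow << 32) + tmp;
}

With -fuaddsub-overflow-match-all the pattern is accepted even when the
target only provides the overflow instruction in a wider mode, which is
what wider_optab_check_p tests.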

View File

@@ -0,0 +1,488 @@
From e7b22f97f960b62e555dfd6f2e3ae43973fcbb3e Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Wed, 25 Jan 2023 15:04:07 +0300
Subject: [PATCH 05/18] Match double sized mul pattern
---
gcc/match.pd | 136 +++++++++++++++++++++
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 141 ++++++++++++++++++++++
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 62 ++++++++++
gcc/tree-ssa-math-opts.cc | 80 ++++++++++++
4 files changed, 419 insertions(+)
create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-1.c
create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-2.c
diff --git a/gcc/match.pd b/gcc/match.pd
index 3cbaf2a5b..61866cb90 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7895,3 +7895,139 @@ and,
== TYPE_UNSIGNED (TREE_TYPE (@3))))
&& single_use (@4)
&& single_use (@5))))
+
+/* Match multiplication with double sized result.
+
+ Consider the following calculations:
+ arg0 * arg1 = (2^(bit_size/2) * arg0_hi + arg0_lo)
+ * (2^(bit_size/2) * arg1_hi + arg1_lo)
+ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
+ + 2^(bit_size/2) * (arg0_hi * arg1_lo + arg0_lo * arg1_hi)
+ + arg0_lo * arg1_lo
+
+ The products of the high and low parts fit in bit_size values, so they
+ are placed in the high and low parts of the result respectively.
+
+ The sum of the mixed products may overflow, so we need to detect that.
+ Also it has a bit_size/2 offset, thus it intersects with both high and low
+ parts of result. Overflow detection constant is bit_size/2 due to this.
+
+ With this info:
+ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
+ + 2^(bit_size/2) * middle
+ + 2^bit_size * possible_middle_overflow
+ + arg0_lo * arg1_lo
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow)
+ + 2^(bit_size/2) * (2^(bit_size/2) * middle_hi + middle_lo)
+ + arg0_lo * arg1_lo
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + middle_hi
+ + possible_middle_overflow)
+ + 2^(bit_size/2) * middle_lo
+ + arg0_lo * arg1_lo
+
+ The last sum can produce overflow for the high result part. With this:
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow
+ + possible_res_lo_overflow + middle_hi)
+ + res_lo
+ = res_hi + res_lo
+
+ This formula is too big to fit into one match pattern with all of the
+ combinations of terms inside it. There are many helpers for better code
+ readability.
+
+ The simplification is keyed on res_hi: computing res_lo alone is assumed
+ not to be a practical case for such calculations.
+
+ Overflow handling is done via matching complex calculations:
+ the realpart and imagpart are quite handy here. */
+/* Match low and high parts of the argument. */
+(match (double_size_mul_arg_lo @0 @1)
+ (bit_and @0 INTEGER_CST@1)
+ (if (wi::to_wide (@1)
+ == wi::mask (TYPE_PRECISION (type) / 2, false, TYPE_PRECISION (type)))))
+(match (double_size_mul_arg_hi @0 @1)
+ (rshift @0 INTEGER_CST@1)
+ (if (wi::to_wide (@1) == TYPE_PRECISION (type) / 2)))
+
+/* Match various argument parts products. */
+(match (double_size_mul_lolo @0 @1)
+ (mult@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_lo @1 @3))
+ (if (single_use (@4))))
+(match (double_size_mul_hihi @0 @1)
+ (mult@4 (double_size_mul_arg_hi @0 @2) (double_size_mul_arg_hi @1 @3))
+ (if (single_use (@4))))
+(match (double_size_mul_lohi @0 @1)
+ (mult:c@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_hi @1 @3))
+ (if (single_use (@4))))
+
+/* Match complex middle sum. */
+(match (double_size_mul_middle_complex @0 @1)
+ (IFN_ADD_OVERFLOW@2 (double_size_mul_lohi @0 @1) (double_size_mul_lohi @1 @0))
+ (if (num_imm_uses (@2) == 2)))
+
+/* Match real middle results. */
+(match (double_size_mul_middle @0 @1)
+ (realpart@2 (double_size_mul_middle_complex @0 @1))
+ (if (num_imm_uses (@2) == 2)))
+(match (double_size_mul_middleres_lo @0 @1)
+ (lshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
+ && single_use (@3))))
+(match (double_size_mul_middleres_hi @0 @1)
+ (rshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
+ && single_use (@3))))
+
+/* Match low result part. */
+/* The number of uses may be < 2 when we are interested in
+ the high part only. */
+(match (double_size_mul_res_lo_complex @0 @1)
+ (IFN_ADD_OVERFLOW:c@2
+ (double_size_mul_lolo:c @0 @1) (double_size_mul_middleres_lo @0 @1))
+ (if (num_imm_uses (@2) <= 2)))
+(match (double_size_mul_res_lo @0 @1)
+ (realpart (double_size_mul_res_lo_complex @0 @1)))
+
+/* Match overflow terms. */
+(match (double_size_mul_overflow_check_lo @0 @1 @5)
+ (convert@4 (ne@3
+ (imagpart@2 (double_size_mul_res_lo_complex@5 @0 @1)) integer_zerop))
+ (if (single_use (@2) && single_use (@3) && single_use (@4))))
+(match (double_size_mul_overflow_check_hi @0 @1)
+ (lshift@6 (convert@5 (ne@4
+ (imagpart@3 (double_size_mul_middle_complex @0 @1)) integer_zerop))
+ INTEGER_CST@2)
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
+ && single_use (@3) && single_use (@4) && single_use (@5)
+ && single_use (@6))))
+
+/* Match all possible permutations for high result part calculations. */
+(for op1 (double_size_mul_hihi
+ double_size_mul_overflow_check_hi
+ double_size_mul_middleres_hi)
+ op2 (double_size_mul_overflow_check_hi
+ double_size_mul_middleres_hi
+ double_size_mul_hihi)
+ op3 (double_size_mul_middleres_hi
+ double_size_mul_hihi
+ double_size_mul_overflow_check_hi)
+ (match (double_size_mul_candidate @0 @1 @2 @3)
+ (plus:c@2
+ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) (op1:c @0 @1))
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))
+ (if (single_use (@4) && single_use (@5))))
+ (match (double_size_mul_candidate @0 @1 @2 @3)
+ (plus:c@2 (double_size_mul_overflow_check_lo @0 @1 @3)
+ (plus:c@4 (op1:c @0 @1)
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
+ (if (single_use (@4) && single_use (@5))))
+ (match (double_size_mul_candidate @0 @1 @2 @3)
+ (plus:c@2 (op1:c @0 @1)
+ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3)
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
+ (if (single_use (@4) && single_use (@5))))
+ (match (double_size_mul_candidate @0 @1 @2 @3)
+ (plus:c@2 (op1:c @0 @1)
+ (plus:c@4 (op2:c @0 @1)
+ (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
+ (if (single_use (@4) && single_use (@5)))))
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
new file mode 100644
index 000000000..4d475cc8a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
@@ -0,0 +1,141 @@
+/* { dg-do compile } */
+/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
+ proper overflow detection in some cases. */
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
+#include <stdint.h>
+
+typedef unsigned __int128 uint128_t;
+
+uint16_t mul16 (uint8_t a, uint8_t b)
+{
+ uint8_t a_lo = a & 0xF;
+ uint8_t b_lo = b & 0xF;
+ uint8_t a_hi = a >> 4;
+ uint8_t b_hi = b >> 4;
+ uint8_t lolo = a_lo * b_lo;
+ uint8_t lohi = a_lo * b_hi;
+ uint8_t hilo = a_hi * b_lo;
+ uint8_t hihi = a_hi * b_hi;
+ uint8_t middle = hilo + lohi;
+ uint8_t middle_hi = middle >> 4;
+ uint8_t middle_lo = middle << 4;
+ uint8_t res_lo = lolo + middle_lo;
+ uint8_t res_hi = hihi + middle_hi;
+ res_hi += (res_lo < middle_lo ? 1 : 0);
+ res_hi += (middle < hilo ? 0x10 : 0);
+ uint16_t res = ((uint16_t) res_hi) << 8;
+ res += res_lo;
+ return res;
+}
+
+uint32_t mul32 (uint16_t a, uint16_t b)
+{
+ uint16_t a_lo = a & 0xFF;
+ uint16_t b_lo = b & 0xFF;
+ uint16_t a_hi = a >> 8;
+ uint16_t b_hi = b >> 8;
+ uint16_t lolo = a_lo * b_lo;
+ uint16_t lohi = a_lo * b_hi;
+ uint16_t hilo = a_hi * b_lo;
+ uint16_t hihi = a_hi * b_hi;
+ uint16_t middle = hilo + lohi;
+ uint16_t middle_hi = middle >> 8;
+ uint16_t middle_lo = middle << 8;
+ uint16_t res_lo = lolo + middle_lo;
+ uint16_t res_hi = hihi + middle_hi;
+ res_hi += (res_lo < middle_lo ? 1 : 0);
+ res_hi += (middle < hilo ? 0x100 : 0);
+ uint32_t res = ((uint32_t) res_hi) << 16;
+ res += res_lo;
+ return res;
+}
+
+uint64_t mul64 (uint32_t a, uint32_t b)
+{
+ uint32_t a_lo = a & 0xFFFF;
+ uint32_t b_lo = b & 0xFFFF;
+ uint32_t a_hi = a >> 16;
+ uint32_t b_hi = b >> 16;
+ uint32_t lolo = a_lo * b_lo;
+ uint32_t lohi = a_lo * b_hi;
+ uint32_t hilo = a_hi * b_lo;
+ uint32_t hihi = a_hi * b_hi;
+ uint32_t middle = hilo + lohi;
+ uint32_t middle_hi = middle >> 16;
+ uint32_t middle_lo = middle << 16;
+ uint32_t res_lo = lolo + middle_lo;
+ uint32_t res_hi = hihi + middle_hi;
+ res_hi += (res_lo < middle_lo ? 1 : 0);
+ res_hi += (middle < hilo ? 0x10000 : 0);
+ uint64_t res = ((uint64_t) res_hi) << 32;
+ res += res_lo;
+ return res;
+}
+
+uint128_t mul128 (uint64_t a, uint64_t b)
+{
+ uint64_t a_lo = a & 0xFFFFFFFF;
+ uint64_t b_lo = b & 0xFFFFFFFF;
+ uint64_t a_hi = a >> 32;
+ uint64_t b_hi = b >> 32;
+ uint64_t lolo = a_lo * b_lo;
+ uint64_t lohi = a_lo * b_hi;
+ uint64_t hilo = a_hi * b_lo;
+ uint64_t hihi = a_hi * b_hi;
+ uint64_t middle = hilo + lohi;
+ uint64_t middle_hi = middle >> 32;
+ uint64_t middle_lo = middle << 32;
+ uint64_t res_lo = lolo + middle_lo;
+ uint64_t res_hi = hihi + middle_hi;
+ res_hi += (res_lo < middle_lo ? 1 : 0);
+ res_hi += (middle < hilo ? 0x100000000 : 0);
+ uint128_t res = ((uint128_t) res_hi) << 64;
+ res += res_lo;
+ return res;
+}
+
+uint64_t mul64_perm (uint32_t a, uint32_t b)
+{
+ uint32_t a_lo = a & 0xFFFF;
+ uint32_t b_lo = b & 0xFFFF;
+ uint32_t a_hi = a >> 16;
+ uint32_t b_hi = b >> 16;
+ uint32_t lolo = a_lo * b_lo;
+ uint32_t lohi = a_lo * b_hi;
+ uint32_t hilo = a_hi * b_lo;
+ uint32_t hihi = a_hi * b_hi;
+ uint32_t middle = hilo + lohi;
+ uint32_t middle_hi = middle >> 16;
+ uint32_t middle_lo = middle << 16;
+ uint32_t res_lo = lolo + middle_lo;
+ uint32_t res_hi = hihi + middle_hi;
+ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi;
+ res_hi = middle < hilo ? res_hi + 0x10000 : res_hi;
+ uint64_t res = ((uint64_t) res_hi) << 32;
+ res += res_lo;
+ return res;
+}
+
+uint128_t mul128_perm (uint64_t a, uint64_t b)
+{
+ uint64_t a_lo = a & 0xFFFFFFFF;
+ uint64_t b_lo = b & 0xFFFFFFFF;
+ uint64_t a_hi = a >> 32;
+ uint64_t b_hi = b >> 32;
+ uint64_t lolo = a_lo * b_lo;
+ uint64_t lohi = a_lo * b_hi;
+ uint64_t hilo = a_hi * b_lo;
+ uint64_t hihi = a_hi * b_hi;
+ uint64_t middle = hilo + lohi;
+ uint64_t middle_hi = middle >> 32;
+ uint64_t middle_lo = middle << 32;
+ uint64_t res_lo = lolo + middle_lo;
+ uint64_t res_hi = hihi + middle_hi;
+ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi;
+ res_hi = middle < hilo ? res_hi + 0x100000000 : res_hi;
+ uint128_t res = ((uint128_t) res_hi) << 64;
+ res += res_lo;
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
new file mode 100644
index 000000000..cc6e5af25
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
@@ -0,0 +1,62 @@
+/* { dg-do compile } */
+/* fif-conversion-gimple is required for proper overflow detection
+ in some cases. */
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
+#include <stdint.h>
+
+typedef unsigned __int128 uint128_t;
+typedef struct uint256_t
+{
+ uint128_t lo;
+ uint128_t hi;
+} uint256_t;
+
+uint64_t mul64_double_use (uint32_t a, uint32_t b)
+{
+ uint32_t a_lo = a & 0xFFFF;
+ uint32_t b_lo = b & 0xFFFF;
+ uint32_t a_hi = a >> 16;
+ uint32_t b_hi = b >> 16;
+ uint32_t lolo = a_lo * b_lo;
+ uint32_t lohi = a_lo * b_hi;
+ uint32_t hilo = a_hi * b_lo;
+ uint32_t hihi = a_hi * b_hi;
+ uint32_t middle = hilo + lohi;
+ uint32_t middle_hi = middle >> 16;
+ uint32_t middle_lo = middle << 16;
+ uint32_t res_lo = lolo + middle_lo;
+ uint32_t res_hi = hihi + middle_hi;
+ res_hi += (res_lo < middle_lo ? 1 : 0);
+ res_hi += (middle < hilo ? 0x10000 : 0);
+ uint64_t res = ((uint64_t) res_hi) << 32;
+ res += res_lo;
+ return res + lolo;
+}
+
+uint256_t mul256 (uint128_t a, uint128_t b)
+{
+ uint128_t a_lo = a & 0xFFFFFFFFFFFFFFFF;
+ uint128_t b_lo = b & 0xFFFFFFFFFFFFFFFF;
+ uint128_t a_hi = a >> 64;
+ uint128_t b_hi = b >> 64;
+ uint128_t lolo = a_lo * b_lo;
+ uint128_t lohi = a_lo * b_hi;
+ uint128_t hilo = a_hi * b_lo;
+ uint128_t hihi = a_hi * b_hi;
+ uint128_t middle = hilo + lohi;
+ uint128_t middle_hi = middle >> 64;
+ uint128_t middle_lo = middle << 64;
+ uint128_t res_lo = lolo + middle_lo;
+ uint128_t res_hi = hihi + middle_hi;
+ res_hi += (res_lo < middle_lo ? 1 : 0);
+ /* Workaround for the 'constant is too big' warning. */
+ uint128_t overflow_tmp = (middle < hilo ? 1 : 0);
+ overflow_tmp <<= 64;
+ res_hi += overflow_tmp;
+ uint256_t res;
+ res.lo = res_lo;
+ res.hi = res_hi;
+ return res;
+}
+
+/* { dg-final { scan-tree-dump-not "double sized mul optimized" "widening_mul" } } */
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
index 55d6ee8ae..2c06b8a60 100644
--- a/gcc/tree-ssa-math-opts.cc
+++ b/gcc/tree-ssa-math-opts.cc
@@ -210,6 +210,9 @@ static struct
/* Number of highpart multiplication ops inserted. */
int highpart_mults_inserted;
+
+ /* Number of optimized double sized multiplications. */
+ int double_sized_mul_optimized;
} widen_mul_stats;
/* The instance of "struct occurrence" representing the highest
@@ -4893,6 +4896,78 @@ optimize_spaceship (gimple *stmt)
}
+/* Pattern matcher for double sized multiplication defined in match.pd. */
+extern bool gimple_double_size_mul_candidate (tree, tree*, tree (*)(tree));
+
+static bool
+convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt)
+{
+ gimple *use_stmt, *complex_res_lo;
+ gimple_stmt_iterator insert_before;
+ imm_use_iterator use_iter;
+ tree match[4]; // arg0, arg1, res_hi, complex_res_lo
+ tree arg0, arg1, widen_mult, new_type, tmp;
+ tree lhs = gimple_assign_lhs (stmt);
+ location_t loc = UNKNOWN_LOCATION;
+ machine_mode mode;
+
+ if (!gimple_double_size_mul_candidate (lhs, match, NULL))
+ return false;
+
+ new_type = build_nonstandard_integer_type (
+ TYPE_PRECISION (TREE_TYPE (match[0])) * 2, 1);
+ mode = TYPE_MODE (new_type);
+
+ /* Return early if the widened multiplication doesn't exist on the target. */
+ if (optab_handler (smul_optab, mode) == CODE_FOR_nothing
+ && !wider_optab_check_p (smul_optab, mode, 1))
+ return false;
+
+ /* Determine the point where the wide multiplication
+ should be inserted. Complex low res is OK since it is required
+ by both high and low part getters, thus it dominates both of them. */
+ complex_res_lo = SSA_NAME_DEF_STMT (match[3]);
+ insert_before = gsi_for_stmt (complex_res_lo);
+ gsi_next (&insert_before);
+
+ /* Create the widen multiplication. */
+ arg0 = build_and_insert_cast (&insert_before, loc, new_type, match[0]);
+ arg1 = build_and_insert_cast (&insert_before, loc, new_type, match[1]);
+ widen_mult = build_and_insert_binop (&insert_before, loc, "widen_mult",
+ MULT_EXPR, arg0, arg1);
+
+ /* Find the mult low part getter. */
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3])
+ if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
+ break;
+
+ /* Create high and low (if needed) parts extractors. */
+ /* Low part. */
+ if (use_stmt)
+ {
+ loc = gimple_location (use_stmt);
+ tmp = build_and_insert_cast (&insert_before, loc,
+ TREE_TYPE (gimple_get_lhs (use_stmt)),
+ widen_mult);
+ gassign *new_stmt = gimple_build_assign (gimple_get_lhs (use_stmt),
+ NOP_EXPR, tmp);
+ gsi_replace (&insert_before, new_stmt, true);
+ }
+
+ /* High part. */
+ loc = gimple_location (stmt);
+ tmp = build_and_insert_binop (gsi, loc, "widen_mult_hi",
+ RSHIFT_EXPR, widen_mult,
+ build_int_cst (new_type,
+ TYPE_PRECISION (new_type) / 2));
+ tmp = build_and_insert_cast (gsi, loc, TREE_TYPE (lhs), tmp);
+ gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, tmp);
+ gsi_replace (gsi, new_stmt, true);
+
+ widen_mul_stats.double_sized_mul_optimized++;
+ return true;
+}
+
/* Find integer multiplications where the operands are extended from
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
or MULT_HIGHPART_EXPR where appropriate. */
@@ -4987,6 +5062,9 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
break;
case PLUS_EXPR:
+ if (convert_double_size_mul (&gsi, stmt))
+ break;
+ __attribute__ ((fallthrough));
case MINUS_EXPR:
if (!convert_plusminus_to_widen (&gsi, stmt, code))
match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
@@ -5091,6 +5169,8 @@ pass_optimize_widening_mul::execute (function *fun)
widen_mul_stats.divmod_calls_inserted);
statistics_counter_event (fun, "highpart multiplications inserted",
widen_mul_stats.highpart_mults_inserted);
+ statistics_counter_event (fun, "double sized mul optimized",
+ widen_mul_stats.double_sized_mul_optimized);
return cfg_changed ? TODO_cleanup_cfg : 0;
}
--
2.33.0
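The payoff of the pattern, as a hedged sketch: the whole lolo/lohi/hilo/hihi
schoolbook decomposition in mul64 () above collapses into a single widening
multiply once convert_double_size_mul proves the target (or a wider mode on
it) supports one.

#include <stdint.h>

uint64_t mul64_widen (uint32_t a, uint32_t b)
{
  /* One widening multiply; res_hi is r >> 32, res_lo is (uint32_t) r,
     exactly the two extractors the pass builds.  */
  uint64_t r = (uint64_t) a * b;
  return r;
}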

File diff suppressed because it is too large

View File

@@ -0,0 +1,100 @@
From aaa117a9ff58fb208e8c8859e075ca425f995f63 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Tue, 27 Feb 2024 07:43:57 +0800
Subject: [PATCH 07/18] Port fixes in icp to GCC 12
---
gcc/ipa-devirt.cc | 37 ++++++++++++++++++++++++++++++-------
1 file changed, 30 insertions(+), 7 deletions(-)
diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
index 383839189..318535d06 100644
--- a/gcc/ipa-devirt.cc
+++ b/gcc/ipa-devirt.cc
@@ -4431,6 +4431,11 @@ print_type_set(unsigned ftype_uid, type_alias_map *map)
if (!map->count (ftype_uid))
return;
type_set* s = (*map)[ftype_uid];
+ if (!s)
+ {
+ fprintf (dump_file, "%d (no set)", ftype_uid);
+ return;
+ }
for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
}
@@ -4696,12 +4701,19 @@ maybe_register_aliases (tree type1, tree type2)
if (register_ailas_type (type1, type2, ta_map))
analyze_pointees (type1, type2);
}
+ unsigned type1_uid = TYPE_UID (type1);
+ unsigned type2_uid = TYPE_UID (type2);
+ if (type_uid_map->count (type1_uid) == 0)
+ (*type_uid_map)[type1_uid] = type1;
+ if (type_uid_map->count (type2_uid) == 0)
+ (*type_uid_map)[type2_uid] = type2;
+
/* If function and non-function type pointers alias,
the function type is unsafe. */
if (FUNCTION_POINTER_TYPE_P (type1) && !FUNCTION_POINTER_TYPE_P (type2))
- unsafe_types->insert (TYPE_UID (type1));
+ unsafe_types->insert (type1_uid);
if (FUNCTION_POINTER_TYPE_P (type2) && !FUNCTION_POINTER_TYPE_P (type1))
- unsafe_types->insert (TYPE_UID (type2));
+ unsafe_types->insert (type2_uid);
/* Try to figure out with pointers to incomplete types. */
if (POINTER_TYPE_P (type1) && POINTER_TYPE_P (type2))
@@ -4825,10 +4837,12 @@ compare_block_and_init_type (tree block, tree t1)
static void
analyze_global_var (varpool_node *var)
{
- var->get_constructor();
tree decl = var->decl;
- if (TREE_CODE (decl) == SSA_NAME || !DECL_INITIAL (decl)
- || integer_zerop (DECL_INITIAL (decl)))
+ if (!decl || !DECL_INITIAL (decl))
+ return;
+ var->get_constructor ();
+ if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl))
+ || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK)
return;
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -4998,7 +5012,9 @@ analyze_assign_stmt (gimple *stmt)
{
rhs = TREE_OPERAND (rhs, 0);
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
- || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL)
+ || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
+ || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL
+ || TREE_CODE (rhs) == RESULT_DECL)
rhs_type = build_pointer_type (TREE_TYPE (rhs));
else if (TREE_CODE (rhs) == COMPONENT_REF)
{
@@ -5012,7 +5028,12 @@ analyze_assign_stmt (gimple *stmt)
gcc_assert (POINTER_TYPE_P (rhs_type));
}
else
- gcc_unreachable();
+ {
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
+ get_tree_code_name (TREE_CODE (rhs)));
+ print_gimple_stmt (dump_file, stmt, 0);
+ gcc_unreachable ();
+ }
}
else
rhs_type = TREE_TYPE (rhs);
@@ -5710,6 +5731,8 @@ merge_fs_map_for_ftype_aliases ()
decl_set *d_set = it1->second;
tree type = (*type_uid_map)[it1->first];
type_set *set = (*fta_map)[it1->first];
+ if (!set)
+ continue;
for (type_set::const_iterator it2 = set->begin ();
it2 != set->end (); it2++)
{
--
2.33.0
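A hedged stand-alone sketch (std:: containers stand in for GCC's internal
map types) of the defensive pattern the fix adds to print_type_set: a key
can be present while its set pointer is null, so the entry must be checked
before iterating.

#include <cstdio>
#include <map>
#include <set>

typedef std::set<int> type_set;
typedef std::map<unsigned, type_set *> type_alias_map;

static void
print_type_set_sketch (FILE *dump, unsigned uid, type_alias_map *map)
{
  if (!map->count (uid))
    return;
  type_set *s = (*map)[uid];
  if (!s)   /* key present, but no set attached */
    {
      fprintf (dump, "%d (no set)", uid);
      return;
    }
  for (type_set::const_iterator it = s->begin (); it != s->end (); ++it)
    fprintf (dump, it == s->begin () ? "%d" : ", %d", *it);
}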

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,378 @@
From a3013c074cd2ab5f71eb98a587a627f38c68656c Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Thu, 22 Feb 2024 17:07:24 +0800
Subject: [PATCH 12/18] Port maxmin patch to GCC 12
---
gcc/config/aarch64/aarch64-simd.md | 256 ++++++++++++++++++++++++++
gcc/config/aarch64/predicates.md | 19 ++
gcc/testsuite/gcc.dg/combine-maxmin.c | 46 +++++
3 files changed, 321 insertions(+)
create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 82f73805f..de92802f5 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1138,6 +1138,82 @@
[(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
)
+;; Simplify the extension with following truncation for shift+neg operation.
+
+(define_insn_and_split "*aarch64_sshr_neg_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
+ (match_dup 2)))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (ashiftrt:V8HI
+ (neg:V8HI
+ (match_operand:V8HI 1 "register_operand" "w"))
+ (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
+ {
+ /* Reduce the shift amount to smaller mode. */
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
+ - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
+ operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
+ }
+ [(set_attr "type" "multiple")]
+)
+
+;; The helper definition that allows combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_operand:V4SI 1 "register_operand" "w"))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_operand:V4SI 3 "register_operand" "w"))
+ (match_dup 2)))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_dup 1))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (set (match_operand:V4SI 3 "register_operand" "=w")
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_dup 3))
+ (match_dup 2)))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_dup 1))
+ (truncate:V4HI
+ (match_dup 3))))]
+ ""
+ [(set_attr "type" "multiple")]
+)
+
(define_insn "*aarch64_simd_sra<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(plus:VDQ_I
@@ -1714,6 +1790,26 @@
}
)
+(define_insn "vec_pack_trunc_shifted_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
+ (vec_concat:<VNARROWQ2>
+ (truncate:<VNARROWQ>
+ (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "half_size_operand" "w")))
+ (truncate:<VNARROWQ>
+ (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
+ (match_operand:VQN 4 "half_size_operand" "w")))))]
+ "TARGET_SIMD"
+ {
+ if (BYTES_BIG_ENDIAN)
+ return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
+ else
+ return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
+ }
+ [(set_attr "type" "neon_permute<q>")
+ (set_attr "length" "4")]
+)
+
(define_insn "aarch64_shrn<mode>_insn_le"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
(vec_concat:<VNARROWQ2>
@@ -6652,6 +6748,166 @@
[(set_attr "type" "neon_tst<q>")]
)
+;; Simplify the extension with following truncation for cmtst-like operation.
+
+(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (plus:V4HI
+ (truncate:V4HI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
+ (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
+ (plus:V4HI
+ (truncate:V4HI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_dup 1)
+ (match_dup 2))
+ (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
+ (match_dup 4)))
+ (match_dup 5))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 6 "register_operand" "=w")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (plus:V8HI
+ (eq:V8HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand" "w")
+ (match_dup 6))
+ (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
+ (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
+ {
+ if (can_create_pseudo_p ())
+ {
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
+ operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
+ int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
+ operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
+
+ operands[6] = gen_reg_rtx (V8HImode);
+ }
+ else
+ FAIL;
+ }
+ [(set_attr "type" "neon_tst_q")]
+)
+
+;; Three helper definitions that allow combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (neg:V4SI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
+ (set (match_operand:V4SI 0 "register_operand" "=w")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 5)
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
+ (set (match_dup 0)
+ (neg:V4SI
+ (eq:V4SI
+ (match_dup 0)
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ {
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (V8HImode);
+ else
+ FAIL;
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (neg:V4SI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
+ (set (match_operand:V4SI 0 "register_operand" "=w")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 5)
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half"))))
+ (set (match_dup 0)
+ (neg:V4SI
+ (eq:V4SI
+ (match_dup 0)
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ {
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (V8HImode);
+ else
+ FAIL;
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmpv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (not:V4SI
+ (match_operand:V4SI 1 "register_operand" "w")))
+ (truncate:V4HI
+ (not:V4SI
+ (match_operand:V4SI 2 "register_operand" "w")))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
+ (not:V4SI
+ (match_dup 1)))
+ (set (match_operand:V4SI 2 "register_operand" "=w")
+ (not:V4SI
+ (match_dup 2)))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_dup 1))
+ (truncate:V4HI
+ (match_dup 2))))]
+ ""
+ [(set_attr "type" "multiple")]
+)
+
(define_insn_and_split "aarch64_cmtstdi"
[(set (match_operand:DI 0 "register_operand" "=w,r")
(neg:DI
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 07c14aacb..1b8496c07 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -118,6 +118,25 @@
(match_test "aarch64_simd_valid_immediate (op, NULL,
AARCH64_CHECK_ORR)"))))
+(define_predicate "aarch64_bic_imm_for_maxmin"
+ (match_code "const_vector")
+{
+ if (!aarch64_simd_valid_immediate (op, NULL, AARCH64_CHECK_BIC))
+ return false;
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode);
+ return CONST_INT_P (op)
+ && ((~UINTVAL (op)) < (((long unsigned int) 1 << size) - 1));
+})
+
+(define_predicate "maxmin_arith_shift_operand"
+ (match_code "const_vector")
+{
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) - 1;
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
+})
+
(define_predicate "aarch64_reg_or_bic_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
new file mode 100755
index 000000000..06bce7029
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
@@ -0,0 +1,46 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fdump-rtl-combine-all" } */
+
+/* The test checks usage of smax/smin insns for clip evaluation and
+ * uzp1/uzp2 insns for vector element narrowing. It's inspired by
+ * sources of x264 codec. */
+
+typedef unsigned char uint8_t;
+typedef long int intptr_t;
+typedef signed short int int16_t;
+
+static __attribute__((always_inline)) inline uint8_t clip (int x )
+{
+ return ( (x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x );
+}
+
+void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+ intptr_t stride, int width, int height, int16_t *buf)
+{
+ const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
+ for( int y = 0; y < height; y++ ) {
+ for( int x = -2; x < width+3; x++ ) {
+ int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
+ + (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
+ dstv[x] = clip ( (v + 16) >> 5 );
+ buf[x+2] = v + pad;
+ }
+ for( int x = 0; x < width; x++ )
+ dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
+ + (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
+ - 32*pad + 512) >> 10);
+ for( int x = 0; x < width; x++ )
+ dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
+ + (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
+ + 16) >> 5);
+ dsth += stride;
+ dstv += stride;
+ dstc += stride;
+ src += stride;
+ }
+}
+
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
+/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
+/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
--
2.33.0
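A hedged brute-force check (not part of the patch) that the bit trick in
clip () above is the saturating clamp the smax/smin patterns implement; it
relies on GCC's arithmetic right shift of negative values.

#include <cassert>

static int clip_minmax (int x)
{
  int lo = x > 0 ? x : 0;       /* smax (x, 0)    */
  return lo < 255 ? lo : 255;   /* smin (lo, 255) */
}

int main ()
{
  for (int x = -100000; x <= 100000; ++x)
    {
      int ref = (x & ~((1 << 8) - 1)) ? (-x) >> 31 & ((1 << 8) - 1) : x;
      assert (ref == clip_minmax (x));
    }
  return 0;
}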

View File

@@ -0,0 +1,239 @@
From 11da40d18e35219961226d40f11b0702b8649044 Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Thu, 22 Feb 2024 17:13:27 +0800
Subject: [PATCH 13/18] Port moving minmask pattern to gimple to GCC 12
---
gcc/common.opt | 4 +
gcc/match.pd | 104 ++++++++++++++++++++++++
gcc/testsuite/gcc.dg/combine-maxmin-1.c | 15 ++++
gcc/testsuite/gcc.dg/combine-maxmin-2.c | 14 ++++
gcc/testsuite/gcc.dg/combine-maxmin.c | 19 +++--
5 files changed, 151 insertions(+), 5 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-1.c
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-2.c
diff --git a/gcc/common.opt b/gcc/common.opt
index 6c6fabb31..3a5004271 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1846,6 +1846,10 @@ fif-conversion-gimple
Common Var(flag_if_conversion_gimple) Optimization
Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
+fconvert-minmax
+Common Var(flag_convert_minmax) Optimization
+Convert saturating clipping to min max.
+
fstack-reuse=
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
diff --git a/gcc/match.pd b/gcc/match.pd
index 61866cb90..3a19e93b3 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8031,3 +8031,107 @@ and,
(plus:c@4 (op2:c @0 @1)
(plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
(if (single_use (@4) && single_use (@5)))))
+
+/* MinMax pattern matching helpers. More info on the transformation below. */
+
+/* Match (a & 0b11..100..0) pattern. */
+(match (minmax_cmp_arg @0 @1)
+ (bit_and @0 INTEGER_CST@1)
+ (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
+
+/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
+ This statement is blocking for the transformation of unsigned integers.
+ Do type check here to avoid unnecessary duplications. */
+(match (minmax_sat_arg @0)
+ (rshift (negate @0) INTEGER_CST@1)
+ (if (!TYPE_UNSIGNED (TREE_TYPE (@0))
+ && wi::eq_p (wi::to_widest (@1), TYPE_PRECISION (TREE_TYPE (@0)) - 1))))
+
+/* Transform ((x & ~mask) ? (-x)>>31 & mask : x) to (min (max (x, 0), mask)).
+ The matched pattern can be described as saturated clipping.
+
+ The pattern supports truncation via both casts and bit_and.
+ Also there are patterns for possible inverted conditions. */
+(if (flag_convert_minmax)
+/* Truncation via casts. Unfortunately convert? cannot be applied here
+ because convert and cond take different number of arguments. */
+ (simplify
+ (convert
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (minmax_sat_arg @0))
+ (convert? @0)))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (minmax_sat_arg @0))
+ (convert? @0))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+
+ (simplify
+ (convert
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (minmax_sat_arg @0))))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (minmax_sat_arg @0)))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+
+ /* Truncation via bit_and with mask. Same concerns on convert? here. */
+ (simplify
+ (convert
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
+ (convert? @0)))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
+ (convert? @0))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+
+ (simplify
+ (convert
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2)))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; }))))))
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-1.c b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
new file mode 100644
index 000000000..859ff7df8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fconvert-minmax" } */
+
+#include <inttypes.h>
+
+__attribute__((noinline))
+void test (int32_t *restrict a, int32_t *restrict x)
+{
+ for (int i = 0; i < 4; i++)
+ a[i] = ((((-x[i]) >> 31) ^ x[i])
+ & (-((int32_t)((x[i] & (~((1 << 8)-1))) == 0)))) ^ ((-x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-not {smax\t} } } */
+/* { dg-final { scan-assembler-not {smin\t} } } */
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-2.c b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
new file mode 100644
index 000000000..63d4d85b3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fconvert-minmax" } */
+
+#include <inttypes.h>
+
+__attribute__((noinline))
+void test (int8_t *restrict a, int32_t *restrict x)
+{
+ for (int i = 0; i < 8; i++)
+ a[i] = ((x[i] & ~((1 << 9)-1)) ? (-x[i])>>31 & ((1 << 9)-1) : x[i]);
+}
+
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
index 06bce7029..a984fa560 100755
--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target aarch64-*-* } } */
-/* { dg-options "-O3 -fdump-rtl-combine-all" } */
+/* { dg-options "-O3 -fconvert-minmax" } */
/* The test checks usage of smax/smin insns for clip evaluation and
* uzp1/uzp2 insns for vector element narrowing. It's inspired by
@@ -19,20 +19,26 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
{
const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
for( int y = 0; y < height; y++ ) {
+ /* This loop is not being vectorized now. */
for( int x = -2; x < width+3; x++ ) {
int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
+ (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
dstv[x] = clip ( (v + 16) >> 5 );
buf[x+2] = v + pad;
}
+
+ /* Produces two versions of the code: 3xUZP1/2xMAX/2xMIN + 1xUZP1/1xMAX/1xMIN. */
for( int x = 0; x < width; x++ )
dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
+ (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
- 32*pad + 512) >> 10);
+
+ /* Produces two versions of the code: 1xUZP1/2xMAX/2xMIN + 0xUZP1/1xMAX/1xMIN. */
for( int x = 0; x < width; x++ )
dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
+ (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
+ 16) >> 5);
+
dsth += stride;
dstv += stride;
dstc += stride;
@@ -40,7 +46,10 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
}
}
-/* { dg-final { scan-assembler-times {smax\t} 4 } } */
-/* { dg-final { scan-assembler-times {smin\t} 4 } } */
-/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
-/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
+/* Max is performed on 0 from signed values, match smax exactly. */
+/* { dg-final { scan-assembler-times {smax\t} 6 } } */
+/* Min is performed on signed val>0 and a mask, min sign doesn't matter. */
+/* { dg-final { scan-assembler-times {[us]min\t} 6 } } */
+/* All of the vectorized patterns are expected to be matched. */
+/* { dg-final { scan-assembler-not {cmtst\t} } } */
+/* { dg-final { scan-assembler-times {uzp1\t} 5 } } */
--
2.33.0
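One identity behind the minmax_sat_arg matcher, as a hedged check: for
32-bit int, (-x) >> 31 broadcasts the inverted sign bit, i.e. it is -1
exactly when x > 0 and 0 otherwise (arithmetic shift assumed, INT_MIN
excluded). For unsigned x the shift would yield 0 or 1 rather than an
all-ones mask, which is why the matcher rejects unsigned types.

#include <cassert>

int main ()
{
  for (int x = -1000; x <= 1000; ++x)
    assert (((-x) >> 31) == -(x > 0));
  return 0;
}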

View File

@@ -0,0 +1,65 @@
From dbcb2630c426c8dd2117b5ce625da8422dd8cd65 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Thu, 22 Feb 2024 17:20:17 +0800
Subject: [PATCH 14/18] Add new pattern to pass the maxmin tests
---
gcc/match.pd | 24 ++++++++++++++++++++++++
gcc/testsuite/gcc.dg/combine-maxmin.c | 2 +-
2 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/gcc/match.pd b/gcc/match.pd
index 3a19e93b3..aee58e47b 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8038,6 +8038,10 @@ and,
(match (minmax_cmp_arg @0 @1)
(bit_and @0 INTEGER_CST@1)
(if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
+/* Match ((unsigned) a > 0b0..01..1) pattern. */
+(match (minmax_cmp_arg1 @0 @1)
+ (gt @0 INTEGER_CST@1)
+ (if (wi::popcount (wi::to_widest (@1) + 1) == 1)))
/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
This statement is blocking for the transformation of unsigned integers.
@@ -8095,6 +8099,26 @@ and,
(convert (min (max @0 { integer_zero_node; })
{ mask; })))))
+ (simplify
+ (convert
+ (cond
+ (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
+ (convert? (minmax_sat_arg @0))
+ (convert? @0)))
+ (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
+ (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
+ (convert? (minmax_sat_arg @0))
+ (convert? @0))
+ (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
+ (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
+ { mask; })))))
+
/* Truncation via bit_and with mask. Same concerns on convert? here. */
(simplify
(convert
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
index a984fa560..5c0c9cc49 100755
--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
@@ -52,4 +52,4 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
/* { dg-final { scan-assembler-times {[us]min\t} 6 } } */
/* All of the vectorized patterns are expected to be matched. */
/* { dg-final { scan-assembler-not {cmtst\t} } } */
-/* { dg-final { scan-assembler-times {uzp1\t} 5 } } */
+/* { dg-final { scan-assembler-times {uzp1\t} 2 } } */
--
2.33.0
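A hedged check of the new minmax_cmp_arg1 form: comparing the value as
unsigned against 2^k - 1 is the classic range test, equivalent to the
(x & ~mask) != 0 form matched by minmax_cmp_arg.

#include <cassert>

int main ()
{
  const int mask = (1 << 8) - 1;   /* 255 */
  for (int x = -1000; x <= 1000; ++x)
    assert (((unsigned) x > (unsigned) mask) == ((x & ~mask) != 0));
  return 0;
}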

File diff suppressed because it is too large

View File

@@ -0,0 +1,27 @@
From 915d549b03c10ab403538888149facd417a02ebc Mon Sep 17 00:00:00 2001
From: vchernon <chernonog.vyacheslav@huawei.com>
Date: Wed, 27 Dec 2023 23:31:26 +0800
Subject: [PATCH 16/18] [crypto-accel] add optimization level requirement to
the gate
fix issue (src-openEuler/gcc: I8RRDW)
---
gcc/crypto-accel.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc
index f4e810a6b..e7766a585 100644
--- a/gcc/crypto-accel.cc
+++ b/gcc/crypto-accel.cc
@@ -2391,7 +2391,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- if (flag_crypto_accel_aes <= 0)
+ if (flag_crypto_accel_aes <= 0 || optimize < 1)
return false;
return targetm.get_v16qi_mode
&& targetm.gen_rev32v16qi
--
2.33.0

View File

@@ -0,0 +1,239 @@
From b5865aef36ebaac87ae30d51f08bfe081795ed67 Mon Sep 17 00:00:00 2001
From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
Date: Tue, 12 Mar 2024 23:30:56 +0800
Subject: [PATCH 17/18] Add more flexible check for pointer aliasing during
vectorization
It takes the minimum of the iteration count and the segment length, which
helps speed up loops with a small number of iterations when only the tail
can be vectorized.
---
gcc/params.opt | 5 ++
.../sve/var_stride_flexible_segment_len_1.c | 23 +++++++
gcc/tree-data-ref.cc | 67 +++++++++++++------
gcc/tree-data-ref.h | 11 ++-
gcc/tree-vect-data-refs.cc | 14 +++-
5 files changed, 95 insertions(+), 25 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
diff --git a/gcc/params.opt b/gcc/params.opt
index 6176d4790..7e5c119cf 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1180,6 +1180,11 @@ Maximum number of loop peels to enhance alignment of data references in a loop.
Common Joined UInteger Var(param_vect_max_version_for_alias_checks) Init(10) Param Optimization
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check.
+-param=vect-alias-flexible-segment-len=
+Common Joined UInteger Var(param_flexible_seg_len) Init(0) IntegerRange(0, 1) Param Optimization
+Use a more flexible segment length in runtime alias checks: currently the
+minimum of the iteration number and the vectorization length is used when
+this param is set.
+
-param=vect-max-version-for-alignment-checks=
Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
new file mode 100644
index 000000000..894f075f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize --param=vect-alias-flexible-segment-len=1" } */
+
+#define TYPE int
+#define SIZE 257
+
+void __attribute__ ((weak))
+f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
+{
+ for (int i = 0; i < SIZE; ++i)
+ x[i * n] += y[i * n];
+}
+
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
+ an overlap check that multiplies by (257-1)*4. */
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
+/* One range check and a check for n being zero. */
+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
index 397792c35..e6ae9e847 100644
--- a/gcc/tree-data-ref.cc
+++ b/gcc/tree-data-ref.cc
@@ -2329,31 +2329,15 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
same arguments. Try to optimize cases in which the second access
is a write and in which some overlap is valid. */
-static bool
-create_waw_or_war_checks (tree *cond_expr,
+static void
+create_waw_or_war_checks2 (tree *cond_expr, tree seg_len_a,
const dr_with_seg_len_pair_t &alias_pair)
{
const dr_with_seg_len& dr_a = alias_pair.first;
const dr_with_seg_len& dr_b = alias_pair.second;
- /* Check for cases in which:
-
- (a) DR_B is always a write;
- (b) the accesses are well-ordered in both the original and new code
- (see the comment above the DR_ALIAS_* flags for details); and
- (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
- if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
- return false;
-
- /* Check for equal (but possibly variable) steps. */
tree step = DR_STEP (dr_a.dr);
- if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
- return false;
-
- /* Make sure that we can operate on sizetype without loss of precision. */
tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
- if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
- return false;
/* All addresses involved are known to have a common alignment ALIGN.
We can therefore subtract ALIGN from an exclusive endpoint to get
@@ -2370,9 +2354,6 @@ create_waw_or_war_checks (tree *cond_expr,
fold_convert (ssizetype, indicator),
ssize_int (0));
- /* Get lengths in sizetype. */
- tree seg_len_a
- = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
/* Each access has the following pattern:
@@ -2479,6 +2460,50 @@ create_waw_or_war_checks (tree *cond_expr,
*cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
if (dump_enabled_p ())
dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
+}
+
+/* This is a wrapper function for create_waw_or_war_checks2. */
+static bool
+create_waw_or_war_checks (tree *cond_expr,
+ const dr_with_seg_len_pair_t &alias_pair)
+{
+ const dr_with_seg_len& dr_a = alias_pair.first;
+ const dr_with_seg_len& dr_b = alias_pair.second;
+
+ /* Check for cases in which:
+
+ (a) DR_B is always a write;
+ (b) the accesses are well-ordered in both the original and new code
+ (see the comment above the DR_ALIAS_* flags for details); and
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
+ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
+ return false;
+
+ /* Check for equal (but possibly variable) steps. */
+ tree step = DR_STEP (dr_a.dr);
+ if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
+ return false;
+
+ /* Make sure that we can operate on sizetype without loss of precision. */
+ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
+ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
+ return false;
+
+ /* Get lengths in sizetype. */
+ tree seg_len_a
+ = fold_convert (sizetype,
+ rewrite_to_non_trapping_overflow (dr_a.seg_len));
+ create_waw_or_war_checks2 (cond_expr, seg_len_a, alias_pair);
+ if (param_flexible_seg_len && dr_a.seg_len != dr_a.seg_len2)
+ {
+ tree seg_len2_a
+ = fold_convert (sizetype,
+ rewrite_to_non_trapping_overflow (dr_a.seg_len2));
+ tree cond_expr2;
+ create_waw_or_war_checks2 (&cond_expr2, seg_len2_a, alias_pair);
+ *cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+ *cond_expr, cond_expr2);
+ }
return true;
}
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index f643a95b2..9bc5f16ee 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -213,12 +213,19 @@ class dr_with_seg_len
public:
dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size,
unsigned int a)
- : dr (d), seg_len (len), access_size (size), align (a) {}
-
+ : dr (d), seg_len (len), seg_len2 (len), access_size (size), align (a)
+ {}
+ dr_with_seg_len (data_reference_p d, tree len, tree len2,
+ unsigned HOST_WIDE_INT size, unsigned int a)
+ : dr (d), seg_len (len), seg_len2 (len2), access_size (size), align (a)
+ {}
data_reference_p dr;
/* The offset of the last access that needs to be checked minus
the offset of the first. */
tree seg_len;
+ /* The second version of segment length. Currently this is used to
+ soften checks for a small number of iterations. */
+ tree seg_len2;
/* A value that, when added to abs (SEG_LEN), gives the total number of
bytes in the segment. */
poly_uint64 access_size;
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
index 4e615b80b..04e68f621 100644
--- a/gcc/tree-vect-data-refs.cc
+++ b/gcc/tree-vect-data-refs.cc
@@ -3646,6 +3646,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
poly_uint64 lower_bound;
tree segment_length_a, segment_length_b;
+ tree segment_length2_a, segment_length2_b;
unsigned HOST_WIDE_INT access_size_a, access_size_b;
unsigned int align_a, align_b;
@@ -3751,6 +3752,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
segment_length_a = size_zero_node;
segment_length_b = size_zero_node;
+ segment_length2_a = size_zero_node;
+ segment_length2_b = size_zero_node;
}
else
{
@@ -3759,8 +3762,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
length_factor = scalar_loop_iters;
else
length_factor = size_int (vect_factor);
+ /* In any case we should remember scalar_loop_iters; this helps
+ to create a flexible aliasing check for a small number of
+ iterations. */
segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
+ segment_length2_a
+ = vect_vfa_segment_size (dr_info_a, scalar_loop_iters);
+ segment_length2_b
+ = vect_vfa_segment_size (dr_info_b, scalar_loop_iters);
}
access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
@@ -3805,9 +3815,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
}
dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
- access_size_a, align_a);
+ segment_length2_a, access_size_a, align_a);
dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
- access_size_b, align_b);
+ segment_length2_b, access_size_b, align_b);
/* Canonicalize the order to be the one that's needed for accurate
RAW, WAR and WAW flags, in cases where the data references are
well-ordered. The order doesn't really matter otherwise,
--
2.33.0
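
To make the intent concrete: with --param=vect-alias-flexible-segment-len=1 the runtime WAR/WAW test is built twice, once with the VF-based segment length and once with the iteration-count-based one, and the two results are ORed together (the TRUTH_OR_EXPR above). A hedged, self-contained sketch of that shape, with illustrative names rather than GCC internals:

#include <stdbool.h>
#include <stddef.h>

/* The vector loop is safe if EITHER segment-length estimate proves
   the two accesses disjoint; the smaller iteration-based length lets
   short loops pass the check.  */
static bool
accesses_disjoint (ptrdiff_t gap, size_t seg_len_vf, size_t seg_len_iters)
{
  size_t dist = (size_t) (gap < 0 ? -gap : gap);
  return dist >= seg_len_vf || dist >= seg_len_iters;
}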

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -0,0 +1,94 @@
From 0263daa1312d0cdcdf9c770bcf5d982a2d4fc16b Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Fri, 29 Mar 2024 17:15:41 +0800
Subject: [PATCH 2/2] Fix fails in IPA prefetch (src-openEuler/gcc: I96ID7)
---
gcc/ipa-prefetch.cc | 28 ++++++++++++++++++++++++++--
1 file changed, 26 insertions(+), 2 deletions(-)
diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
index 9537e4835..1ceb5137f 100644
--- a/gcc/ipa-prefetch.cc
+++ b/gcc/ipa-prefetch.cc
@@ -366,6 +366,7 @@ typedef std::map<memref_t *, memref_t *> memref_map;
typedef std::map<memref_t *, tree> memref_tree_map;
typedef std::set<gimple *> stmt_set;
+typedef std::set<tree> tree_set;
typedef std::map<tree, tree> tree_map;
tree_memref_map *tm_map;
@@ -1124,8 +1125,21 @@ analyse_loops ()
}
}
+/* Compare memrefs by IDs; helper for qsort. */
+
+static int
+memref_id_cmp (const void *p1, const void *p2)
+{
+ const memref_t *mr1 = *(const memref_t **) p1;
+ const memref_t *mr2 = *(const memref_t **) p2;
+
+ if ((unsigned) mr1->mr_id > (unsigned) mr2->mr_id)
+ return 1;
+ return -1;
+}
+
/* Reduce the set filtering out memrefs with the same memory references,
- return the result vector of memrefs. */
+ sort and return the result vector of memrefs. */
static void
reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
@@ -1162,6 +1176,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
vec.safe_push (mr1);
}
}
+ vec.qsort (memref_id_cmp);
if (dump_file)
{
fprintf (dump_file, "MRs (%d) after filtering: ", vec.length ());
@@ -1663,10 +1678,15 @@ optimize_function (cgraph_node *n, function *fn)
}
/* Create other new vars. Insert new stmts. */
+ vec<memref_t *> used_mr_vec = vNULL;
for (memref_set::const_iterator it = used_mrs.begin ();
it != used_mrs.end (); it++)
+ used_mr_vec.safe_push (*it);
+ used_mr_vec.qsort (memref_id_cmp);
+
+ for (unsigned int j = 0; j < used_mr_vec.length (); j++)
{
- memref_t *mr = *it;
+ memref_t *mr = used_mr_vec[j];
if (mr == comp_mr)
continue;
gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
@@ -1702,6 +1722,7 @@ optimize_function (cgraph_node *n, function *fn)
local = integer_three_node;
break;
}
+ tree_set prefetched_addrs;
for (unsigned int j = 0; j < vmrs.length (); j++)
{
memref_t *mr = vmrs[j];
@@ -1714,10 +1735,13 @@ optimize_function (cgraph_node *n, function *fn)
tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
if (decl_map->count (addr))
addr = (*decl_map)[addr];
+ if (prefetched_addrs.count (addr))
+ continue;
last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
3, addr, write_p, local);
pcalls.safe_push (last_stmt);
gimple_seq_add_stmt (&stmts, last_stmt);
+ prefetched_addrs.insert (addr);
if (dump_file)
{
fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
--
2.33.0
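
The qsort comparator above never returns 0; since memref IDs are unique that is still a consistent ordering. A hedged standalone sketch of the determinism pattern (illustrative types, not GCC internals):

#include <stdlib.h>

struct rec { unsigned id; };

/* Sort an array of pointers by a unique ID so that iteration order
   over a pointer-keyed set becomes deterministic across runs.  */
static int
rec_id_cmp (const void *p1, const void *p2)
{
  const struct rec *r1 = *(const struct rec *const *) p1;
  const struct rec *r2 = *(const struct rec *const *) p2;
  return (r1->id > r2->id) - (r1->id < r2->id);
}

/* Usage: qsort (recs, n, sizeof (struct rec *), rec_id_cmp);  */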

File diff suppressed because it is too large

@ -0,0 +1,28 @@
From 9dc3df938b9ed2c27498c8548087fee1ce930366 Mon Sep 17 00:00:00 2001
From: Zheng Chenhui <zhengchenhui1@huawei.com>
Date: Tue, 2 Apr 2024 11:08:30 +0800
Subject: [PATCH] [Struct Reorg] Bugfix for structure pointer compression
---
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
index fa33f2d35..3922873f3 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
@@ -7541,9 +7541,11 @@ ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void)
if (!type->has_legal_alloc_num)
{
if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE)
+ {
if (dump_file)
fprintf (dump_file, " has unknown alloc size, but"
" in unsafe mode, so");
+ }
else
{
if (dump_file)
--
2.33.0
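
The two added braces fix a classic dangling-else: before the change, the `else` paired with `if (dump_file)` instead of the pointer-compression condition. A hedged, self-contained illustration:

#include <stdio.h>

static void reject_type (void) { }

static void
check (int unsafe_mode, FILE *dump_file)
{
  if (unsafe_mode)
    {
      /* Without the braces, the `else` below would bind to this
         inner `if`, silently changing control flow.  */
      if (dump_file)
        fprintf (dump_file, " has unknown alloc size, but in unsafe mode, so");
    }
  else
    reject_type ();
}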


@ -0,0 +1,420 @@
From 55c547748af36ffc3f2d5ed154a91fb3fcb8431c Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Thu, 11 Apr 2024 15:49:59 +0800
Subject: [PATCH] [Struct Reorg] Port bugfixes to GCC 12.3.1
Migrated from commits in GCC10.3.1:
https://gitee.com/openeuler/gcc/commit/41af6d361a6d85ef4fce8a8438113d765596afdd
https://gitee.com/openeuler/gcc/commit/25d74b98caeaae881e374924886ee664aa1af5bc
https://gitee.com/openeuler/gcc/commit/b5a3bfe92f96cd0d2224d80ac4eaa80dab1bd6bf
https://gitee.com/openeuler/gcc/commit/708ffe6f132ee39441b66b6ab6b98847d35916b7
https://gitee.com/openeuler/gcc/commit/e875e4e7f3716aa268ffbbf55ee199ec82b6aeba
---
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 97 ++++++++++---------
gcc/testsuite/gcc.dg/struct/dfe_escape.c | 50 ++++++++++
gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c | 69 +++++++++++++
gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 2 +
gcc/testsuite/gcc.dg/struct/struct_reorg-10.c | 29 ++++++
gcc/testsuite/gcc.dg/struct/struct_reorg-11.c | 16 +++
gcc/testsuite/gcc.dg/struct/struct_reorg-12.c | 26 +++++
7 files changed, 243 insertions(+), 46 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_escape.c
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
index 6a202b4bd..f03d1d875 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
@@ -466,10 +466,19 @@ srtype::has_dead_field (void)
unsigned i;
FOR_EACH_VEC_ELT (fields, i, this_field)
{
- if (!(this_field->field_access & READ_FIELD))
- {
- may_dfe = true;
- break;
+ /* Function pointer members are not processed, because DFE
+ does not currently support accurate analysis of function
+ pointers, and we have not identified specific use cases. */
+ if (!(this_field->field_access & READ_FIELD)
+ && !FUNCTION_POINTER_TYPE_P (this_field->fieldtype))
+ {
+ /* Fields with escape risks should not be processed. */
+ if (this_field->type == NULL
+ || (this_field->type->escapes == does_not_escape))
+ {
+ may_dfe = true;
+ break;
+ }
}
}
return may_dfe;
@@ -1032,8 +1041,13 @@ srtype::create_new_type (void)
{
srfield *f = fields[i];
if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
- && !(f->field_access & READ_FIELD))
- continue;
+ && !(f->field_access & READ_FIELD)
+ && !FUNCTION_POINTER_TYPE_P (f->fieldtype))
+ {
+ /* Fields with escape risks should not be processed. */
+ if (f->type == NULL || (f->type->escapes == does_not_escape))
+ continue;
+ }
f->create_new_fields (newtype, newfields, newlast);
}
@@ -3815,9 +3829,17 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other,
if (VOID_POINTER_P (TREE_TYPE (side))
&& TREE_CODE (side) == SSA_NAME)
{
- /* The type is other, the declaration is side. */
- current_function->record_decl (type, side, -1,
- isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
+ tree inner = SSA_NAME_VAR (side);
+ if (inner)
+ {
+ srdecl *in = find_decl (inner);
+ if (in && !in->type->has_escaped ())
+ {
+ /* The type is other, the declaration is side. */
+ current_function->record_decl (type, side, -1,
+ isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
+ }
+ }
}
else
/* *_1 = &MEM[(void *)&x + 8B]. */
@@ -3910,6 +3932,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
maybe_mark_or_record_other_side (rhs, lhs, stmt);
if (TREE_CODE (lhs) == SSA_NAME)
maybe_mark_or_record_other_side (lhs, rhs, stmt);
+
+ /* Handle missing ARRAY_REF cases. */
+ if (TREE_CODE (lhs) == ARRAY_REF)
+ mark_type_as_escape (TREE_TYPE (lhs), escape_array, stmt);
+ if (TREE_CODE (rhs) == ARRAY_REF)
+ mark_type_as_escape (TREE_TYPE (rhs), escape_array, stmt);
}
}
@@ -5272,8 +5300,11 @@ ipa_struct_reorg::record_accesses (void)
record_function (cnode);
else
{
- tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
- mark_type_as_escape (return_type, escape_return, NULL);
+ if (cnode->externally_visible)
+ {
+ tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
+ mark_type_as_escape (return_type, escape_return, NULL);
+ }
}
}
@@ -5889,6 +5920,7 @@ ipa_struct_reorg::rewrite_expr (tree expr,
bool escape_from_base = false;
tree newbase[max_split];
+ memset (newbase, 0, sizeof (tree[max_split]));
memset (newexpr, 0, sizeof (tree[max_split]));
if (TREE_CODE (expr) == CONSTRUCTOR)
@@ -6912,7 +6944,7 @@ create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr,
}
tree
-ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt,
+ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt ATTRIBUTE_UNUSED,
gimple_stmt_iterator *gsi,
tree ptr, tree offset,
srtype *type)
@@ -7889,41 +7921,14 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt,
should be removed. */
bool
-ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
+ipa_struct_reorg::rewrite_debug (gimple *, gimple_stmt_iterator *)
{
- if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
- /* Delete debug gimple now. */
- return true;
- bool remove = false;
- if (gimple_debug_bind_p (stmt))
- {
- tree var = gimple_debug_bind_get_var (stmt);
- tree newvar[max_split];
- if (rewrite_expr (var, newvar, true))
- remove = true;
- if (gimple_debug_bind_has_value_p (stmt))
- {
- var = gimple_debug_bind_get_value (stmt);
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
- var = TREE_OPERAND (var, 0);
- if (rewrite_expr (var, newvar, true))
- remove = true;
- }
- }
- else if (gimple_debug_source_bind_p (stmt))
- {
- tree var = gimple_debug_source_bind_get_var (stmt);
- tree newvar[max_split];
- if (rewrite_expr (var, newvar, true))
- remove = true;
- var = gimple_debug_source_bind_get_value (stmt);
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
- var = TREE_OPERAND (var, 0);
- if (rewrite_expr (var, newvar, true))
- remove = true;
- }
-
- return remove;
+ /* In debug statements, there might be some statements that have
+ been optimized out in gimple but left in debug gimple. Sometimes
+ these statements need to be analyzed for escape, but that should
+ not happen at the rewrite stage. Handling them would take great
+ care for little benefit, so we just delete debug gimple. */
+ return true;
}
/* Rewrite PHI nodes, return true if the PHI was replaced. */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_escape.c b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
new file mode 100644
index 000000000..09efe8027
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+typedef struct network
+{
+ int x;
+} network_t;
+
+struct arc
+{
+ int flow;
+ network_t* net_add;
+};
+
+const int MAX = 100;
+
+/* let it escape_array, "Type is used in an array [not handled yet]". */
+network_t* net[2];
+arc_p stop_arcs = NULL;
+
+int
+main ()
+{
+ net[0] = (network_t*) calloc (1, sizeof(network_t));
+ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
+
+ net[0]->x = 100;
+
+ for (unsigned i = 0; i < 3; i++)
+ {
+ net[0]->x = net[0]->x + 2;
+ stop_arcs->flow = net[0]->x / 2;
+ stop_arcs->flow = stop_arcs->flow + 20;
+ stop_arcs->net_add = net[0];
+ stop_arcs++;
+ }
+
+ if( net[1] != 0 && stop_arcs != 0)
+ {
+ return -1;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
new file mode 100644
index 000000000..74ea93bbc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
@@ -0,0 +1,69 @@
+/* { dg-do compile } */
+/* { dg-do run } */
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#ifdef STACK_SIZE
+#if STACK_SIZE > 16000
+#define N 1000
+#else
+#define N (STACK_SIZE/16)
+#endif
+#else
+#define N 1000
+#endif
+
+int num;
+
+int (*foo)(int d);
+int f (int t);
+
+typedef struct str_t str_t1;
+struct str_t
+{
+ int a;
+ float b;
+ int (*foo)(int d);
+};
+
+int main ()
+{
+ int i, r;
+ r = rand ();
+ num = r > N ? N : r;
+ str_t1 * p1 = calloc (num, sizeof (str_t1));
+ if (p1 == NULL)
+ return 0;
+ for (i = 0; i < num; i++)
+ {
+ p1[i].foo = malloc (1 * sizeof (f));
+ p1[i].foo = f;
+ p1[i].foo (i);
+ }
+
+ for (i = 0; i < num; i++)
+ p1[i].a = 1;
+
+ for (i = 0; i < num; i++)
+ p1[i].b = 2;
+
+ for (i = 0; i < num; i++)
+ if (p1[i].a != 1)
+ abort ();
+
+ for (i = 0; i < num; i++)
+ if (abs (p1[i].b - 2) > 0.0001)
+ abort ();
+
+ return 0;
+}
+
+int f (int t)
+{
+ if ( t < 0)
+ abort ();
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
index c5a955b00..687f6609f 100644
--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
@@ -46,6 +46,8 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf_*.c]] \
# -fipa-struct-reorg=3
gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \
"" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
+gcc-dg-runtest $srcdir/$subdir/struct_reorg-7.c \
+ "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
# -fipa-struct-reorg=4
gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
new file mode 100644
index 000000000..ec422f76f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
+
+struct a {
+ int b;
+ char c;
+};
+struct {
+ double d;
+ _Bool e;
+} * f;
+struct g {
+ struct a h;
+} i;
+long j;
+void k();
+void l() { k(i); }
+void k(struct a m) {
+ f->e = 0;
+ for (;;)
+ l();
+}
+int main() {
+ for (; j; f = 0) {
+ struct g *n = 0;
+ char o = n->h.c;
+ }
+ l();
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
new file mode 100644
index 000000000..3e42aa84a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
+
+struct a {
+ int b;
+ double c;
+};
+struct d {
+ struct a e;
+};
+int f;
+int main() {
+ _Bool g;
+ struct d **h = 0;
+ g = *h += f;
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
new file mode 100644
index 000000000..d434f9fe0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
+
+struct foo {
+ long element1;
+ long element2;
+};
+
+struct goo {
+ struct foo element_foo;
+};
+
+struct goo g1;
+
+void func () {
+ struct foo (*local)[] = 0;
+ long idx;
+ (g1).element_foo = (*local)[idx];
+}
+
+struct foo g2;
+int main () {
+ func ();
+ g2 = g1.element_foo;
+ return 0;
+}
--
2.33.0
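
For readers new to dead-field elimination (DFE), a hedged before/after sketch of what the checks above protect: a field that is never read can be dropped, but function-pointer fields and fields of possibly escaping type are now kept conservatively.

/* Illustrative only -- not GCC output.  `pad` is written but never
   read, so DFE may remove it; `cb` is a function pointer and is
   retained even if unread.  */
struct arc_before { int flow; int pad; int (*cb) (int); };
struct arc_after  { int flow; int (*cb) (int); };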


@ -0,0 +1,27 @@
From fa6f80044dcebd28506e871e6e5d25e2dfd7e105 Mon Sep 17 00:00:00 2001
From: tiancheng-bao <baotiancheng1@huawei.com>
Date: Fri, 12 Apr 2024 15:09:28 +0800
Subject: [PATCH 01/32] Fix bug where gimple verification failed when
 reorg-level > 5
---
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 3 +++
1 file changed, 3 insertions(+)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
index f03d1d875..e08577c0c 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
@@ -7461,6 +7461,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
continue;
tree lhs_expr = newlhs[i] ? newlhs[i] : lhs;
tree rhs_expr = newrhs[i] ? newrhs[i] : rhs;
+ if (!useless_type_conversion_p (TREE_TYPE (lhs_expr),
+ TREE_TYPE (rhs_expr)))
+ rhs_expr = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (lhs_expr), rhs_expr);
gimple *newstmt = gimple_build_assign (lhs_expr, rhs_expr);
if (dump_file && (dump_flags & TDF_DETAILS))
{
--
2.28.0.windows.1


@ -0,0 +1,90 @@
From 13e82fccba781b29e55a6e1934986514019b728d Mon Sep 17 00:00:00 2001
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
Date: Sun, 24 Mar 2024 20:42:27 +0800
Subject: [PATCH 02/32] [AutoFdo] Fix memory leaks in autofdo
---
gcc/final.cc | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
diff --git a/gcc/final.cc b/gcc/final.cc
index d4c4fa08f..af4e529bb 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -4402,12 +4402,15 @@ get_fdo_count_quality (profile_count count)
return profile_quality[count.quality ()];
}
-static const char *
+/* If the function is not public, return the function_name/file_name for
+ disambiguation of local symbols since there could be identical function
+ names coming from identical file names. The caller needs to free memory. */
+static char *
alias_local_functions (const char *fnname)
{
if (TREE_PUBLIC (cfun->decl))
{
- return fnname;
+ return concat (fnname, NULL);
}
return concat (fnname, "/", lbasename (dump_base_name), NULL);
}
@@ -4457,12 +4460,13 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
if (callee)
{
+ char *func_name =
+ alias_local_functions (get_fnname_from_decl (callee));
fprintf (asm_out_file, "\t.string \"%x\"\n",
INSN_ADDRESSES (INSN_UID (insn)));
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
- ASM_FDO_CALLEE_FLAG,
- alias_local_functions (get_fnname_from_decl (callee)));
+ ASM_FDO_CALLEE_FLAG, func_name);
fprintf (asm_out_file,
"\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
@@ -4472,9 +4476,9 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
{
fprintf (dump_file, "call: %x --> %s \n",
INSN_ADDRESSES (INSN_UID (insn)),
- alias_local_functions
- (get_fnname_from_decl (callee)));
+ func_name);
}
+ free (func_name);
}
}
}
@@ -4547,8 +4551,9 @@ dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
static void
dump_function_info_to_asm (const char *fnname)
{
+ char *func_name = alias_local_functions (fnname);
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
- ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
+ ASM_FDO_CALLER_FLAG, func_name);
fprintf (asm_out_file, "\t.string \"%s%d\"\n",
ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
@@ -4557,7 +4562,7 @@ dump_function_info_to_asm (const char *fnname)
if (dump_file)
{
fprintf (dump_file, "\n FUNC_NAME: %s\n",
- alias_local_functions (fnname));
+ func_name);
fprintf (dump_file, " file: %s\n",
dump_base_name);
fprintf (dump_file, "profile_status: %s\n",
@@ -4567,6 +4572,7 @@ dump_function_info_to_asm (const char *fnname)
fprintf (dump_file, " function_bind: %s\n",
simple_get_function_bind ());
}
+ free (func_name);
}
/* Dump function profile into form AutoFDO or PGO to asm. */
--
2.28.0.windows.1
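
The leak fix works by making the helper's ownership contract uniform: every path now returns freshly allocated memory, so every caller frees unconditionally. A hedged sketch of the same contract in isolation (names illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Return a heap-allocated name in both branches; the caller always
   calls free(), matching the patch's use of concat() for both the
   public and the local case.  */
static char *
local_alias (const char *fnname, int is_public, const char *file)
{
  size_t n = strlen (fnname) + (is_public ? 0 : strlen (file) + 1) + 1;
  char *s = (char *) malloc (n);
  if (s == NULL)
    return NULL;
  if (is_public)
    snprintf (s, n, "%s", fnname);
  else
    snprintf (s, n, "%s/%s", fnname, file);  /* name/file disambiguates */
  return s;
}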


@ -0,0 +1,321 @@
From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
Date: Thu, 26 Oct 2023 09:50:40 -0400
Subject: [PATCH 31/32] Modify cost calculation for dealing with equivalences
RISCV target developers reported that pseudos with equivalence used in
a loop can be spilled. Simple changes of heuristics of cost
calculation of pseudos with equivalence or even ignoring equivalences
resulted in numerous testsuite failures on different targets or worse
spec2017 performance. This patch implements more sophisticated cost
calculations of pseudos with equivalences. The patch does not change
RA behaviour for targets still using the old reload pass instead of
LRA. The patch solves the reported problem and improves x86-64
specint2017 a bit (specfp2017 performance stays the same). The patch
takes into account how the equivalence will be used: will it be
integrated into the user insns or require an input reload insn. It
requires additional pass over insns. To compensate RA slow down, the
patch removes a pass over insns in the reload pass used by IRA before.
This also decouples IRA from reload more and will help to remove the
reload pass in the future if it ever happens.
gcc/ChangeLog:
* dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
LRA is used.
* ira-costs.cc: Include regset.h.
(equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
New functions.
(find_costs_and_classes): Call calculate_equiv_gains and redefine
mem_cost of pseudos with equivs when LRA is used.
* var-tracking.cc: Include ira.h and lra.h.
(vt_initialize): Use lra_eliminate_regs when LRA is used.
---
gcc/dwarf2out.cc | 4 +-
gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++--
gcc/var-tracking.cc | 14 +++-
3 files changed, 179 insertions(+), 8 deletions(-)
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
index 0a5c081d8..f0f6f4fd4 100644
--- a/gcc/dwarf2out.cc
+++ b/gcc/dwarf2out.cc
@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
argument pointer and soft frame pointer rtx's.
Use DW_OP_fbreg offset DW_OP_stack_value in this case. */
if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
+ && (ira_use_lra_p
+ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
+ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
{
dw_loc_descr_ref result = NULL;
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index 642fda529..c79311783 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#include "tm_p.h"
#include "insn-config.h"
#include "regs.h"
+#include "regset.h"
#include "ira.h"
#include "ira-int.h"
#include "addresses.h"
@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
process_bb_for_costs (bb);
}
+/* Check that reg REGNO can be changed by TO in INSN. Return true in case the
+ result insn would be valid one. */
+static bool
+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
+{
+ validate_replace_src_group (regno_reg_rtx[regno], to, insn);
+ bool res = verify_changes (0);
+ cancel_changes (0);
+ return res;
+}
+
+/* Return true if X contains a pseudo with equivalence. In this case also
+ return the pseudo through parameter REG. If the pseudo is a part of subreg,
+ return the subreg through parameter SUBREG. */
+
+static bool
+get_equiv_regno (rtx x, int &regno, rtx &subreg)
+{
+ subreg = NULL_RTX;
+ if (GET_CODE (x) == SUBREG)
+ {
+ subreg = x;
+ x = SUBREG_REG (x);
+ }
+ if (REG_P (x)
+ && (ira_reg_equiv[REGNO (x)].memory != NULL
+ || ira_reg_equiv[REGNO (x)].constant != NULL))
+ {
+ regno = REGNO (x);
+ return true;
+ }
+ RTX_CODE code = GET_CODE (x);
+ const char *fmt = GET_RTX_FORMAT (code);
+
+ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ if (fmt[i] == 'e')
+ {
+ if (get_equiv_regno (XEXP (x, i), regno, subreg))
+ return true;
+ }
+ else if (fmt[i] == 'E')
+ {
+ for (int j = 0; j < XVECLEN (x, i); j++)
+ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
+ return true;
+ }
+ return false;
+}
+
+/* A pass through the current function insns. Calculate costs of using
+ equivalences for pseudos and store them in regno_equiv_gains. */
+
+static void
+calculate_equiv_gains (void)
+{
+ basic_block bb;
+ int regno, freq, cost;
+ rtx subreg;
+ rtx_insn *insn;
+ machine_mode mode;
+ enum reg_class rclass;
+ bitmap_head equiv_pseudos;
+
+ ira_assert (allocno_p);
+ bitmap_initialize (&equiv_pseudos, &reg_obstack);
+ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
+ if (ira_reg_equiv[regno].init_insns != NULL
+ && (ira_reg_equiv[regno].memory != NULL
+ || (ira_reg_equiv[regno].constant != NULL
+ /* Ignore complicated constants which probably will be placed
+ in memory: */
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR
+ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF)))
+ {
+ rtx_insn_list *x;
+ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
+ {
+ insn = x->insn ();
+ rtx set = single_set (insn);
+
+ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno])
+ break;
+ bb = BLOCK_FOR_INSN (insn);
+ ira_curr_regno_allocno_map
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
+ mode = PSEUDO_REGNO_MODE (regno);
+ rclass = pref[COST_INDEX (regno)];
+ ira_init_register_move_cost_if_necessary (mode);
+ if (ira_reg_equiv[regno].memory != NULL)
+ cost = ira_memory_move_cost[mode][rclass][1];
+ else
+ cost = ira_register_move_cost[mode][rclass][rclass];
+ freq = REG_FREQ_FROM_BB (bb);
+ regno_equiv_gains[regno] += cost * freq;
+ }
+ if (x != NULL)
+ /* We found complicated equiv or reverse equiv mem=reg. Ignore
+ them. */
+ regno_equiv_gains[regno] = 0;
+ else
+ bitmap_set_bit (&equiv_pseudos, regno);
+ }
+
+ FOR_EACH_BB_FN (bb, cfun)
+ {
+ freq = REG_FREQ_FROM_BB (bb);
+ ira_curr_regno_allocno_map
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg)
+ || !bitmap_bit_p (&equiv_pseudos, regno))
+ continue;
+ rtx subst = ira_reg_equiv[regno].memory;
+
+ if (subst == NULL)
+ subst = ira_reg_equiv[regno].constant;
+ ira_assert (subst != NULL);
+ mode = PSEUDO_REGNO_MODE (regno);
+ ira_init_register_move_cost_if_necessary (mode);
+ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
+
+ rclass = pref[COST_INDEX (regno)];
+ if (MEM_P (subst)
+ /* If it is a change of constant into double for example, the
+ result constant probably will be placed in memory. */
+ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg))))
+ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1);
+ else if (consumed_p)
+ continue;
+ else
+ cost = ira_register_move_cost[mode][rclass][rclass];
+ regno_equiv_gains[regno] -= cost * freq;
+ }
+ }
+ bitmap_clear (&equiv_pseudos);
+}
+
/* Find costs of register classes and memory for allocnos or pseudos
and their best costs. Set up preferred, alternative and allocno
classes for pseudos. */
@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file)
if (pass == 0)
pref = pref_buffer;
+ if (ira_use_lra_p && allocno_p && pass == 1)
+ /* It is a pass through all insns. So do it once and only for RA (not
+ for insn scheduler) when we already found preferable pseudo register
+ classes on the previous pass. */
+ calculate_equiv_gains ();
+
/* Now for each allocno look at how desirable each class is and
find which class is preferred. */
for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file)
}
if (i >= first_moveable_pseudo && i < last_moveable_pseudo)
i_mem_cost = 0;
+ else if (ira_use_lra_p)
+ {
+ if (equiv_savings > 0)
+ {
+ i_mem_cost = 0;
+ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
+ fprintf (ira_dump_file,
+ " Use MEM for r%d as the equiv savings is %d\n",
+ i, equiv_savings);
+ }
+ }
else if (equiv_savings < 0)
i_mem_cost = -equiv_savings;
else if (equiv_savings > 0)
@@ -2378,7 +2535,10 @@ ira_costs (void)
total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
* ira_allocnos_num);
initiate_regno_cost_classes ();
- calculate_elim_costs_all_insns ();
+ if (!ira_use_lra_p)
+ /* Process equivs in reload to update costs through hook
+ ira_adjust_equiv_reg_cost. */
+ calculate_elim_costs_all_insns ();
find_costs_and_classes (ira_dump_file);
setup_allocno_class_and_costs ();
finish_regno_cost_classes ();
@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void)
}
}
-/* Add COST to the estimated gain for eliminating REGNO with its
- equivalence. If COST is zero, record that no such elimination is
- possible. */
+/* A hook from the reload pass. Add COST to the estimated gain for eliminating
+ REGNO with its equivalence. If COST is zero, record that no such
+ elimination is possible. */
void
ira_adjust_equiv_reg_cost (unsigned regno, int cost)
{
+ ira_assert (!ira_use_lra_p);
if (cost == 0)
regno_equiv_gains[regno] = 0;
else
diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc
index 7c3ad0a55..b10c8c1eb 100644
--- a/gcc/var-tracking.cc
+++ b/gcc/var-tracking.cc
@@ -107,6 +107,8 @@
#include "cfgrtl.h"
#include "cfganal.h"
#include "reload.h"
+#include "ira.h"
+#include "lra.h"
#include "calls.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
@@ -10133,7 +10135,9 @@ vt_initialize (void)
#else
reg = arg_pointer_rtx;
#endif
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
+ elim = (ira_use_lra_p
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
if (elim != reg)
{
if (GET_CODE (elim) == PLUS)
@@ -10153,7 +10157,9 @@ vt_initialize (void)
reg = arg_pointer_rtx;
fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl);
#endif
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
+ elim = (ira_use_lra_p
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
if (elim != reg)
{
if (GET_CODE (elim) == PLUS)
@@ -10185,7 +10191,9 @@ vt_initialize (void)
#else
reg = arg_pointer_rtx;
#endif
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
+ elim = (ira_use_lra_p
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
if (elim != reg)
{
if (GET_CODE (elim) == PLUS)
--
2.28.0.windows.1
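
The cost model above reduces to two rules, both weighted by basic-block frequency: each removable equivalence-defining insn adds a gain, and each use that cannot consume the equivalence in place subtracts the cost of the reload it would need. A hedged arithmetic sketch, not GCC internals:

/* If the accumulated gain ends up positive, find_costs_and_classes
   zeroes the pseudo's mem_cost, making a spill to the equivalence
   look free.  */
static void
account_equiv (int *gain, int def_cost, int def_freq,
               int reload_cost, int unconsumed_use_freq)
{
  *gain += def_cost * def_freq;               /* def insn removable */
  *gain -= reload_cost * unconsumed_use_freq; /* use needs a reload */
}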


@ -0,0 +1,49 @@
From 4965473a4211a9feb46a0d168180ab450cb18bcc Mon Sep 17 00:00:00 2001
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
Date: Fri, 27 Oct 2023 08:28:24 -0400
Subject: [PATCH 32/32] Add cost calculation for reg equivalence invariants
My recent patch improving cost calculation for pseudos with equivalence
resulted in failure of gcc.target/arm/eliminate.c on aarch64. This patch
fixes this failure.
gcc/ChangeLog:
* ira-costs.cc: (get_equiv_regno, calculate_equiv_gains):
Process reg equivalence invariants.
---
gcc/ira-costs.cc | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
index c79311783..d33104a30 100644
--- a/gcc/ira-costs.cc
+++ b/gcc/ira-costs.cc
@@ -1777,6 +1777,7 @@ get_equiv_regno (rtx x, int &regno, rtx &subreg)
}
if (REG_P (x)
&& (ira_reg_equiv[REGNO (x)].memory != NULL
+ || ira_reg_equiv[REGNO (x)].invariant != NULL
|| ira_reg_equiv[REGNO (x)].constant != NULL))
{
regno = REGNO (x);
@@ -1819,6 +1820,7 @@ calculate_equiv_gains (void)
for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
if (ira_reg_equiv[regno].init_insns != NULL
&& (ira_reg_equiv[regno].memory != NULL
+ || ira_reg_equiv[regno].invariant != NULL
|| (ira_reg_equiv[regno].constant != NULL
/* Ignore complicated constants which probably will be placed
in memory: */
@@ -1869,6 +1871,8 @@ calculate_equiv_gains (void)
if (subst == NULL)
subst = ira_reg_equiv[regno].constant;
+ if (subst == NULL)
+ subst = ira_reg_equiv[regno].invariant;
ira_assert (subst != NULL);
mode = PSEUDO_REGNO_MODE (regno);
ira_init_register_move_cost_if_necessary (mode);
--
2.28.0.windows.1

File diff suppressed because it is too large

@ -0,0 +1,48 @@
From 302b7e15d6308c29c215db4c9901342e1106381a Mon Sep 17 00:00:00 2001
From: huang-xiaoquan <huangxiaoquan1@huawei.com>
Date: Mon, 29 Apr 2024 11:00:12 +0800
Subject: [PATCH] [StructReorderFields] Fix gimple call not rewritten due to
empty function node
Add parameter type escape for empty functions or inline functions.
---
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
index e08577c0c..2257d3528 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
@@ -4366,6 +4366,17 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE;
}
+
+ /* Types escape via an argument of an empty or inlined function. */
+ cgraph_node *callee = node->get_edge (stmt)->callee;
+ if (!gimple_call_builtin_p (stmt, BUILT_IN_FREE)
+ && gimple_call_num_args (stmt)
+ && callee && (!callee->has_gimple_body_p () || callee->inlined_to))
+ {
+ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
+ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
+ escape_var_arg_function);
+ }
}
void
@@ -8068,6 +8079,11 @@ ipa_struct_reorg::rewrite_functions (void)
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "\nNo rewrite:\n");
+ if (current_function_decl == NULL)
+ {
+ fprintf (dump_file, "\ncurrent_function_decl == NULL\n");
+ continue;
+ }
if (current_function_decl)
dump_function_to_file (current_function_decl, dump_file,
dump_flags | TDF_VOPS);
--
2.33.0
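
The rule added above is conservative: when a callee has no gimple body to analyze (or was inlined away), every struct type reaching it through an argument is treated as escaping. A hedged sketch of the decision, with a stub standing in for mark_type_as_escape:

static void
mark_type_as_escape_stub (unsigned arg_index)
{
  (void) arg_index;  /* illustrative stand-in for mark_type_as_escape */
}

/* Conservatively escape all argument types when the callee body is
   unavailable for analysis.  */
static void
maybe_escape_args (int callee_has_body, int callee_inlined, unsigned nargs)
{
  if (!callee_has_body || callee_inlined)
    for (unsigned i = 0; i < nargs; i++)
      mark_type_as_escape_stub (i);
}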


@ -0,0 +1,40 @@
From 01517aa2397f854ffa96128a0fb23dd5542be709 Mon Sep 17 00:00:00 2001
From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
Date: Tue, 30 Apr 2024 18:43:32 +0800
Subject: [PATCH 1/4] [double-sized-mul][testsuite] Add -march=armv8.2-a for dg
 tests
---
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 2 +-
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
index 4d475cc8a..d32a25223 100644
--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
proper overflow detection in some cases. */
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
+/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
#include <stdint.h>
typedef unsigned __int128 uint128_t;
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
index cc6e5af25..ff35902b7 100644
--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* fif-conversion-gimple is required for proper overflow detection
in some cases. */
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
+/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
#include <stdint.h>
typedef unsigned __int128 uint128_t;
--
2.33.0


@ -0,0 +1,34 @@
From b84a896e2df214b08d6519a097cc410d3e582add Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Wed, 8 May 2024 21:28:32 +0800
Subject: [PATCH 2/4] [IPA][Bugfix] Fix fails in IPA prefetch
(src-openEuler/gcc: I9J6N6)
---
gcc/ipa-prefetch.cc | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
index 1ceb5137f..94290ea9c 100644
--- a/gcc/ipa-prefetch.cc
+++ b/gcc/ipa-prefetch.cc
@@ -1432,8 +1432,14 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
TREE_THIS_VOLATILE (*tp) = TREE_THIS_VOLATILE (old);
TREE_SIDE_EFFECTS (*tp) = TREE_SIDE_EFFECTS (old);
TREE_NO_WARNING (*tp) = TREE_NO_WARNING (old);
- /* TODO: maybe support this case. */
- gcc_assert (MR_DEPENDENCE_CLIQUE (old) == 0);
+ if (MR_DEPENDENCE_CLIQUE (old) != 0)
+ {
+ MR_DEPENDENCE_CLIQUE (*tp) = MR_DEPENDENCE_CLIQUE (old);
+ MR_DEPENDENCE_BASE (*tp) = MR_DEPENDENCE_BASE (old);
+ if (dump_file)
+ fprintf (dump_file, "Copy clique=%d base=%d info.\n",
+ MR_DEPENDENCE_CLIQUE (old), MR_DEPENDENCE_BASE (old));
+ }
/* We cannot propagate the TREE_THIS_NOTRAP flag if we have
remapped a parameter as the property might be valid only
for the parameter itself. */
--
2.33.0


@ -0,0 +1,29 @@
From acb6bbf0612aead00a879892ba8ed816c90fe788 Mon Sep 17 00:00:00 2001
From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
Date: Wed, 8 May 2024 19:24:27 +0800
Subject: [PATCH 3/4] [AES][Bugfix] Change set_of to reg_set_p, and add check
 for global_regs
Fix for I9JDHE.
---
gcc/rtl-matcher.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/gcc/rtl-matcher.h b/gcc/rtl-matcher.h
index 6aed8d98d..5310f6266 100644
--- a/gcc/rtl-matcher.h
+++ b/gcc/rtl-matcher.h
@@ -56,8 +56,9 @@ check_def_chain_ref (df_ref ref, rtx reg)
if (!ref || !DF_REF_INSN_INFO (ref))
return false;
- return !global_regs[REGNO (reg)]
- || set_of (reg, DF_REF_INSN (ref));
+ return !(REGNO (reg) < FIRST_PSEUDO_REGISTER
+ && global_regs[REGNO (reg)])
+ || reg_set_p (reg, DF_REF_INSN (ref));
}
/* Get the single def instruction of the reg being used in the insn. */
--
2.33.0
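
The added `REGNO (reg) < FIRST_PSEUDO_REGISTER` guard matters because `global_regs[]` is indexed by hard register number only; consulting it with a pseudo's regno would read out of bounds. A hedged standalone sketch with an illustrative signature:

/* global_regs has FIRST_PSEUDO_REGISTER entries, so it is only
   meaningful for hard registers.  */
static int
is_global_hard_reg (unsigned regno, unsigned first_pseudo,
                    const char *global_regs)
{
  return regno < first_pseudo && global_regs[regno];
}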


@ -0,0 +1,26 @@
From 48724ee73cd58b67d59962ee4d56ac85db797e61 Mon Sep 17 00:00:00 2001
From: tiancheng-bao <baotiancheng1@huawei.com>
Date: Fri, 10 May 2024 17:52:27 +0800
Subject: [PATCH 4/4] fix bugs within pointer compression and DFE
---
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 3 ---
1 file changed, 3 deletions(-)
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
index 2257d3528..1a169c635 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
@@ -7472,9 +7472,6 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
continue;
tree lhs_expr = newlhs[i] ? newlhs[i] : lhs;
tree rhs_expr = newrhs[i] ? newrhs[i] : rhs;
- if (!useless_type_conversion_p (TREE_TYPE (lhs_expr),
- TREE_TYPE (rhs_expr)))
- rhs_expr = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (lhs_expr), rhs_expr);
gimple *newstmt = gimple_build_assign (lhs_expr, rhs_expr);
if (dump_file && (dump_flags & TDF_DETAILS))
{
--
2.33.0


@ -0,0 +1,28 @@
From 4861c3db991e947060de54a4d20c1a13747a6024 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Wed, 15 May 2024 14:41:45 +0800
Subject: [PATCH] [BUGFIX] AutoBOLT function missing bind type
---
gcc/final.cc | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/final.cc b/gcc/final.cc
index af4e529bb..c440846f7 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -4272,9 +4272,9 @@ leaf_renumber_regs_insn (rtx in_rtx)
#define ASM_FDO_CALLER_FLAG ".fdo.caller "
#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
-#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
-#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
/* Return the relative offset address of the start instruction of BB,
return -1 if it is empty instruction. */
--
2.33.0
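
The one-character fix above restores the separator between marker and value in the emitted .string directives; without the trailing space the two run together. A small runnable illustration:

#include <stdio.h>

#define FLAG_OLD ".fdo.caller.bind"   /* before the fix */
#define FLAG_NEW ".fdo.caller.bind "  /* after the fix  */

int
main (void)
{
  printf ("\t.string \"%s%s\"\n", FLAG_OLD, "GLOBAL"); /* ...bindGLOBAL  */
  printf ("\t.string \"%s%s\"\n", FLAG_NEW, "GLOBAL"); /* ...bind GLOBAL */
  return 0;
}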

File diff suppressed because it is too large

@ -0,0 +1,45 @@
From 06e86b362f74ba0706fb5d8377f78d24b658c300 Mon Sep 17 00:00:00 2001
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
Date: Sat, 18 May 2024 12:22:23 +0800
Subject: [PATCH] [Bugfix] AutoFDO with PMU sampling: set num equal to den
---
gcc/final.cc | 2 +-
gcc/tree-cfg.cc | 8 ++++++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/gcc/final.cc b/gcc/final.cc
index f66c9d155..e4bfceabc 100644
--- a/gcc/final.cc
+++ b/gcc/final.cc
@@ -4604,7 +4604,7 @@ dump_profile_to_elf_sections ()
/* Return if no feedback data. */
if (!flag_profile_use && !flag_auto_profile)
{
- error ("-fauto-bolt should use with -profile-use or -fauto-profile");
+ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
return;
}
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
index 05fc45147..48b52f785 100644
--- a/gcc/tree-cfg.cc
+++ b/gcc/tree-cfg.cc
@@ -9741,6 +9741,14 @@ execute_fixup_cfg (void)
/* Same scaling is also done by ipa_merge_profiles. */
profile_count num = node->count;
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
+ /* When autofdo uses PMU as the sampling unit, the count of the
+ node cannot be obtained directly and is sometimes zero, but the
+ execution count of a function should be at least 1. We set num
+ to den here to make sure num will not decrease. */
+ if (num == profile_count::zero ().afdo ()
+ && den.quality () == profile_quality::AFDO)
+ {
+ num = den;
+ }
bool scale = num.initialized_p () && !(num == den);
auto_bitmap dce_ssa_names;
--
2.33.0
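
execute_fixup_cfg scales block counts by num/den, so a spurious num of zero from PMU-based AutoFDO would wipe out every count; forcing num = den yields a scale factor of 1 instead. A hedged arithmetic sketch:

/* Illustrative integer version of the scaling; GCC's profile_count
   does this with saturation and quality tracking.  */
static long
scale_bb_count (long bb_count, long num, long den, int is_afdo_zero)
{
  if (is_afdo_zero)
    num = den;                    /* keep counts instead of zeroing */
  return den != 0 ? bb_count * num / den : bb_count;
}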


@ -0,0 +1,59 @@
From 62ea18c632200edbbf46b4e957bc4d997f1c66f0 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Tue, 27 Sep 2022 15:28:43 +0800
Subject: [PATCH 024/124] Libvtv: Add loongarch support.
The loongarch64 specification permits page sizes of 4KiB, 16KiB and 64KiB,
but only 16KiB pages are supported for now.
Co-Authored-By: qijingwen <qijingwen@loongson.cn>
include/ChangeLog:
* vtv-change-permission.h (defined): Determines whether the macro
__loongarch_lp64 is defined
(VTV_PAGE_SIZE): Set VTV_PAGE_SIZE to 16KiB for loongarch64.
libvtv/ChangeLog:
* configure.tgt: Add loongarch support.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
include/vtv-change-permission.h | 4 ++++
libvtv/configure.tgt | 3 +++
2 files changed, 7 insertions(+)
diff --git a/include/vtv-change-permission.h b/include/vtv-change-permission.h
index 70bdad92b..e7b9294a0 100644
--- a/include/vtv-change-permission.h
+++ b/include/vtv-change-permission.h
@@ -48,6 +48,10 @@ extern void __VLTChangePermission (int);
#else
#if defined(__sun__) && defined(__svr4__) && defined(__sparc__)
#define VTV_PAGE_SIZE 8192
+#elif defined(__loongarch_lp64)
+/* The page size is configurable by the kernel to be 4, 16 or 64 KiB.
+ For now, only the default page size of 16KiB is supported. */
+#define VTV_PAGE_SIZE 16384
#else
#define VTV_PAGE_SIZE 4096
#endif
diff --git a/libvtv/configure.tgt b/libvtv/configure.tgt
index aa2a3f675..6cdd1e97a 100644
--- a/libvtv/configure.tgt
+++ b/libvtv/configure.tgt
@@ -50,6 +50,9 @@ case "${target}" in
;;
x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
;;
+ loongarch*-*-linux*)
+ VTV_SUPPORTED=yes
+ ;;
*)
;;
esac
--
2.33.0
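
libvtv's mprotect-based protection assumes the build-time VTV_PAGE_SIZE equals the kernel's runtime page size, which is why only the 16 KiB default is supported on loongarch64 for now. A hedged runtime sanity check:

#include <assert.h>
#include <unistd.h>

int
main (void)
{
  /* On a default loongarch64 kernel this should hold; 4 KiB or
     64 KiB kernels would need a different VTV_PAGE_SIZE.  */
  assert (sysconf (_SC_PAGESIZE) == 16384);
  return 0;
}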


@ -0,0 +1,332 @@
From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 16 Nov 2023 20:43:53 +0800
Subject: [PATCH] LoongArch: Add LA664 support.
Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch 1.1.
Support the configure setting --with-arch=la664, and support -march=la664 and -mtune=la664.
gcc/ChangeLog:
* config.gcc: Support LA664.
* config/loongarch/genopts/loongarch-strings: Likewise.
* config/loongarch/genopts/loongarch.opt.in: Likewise.
* config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise.
* config/loongarch/loongarch-def.c: Likewise.
* config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise.
(ISA_BASE_LA64V110): Define macro.
(N_ARCH_TYPES): Update value.
(N_TUNE_TYPES): Update value.
(CPU_LA664): New macro.
* config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise.
(isa_base_compat_p): Likewise.
* config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled
when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110.
(TARGET_uARCH_LA664): Define macro.
* config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise.
* config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width):
Add LA664 support.
* config/loongarch/loongarch.opt: Regenerate.
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config.gcc | 10 ++++-----
.../loongarch/genopts/loongarch-strings | 1 +
gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++
gcc/config/loongarch/loongarch-cpu.cc | 4 ++++
gcc/config/loongarch/loongarch-def.c | 21 +++++++++++++++++++
gcc/config/loongarch/loongarch-def.h | 8 ++++---
gcc/config/loongarch/loongarch-opts.cc | 8 +++----
gcc/config/loongarch/loongarch-opts.h | 4 +++-
gcc/config/loongarch/loongarch-str.h | 1 +
gcc/config/loongarch/loongarch.cc | 1 +
gcc/config/loongarch/loongarch.opt | 3 +++
11 files changed, 51 insertions(+), 13 deletions(-)
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 6d51bd93f3f..b88591b6fd8 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -5039,7 +5039,7 @@ case "${target}" in
# Perform initial sanity checks on --with-* options.
case ${with_arch} in
- "" | abi-default | loongarch64 | la464) ;; # OK, append here.
+ "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here.
native)
if test x${host} != x${target}; then
echo "--with-arch=native is illegal for cross-compiler." 1>&2
@@ -5088,7 +5088,7 @@ case "${target}" in
case ${abi_base}/${abi_ext} in
lp64*/base)
# architectures that support lp64* ABI
- arch_pattern="native|abi-default|loongarch64|la464"
+ arch_pattern="native|abi-default|loongarch64|la[46]64"
# default architecture for lp64* ABI
arch_default="abi-default"
;;
@@ -5163,7 +5163,7 @@ case "${target}" in
# Check default with_tune configuration using with_arch.
case ${with_arch} in
loongarch64)
- tune_pattern="native|abi-default|loongarch64|la464"
+ tune_pattern="native|abi-default|loongarch64|la[46]64"
;;
*)
# By default, $with_tune == $with_arch
@@ -5219,7 +5219,7 @@ case "${target}" in
# Fixed: use the default gcc configuration for all multilib
# builds by default.
with_multilib_default="" ;;
- arch,native|arch,loongarch64|arch,la464) # OK, append here.
+ arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
with_multilib_default="/march=${component}" ;;
arch,*)
with_multilib_default="/march=abi-default"
@@ -5307,7 +5307,7 @@ case "${target}" in
if test x${parse_state} = x"arch"; then
# -march option
case ${component} in
- native | abi-default | loongarch64 | la464) # OK, append here.
+ native | abi-default | loongarch64 | la[46]64) # OK, append here.
# Append -march spec for each multilib variant.
loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
parse_state="opts"
diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
index 8e412f7536e..7bc4824007e 100644
--- a/gcc/config/loongarch/genopts/loongarch-strings
+++ b/gcc/config/loongarch/genopts/loongarch-strings
@@ -26,6 +26,7 @@ STR_CPU_NATIVE native
STR_CPU_ABI_DEFAULT abi-default
STR_CPU_LOONGARCH64 loongarch64
STR_CPU_LA464 la464
+STR_CPU_LA664 la664
# Base architecture
STR_ISA_BASE_LA64V100 la64
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
index 158701d327a..00b4733d75b 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64)
EnumValue
Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
+EnumValue
+Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664)
+
m@@OPTSTR_ARCH@@=
Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
-m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA.
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
index 7a2866f60f9..f3a13414143 100644
--- a/gcc/config/loongarch/loongarch-cpu.cc
+++ b/gcc/config/loongarch/loongarch-cpu.cc
@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt)
native_cpu_type = CPU_LA464;
break;
+ case 0x0014d000: /* LA664 */
+ native_cpu_type = CPU_LA664;
+ break;
+
default:
/* Unknown PRID. */
if (tune_native_p)
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index 430ef8b2d95..067629141b6 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -28,6 +28,7 @@ loongarch_cpu_strings[N_TUNE_TYPES] = {
[CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT,
[CPU_LOONGARCH64] = STR_CPU_LOONGARCH64,
[CPU_LA464] = STR_CPU_LA464,
+ [CPU_LA664] = STR_CPU_LA664,
};
struct loongarch_isa
@@ -42,6 +43,11 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
.fpu = ISA_EXT_FPU64,
.simd = ISA_EXT_SIMD_LASX,
},
+ [CPU_LA664] = {
+ .base = ISA_BASE_LA64V110,
+ .fpu = ISA_EXT_FPU64,
+ .simd = ISA_EXT_SIMD_LASX,
+ },
};
struct loongarch_cache
@@ -58,6 +64,12 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
.l2d_size = 256,
.simultaneous_prefetches = 4,
},
+ [CPU_LA664] = {
+ .l1d_line_size = 64,
+ .l1d_size = 64,
+ .l2d_size = 256,
+ .simultaneous_prefetches = 4,
+ },
};
struct loongarch_align
@@ -70,6 +82,10 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
.function = "32",
.label = "16",
},
+ [CPU_LA664] = {
+ .function = "32",
+ .label = "16",
+ },
};
@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
[CPU_LA464] = {
DEFAULT_COSTS
},
+ [CPU_LA664] = {
+ DEFAULT_COSTS
+ },
};
/* RTX costs to use when optimizing for size. */
@@ -127,6 +146,7 @@ loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
[CPU_NATIVE] = 4,
[CPU_LOONGARCH64] = 4,
[CPU_LA464] = 4,
+ [CPU_LA664] = 6,
};
int
@@ -134,6 +154,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
[CPU_NATIVE] = 4,
[CPU_LOONGARCH64] = 4,
[CPU_LA464] = 4,
+ [CPU_LA664] = 6,
};
/* Wiring string definitions from loongarch-str.h to global arrays
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
index 6e2a6987910..db497f3ffe2 100644
--- a/gcc/config/loongarch/loongarch-def.h
+++ b/gcc/config/loongarch/loongarch-def.h
@@ -55,7 +55,8 @@ extern "C" {
/* enum isa_base */
extern const char* loongarch_isa_base_strings[];
#define ISA_BASE_LA64V100 0
-#define N_ISA_BASE_TYPES 1
+#define ISA_BASE_LA64V110 1
+#define N_ISA_BASE_TYPES 2
/* enum isa_ext_* */
extern const char* loongarch_isa_ext_strings[];
@@ -141,8 +142,9 @@ struct loongarch_target
#define CPU_ABI_DEFAULT 1
#define CPU_LOONGARCH64 2
#define CPU_LA464 3
-#define N_ARCH_TYPES 4
-#define N_TUNE_TYPES 4
+#define CPU_LA664 4
+#define N_ARCH_TYPES 5
+#define N_TUNE_TYPES 5
/* parallel tables. */
extern const char* loongarch_cpu_strings[];
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
index e5921189a06..67a59152a01 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -552,17 +552,17 @@ isa_default_abi (const struct loongarch_isa *isa)
switch (isa->fpu)
{
case ISA_EXT_FPU64:
- if (isa->base == ISA_BASE_LA64V100)
+ if (isa->base >= ISA_BASE_LA64V100)
abi.base = ABI_BASE_LP64D;
break;
case ISA_EXT_FPU32:
- if (isa->base == ISA_BASE_LA64V100)
+ if (isa->base >= ISA_BASE_LA64V100)
abi.base = ABI_BASE_LP64F;
break;
case ISA_EXT_NONE:
- if (isa->base == ISA_BASE_LA64V100)
+ if (isa->base >= ISA_BASE_LA64V100)
abi.base = ABI_BASE_LP64S;
break;
@@ -582,7 +582,7 @@ isa_base_compat_p (const struct loongarch_isa *set1,
switch (set2->base)
{
case ISA_BASE_LA64V100:
- return (set1->base == ISA_BASE_LA64V100);
+ return (set1->base >= ISA_BASE_LA64V100);
default:
gcc_unreachable ();
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
index 6dd309aad96..0e1b3e528a1 100644
--- a/gcc/config/loongarch/loongarch-opts.h
+++ b/gcc/config/loongarch/loongarch-opts.h
@@ -76,7 +76,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
#define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64)
#define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D)
-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100)
+#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \
+ || la_target.isa.base == ISA_BASE_LA64V110)
#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \
|| la_target.abi.base == ABI_BASE_LP64F \
|| la_target.abi.base == ABI_BASE_LP64S)
@@ -88,6 +89,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
/* TARGET_ macros for use in *.md template conditionals */
#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464)
+#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664)
/* Note: optimize_size may vary across functions,
while -m[no]-memcpy imposes a global constraint. */
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
index 072558c28f1..fc4f41bfc1e 100644
--- a/gcc/config/loongarch/loongarch-str.h
+++ b/gcc/config/loongarch/loongarch-str.h
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#define STR_CPU_ABI_DEFAULT "abi-default"
#define STR_CPU_LOONGARCH64 "loongarch64"
#define STR_CPU_LA464 "la464"
+#define STR_CPU_LA664 "la664"
#define STR_ISA_BASE_LA64V100 "la64"
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 22ca24a1878..4cd509f11c6 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -10177,6 +10177,7 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
{
case CPU_LOONGARCH64:
case CPU_LA464:
+ case CPU_LA664:
/* Vector part. */
if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
{
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index a5988411fbb..7f129e53ba5 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -114,6 +114,9 @@ Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64)
EnumValue
Enum(cpu_type) String(la464) Value(CPU_LA464)
+EnumValue
+Enum(cpu_type) String(la664) Value(CPU_LA664)
+
march=
Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
-march=PROCESSOR Generate code for the given PROCESSOR ISA.
--
2.33.0
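
For context, the PRID case added to fill_native_cpu_config above is what
lets -march=native and -mtune=native select la664 on real hardware.  A
minimal user-space sketch of the same classification, assuming
<larchintrin.h> is available and that revision bits are masked off as in
fill_native_cpu_config (the 0x00ffff00 mask here is an assumption):

#include <larchintrin.h>

/* Return nonzero when the executing core reports the LA664 PRID.  */
int
running_on_la664 (void)
{
  unsigned int prid = __cpucfg (0);  /* CPUCFG word 0 holds the PRID.  */
  return (prid & 0x00ffff00) == 0x0014d000;
}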


@@ -0,0 +1,166 @@
From 7cfe6e057045ac794afbe9097b1b211c0e1ea723 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 6 Apr 2023 16:02:07 +0800
Subject: [PATCH 039/124] LoongArch: Add built-in functions description of
LoongArch Base instruction set instructions.
gcc/ChangeLog:
* doc/extend.texi: Add section for LoongArch Base Built-in functions.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/doc/extend.texi | 129 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 129 insertions(+)
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 3c101ca89..1d1bac255 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -14678,6 +14678,7 @@ instructions, but allow the compiler to schedule those calls.
* Blackfin Built-in Functions::
* BPF Built-in Functions::
* FR-V Built-in Functions::
+* LoongArch Base Built-in Functions::
* MIPS DSP Built-in Functions::
* MIPS Paired-Single Support::
* MIPS Loongson Built-in Functions::
@@ -16128,6 +16129,134 @@ Use the @code{nldub} instruction to load the contents of address @var{x}
into the data cache. The instruction is issued in slot I1@.
@end table
+@node LoongArch Base Built-in Functions
+@subsection LoongArch Base Built-in Functions
+
+These built-in functions are available for LoongArch.
+
+Data Type Description:
+@itemize
+@item @code{imm0_31}, a compile-time constant in range 0 to 31;
+@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
+@item @code{imm0_32767}, a compile-time constant in range 0 to 32767;
+@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
+@end itemize
+
+The intrinsics provided are listed below:
+@smallexample
+ unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
+ void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int)
+ void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, imm_n2048_2047)
+ unsigned int __builtin_loongarch_cpucfg (unsigned int)
+ void __builtin_loongarch_asrtle_d (long int, long int)
+ void __builtin_loongarch_asrtgt_d (long int, long int)
+ long int __builtin_loongarch_lddir_d (long int, imm0_31)
+ void __builtin_loongarch_ldpte_d (long int, imm0_31)
+
+ int __builtin_loongarch_crc_w_b_w (char, int)
+ int __builtin_loongarch_crc_w_h_w (short, int)
+ int __builtin_loongarch_crc_w_w_w (int, int)
+ int __builtin_loongarch_crc_w_d_w (long int, int)
+ int __builtin_loongarch_crcc_w_b_w (char, int)
+ int __builtin_loongarch_crcc_w_h_w (short, int)
+ int __builtin_loongarch_crcc_w_w_w (int, int)
+ int __builtin_loongarch_crcc_w_d_w (long int, int)
+
+ unsigned int __builtin_loongarch_csrrd_w (imm0_16383)
+ unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383)
+ unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, imm0_16383)
+ unsigned long int __builtin_loongarch_csrrd_d (imm0_16383)
+ unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, imm0_16383)
+ unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, unsigned long int, imm0_16383)
+
+ unsigned char __builtin_loongarch_iocsrrd_b (unsigned int)
+ unsigned short __builtin_loongarch_iocsrrd_h (unsigned int)
+ unsigned int __builtin_loongarch_iocsrrd_w (unsigned int)
+ unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int)
+ void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int)
+ void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int)
+ void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int)
+ void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int)
+
+ void __builtin_loongarch_dbar (imm0_32767)
+ void __builtin_loongarch_ibar (imm0_32767)
+
+ void __builtin_loongarch_syscall (imm0_32767)
+ void __builtin_loongarch_break (imm0_32767)
+@end smallexample
+
+@emph{Note:} The control registers may be 32-bit or 64-bit, but the
+access instructions do not distinguish between the two widths, so GCC
+renames the control instructions when implementing these intrinsics.
+
+Taking the csrrd instruction as an example, the built-in functions are implemented as follows:
+@smallexample
+ __builtin_loongarch_csrrd_w // Use when reading a 32-bit control register.
+ __builtin_loongarch_csrrd_d // Use when reading a 64-bit control register.
+@end smallexample
+
+For convenience, the built-in functions are wrapped in shorter
+functions; these wrappers, together with the types @code{__drdtime_t}
+and @code{__rdtime_t}, are defined in @code{larchintrin.h}.  So to call
+the following functions you need to include @code{larchintrin.h}.
+
+@smallexample
+ typedef struct drdtime@{
+ unsigned long dvalue;
+ unsigned long dtimeid;
+ @} __drdtime_t;
+
+ typedef struct rdtime@{
+ unsigned int value;
+ unsigned int timeid;
+ @} __rdtime_t;
+@end smallexample
+
+@smallexample
+ __drdtime_t __rdtime_d (void)
+ __rdtime_t __rdtimel_w (void)
+ __rdtime_t __rdtimeh_w (void)
+ unsigned int __movfcsr2gr (imm0_31)
+ void __movgr2fcsr (imm0_31, unsigned int)
+ void __cacop_d (imm0_31, unsigned long, imm_n2048_2047)
+ unsigned int __cpucfg (unsigned int)
+ void __asrtle_d (long int, long int)
+ void __asrtgt_d (long int, long int)
+ long int __lddir_d (long int, imm0_31)
+ void __ldpte_d (long int, imm0_31)
+
+ int __crc_w_b_w (char, int)
+ int __crc_w_h_w (short, int)
+ int __crc_w_w_w (int, int)
+ int __crc_w_d_w (long int, int)
+ int __crcc_w_b_w (char, int)
+ int __crcc_w_h_w (short, int)
+ int __crcc_w_w_w (int, int)
+ int __crcc_w_d_w (long int, int)
+
+ unsigned int __csrrd_w (imm0_16383)
+ unsigned int __csrwr_w (unsigned int, imm0_16383)
+ unsigned int __csrxchg_w (unsigned int, unsigned int, imm0_16383)
+ unsigned long __csrrd_d (imm0_16383)
+ unsigned long __csrwr_d (unsigned long, imm0_16383)
+ unsigned long __csrxchg_d (unsigned long, unsigned long, imm0_16383)
+
+ unsigned char __iocsrrd_b (unsigned int)
+ unsigned short __iocsrrd_h (unsigned int)
+ unsigned int __iocsrrd_w (unsigned int)
+ unsigned long __iocsrrd_d (unsigned int)
+ void __iocsrwr_b (unsigned char, unsigned int)
+ void __iocsrwr_h (unsigned short, unsigned int)
+ void __iocsrwr_w (unsigned int, unsigned int)
+ void __iocsrwr_d (unsigned long, unsigned int)
+
+ void __dbar (imm0_32767)
+ void __ibar (imm0_32767)
+
+ void __syscall (imm0_32767)
+ void __break (imm0_32767)
+@end smallexample
+
@node MIPS DSP Built-in Functions
@subsection MIPS DSP Built-in Functions
--
2.33.0
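
A minimal usage sketch for the wrapped intrinsics documented above,
assuming a LoongArch64 target whose compiler ships <larchintrin.h>:
__rdtime_d reads the stable counter and __cpucfg reads one CPU
configuration word.

#include <stdio.h>
#include <larchintrin.h>

int
main (void)
{
  __drdtime_t t = __rdtime_d ();   /* stable counter value + counter id */
  unsigned int w0 = __cpucfg (0);  /* CPUCFG word 0 (PRID) */

  printf ("counter=%lu id=%lu cpucfg0=0x%x\n", t.dvalue, t.dtimeid, w0);
  return 0;
}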


@@ -0,0 +1,107 @@
From 41a4945886631a1b2898ae957389d5db18a07141 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Fri, 4 Nov 2022 15:12:22 +0800
Subject: [PATCH 025/124] LoongArch: Add fcopysign instructions
Add fcopysign.{s,d} with the names copysign{sf,df}3 so GCC will expand
__builtin_copysign{f,} to a single instruction.
Link: https://sourceware.org/pipermail/libc-alpha/2022-November/143177.html
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): New unspec.
(type): Add fcopysign.
(copysign<mode>3): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/fcopysign.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 22 ++++++++++++++++++-
.../gcc.target/loongarch/fcopysign.c | 16 ++++++++++++++
2 files changed, 37 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/fcopysign.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 214b14bdd..bda34d0f3 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -37,6 +37,7 @@
UNSPEC_FCLASS
UNSPEC_FMAX
UNSPEC_FMIN
+ UNSPEC_FCOPYSIGN
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -214,6 +215,7 @@
;; fabs floating point absolute value
;; fneg floating point negation
;; fcmp floating point compare
+;; fcopysign floating point copysign
;; fcvt floating point convert
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
@@ -226,7 +228,7 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt,
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -976,6 +978,24 @@
(set_attr "mode" "<UNITMODE>")])
;;
+;; ....................
+;;
+;; FLOATING POINT COPYSIGN
+;;
+;; ....................
+
+(define_insn "copysign<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:ANYF 2 "register_operand" "f")]
+ UNSPEC_FCOPYSIGN))]
+ "TARGET_HARD_FLOAT"
+ "fcopysign.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fcopysign")
+ (set_attr "mode" "<UNITMODE>")])
+
+
+;;
;; ...................
;;
;; Count leading zeroes.
diff --git a/gcc/testsuite/gcc.target/loongarch/fcopysign.c b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
new file mode 100644
index 000000000..058ba2cf5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float" } */
+/* { dg-final { scan-assembler "fcopysign\\.s" } } */
+/* { dg-final { scan-assembler "fcopysign\\.d" } } */
+
+double
+my_copysign (double a, double b)
+{
+ return __builtin_copysign (a, b);
+}
+
+float
+my_copysignf (float a, float b)
+{
+ return __builtin_copysignf (a, b);
+}
--
2.33.0
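
Since fcopysign.{s,d} only transfers the sign bit, the single
instruction replaces the usual load-mask-or bit manipulation.  A
portable C model of the double-precision semantics (a sketch of what
the instruction computes, not of the expander):

#include <stdint.h>
#include <string.h>

/* Magnitude of A combined with the sign bit of B, as fcopysign.d does.  */
double
copysign_model (double a, double b)
{
  uint64_t ua, ub;
  memcpy (&ua, &a, sizeof ua);
  memcpy (&ub, &b, sizeof ub);
  ua = (ua & ~(1ULL << 63)) | (ub & (1ULL << 63));
  memcpy (&a, &ua, sizeof ua);
  return a;
}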


@@ -0,0 +1,123 @@
From 2ae587a86bba31b91a127e353c31c9f861ff5326 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 8 Nov 2022 13:42:20 +0800
Subject: [PATCH 030/124] LoongArch: Add flogb.{s,d} instructions and expand
logb{sf,df}2
On LoongArch, the flogb instructions extract the exponent of a
non-negative floating-point value but produce NaN for negative values,
so we need to add a fabs instruction when we expand logb.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FLOGB): New unspec.
(type): Add flogb.
(logb_non_negative<mode>2): New instruction template.
(logb<mode>2): New define_expand.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/flogb.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 35 ++++++++++++++++++++--
gcc/testsuite/gcc.target/loongarch/flogb.c | 18 +++++++++++
2 files changed, 51 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/flogb.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index c141c9add..682ab9617 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -42,6 +42,7 @@
UNSPEC_FTINTRM
UNSPEC_FTINTRP
UNSPEC_FSCALEB
+ UNSPEC_FLOGB
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -217,6 +218,7 @@
;; fdiv floating point divide
;; frdiv floating point reciprocal divide
;; fabs floating point absolute value
+;; flogb floating point exponent extract
;; fneg floating point negation
;; fcmp floating point compare
;; fcopysign floating point copysign
@@ -233,8 +235,8 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
- fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
+ fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -1039,6 +1041,35 @@
(set_attr "mode" "<UNITMODE>")])
;;
+;; ....................
+;;
+;; FLOATING POINT EXPONENT EXTRACT
+;;
+;; ....................
+
+(define_insn "logb_non_negative<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
+ UNSPEC_FLOGB))]
+ "TARGET_HARD_FLOAT"
+ "flogb.<fmt>\t%0,%1"
+ [(set_attr "type" "flogb")
+ (set_attr "mode" "<UNITMODE>")])
+
+(define_expand "logb<mode>2"
+ [(set (match_operand:ANYF 0 "register_operand")
+ (unspec:ANYF [(abs:ANYF (match_operand:ANYF 1 "register_operand"))]
+ UNSPEC_FLOGB))]
+ "TARGET_HARD_FLOAT"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_abs<mode>2 (tmp, operands[1]));
+ emit_insn (gen_logb_non_negative<mode>2 (operands[0], tmp));
+ DONE;
+})
+
+;;
;; ...................
;;
;; Count leading zeroes.
diff --git a/gcc/testsuite/gcc.target/loongarch/flogb.c b/gcc/testsuite/gcc.target/loongarch/flogb.c
new file mode 100644
index 000000000..1daefe54e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/flogb.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler "fabs\\.s" } } */
+/* { dg-final { scan-assembler "fabs\\.d" } } */
+/* { dg-final { scan-assembler "flogb\\.s" } } */
+/* { dg-final { scan-assembler "flogb\\.d" } } */
+
+double
+my_logb (double a)
+{
+ return __builtin_logb (a);
+}
+
+float
+my_logbf (float a)
+{
+ return __builtin_logbf (a);
+}
--
2.33.0
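
The fabs in the expander is what keeps the C semantics: logb is defined
on the magnitude of its argument, while a bare flogb would return NaN
for negative inputs.  A quick check of the values the expansion must
produce:

#include <stdio.h>

int
main (void)
{
  printf ("%f\n", __builtin_logb (-8.0));  /* 3.000000:  |-8.0| = 2^3 */
  printf ("%f\n", __builtin_logb (0.375)); /* -2.000000: 0.375 = 1.5 * 2^-2 */
  return 0;
}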


@@ -0,0 +1,155 @@
From e3d69a3b7a4e00e8bba88b8b4abaa1c17bc083d5 Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Tue, 8 Nov 2022 12:14:35 +0800
Subject: [PATCH 029/124] LoongArch: Add fscaleb.{s,d} instructions as
ldexp{sf,df}3
This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
-fno-math-errno.
IMODE is added because we can't hard-code SI for operand 2: the
fscaleb.d instruction always takes the high half of both source
registers into account.  See my_ldexp_long in the test case.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
(type): Add fscaleb.
(IMODE): New mode attr.
(ldexp<mode>3): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/fscaleb.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 26 ++++++++++-
gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
2 files changed, 72 insertions(+), 2 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index eb127c346..c141c9add 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -41,6 +41,7 @@
UNSPEC_FTINT
UNSPEC_FTINTRM
UNSPEC_FTINTRP
+ UNSPEC_FSCALEB
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -220,6 +221,7 @@
;; fcmp floating point compare
;; fcopysign floating point copysign
;; fcvt floating point convert
+;; fscaleb floating point scale
;; fsqrt floating point square root
;; frsqrt floating point reciprocal square root
;; multi multiword sequence (or user asm statements)
@@ -231,8 +233,8 @@
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
shift,slt,signext,clz,trap,imul,idiv,move,
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
- frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
+ fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
(cond [(eq_attr "jirl" "!unset") (const_string "call")
(eq_attr "got" "load") (const_string "load")
@@ -418,6 +420,10 @@
;; the controlling mode.
(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
+;; This attribute gives the integer mode that has the same size of a
+;; floating-point mode.
+(define_mode_attr IMODE [(SF "SI") (DF "DI")])
+
;; This code iterator allows signed and unsigned widening multiplications
;; to use the same template.
(define_code_iterator any_extend [sign_extend zero_extend])
@@ -1014,7 +1020,23 @@
"fcopysign.<fmt>\t%0,%1,%2"
[(set_attr "type" "fcopysign")
(set_attr "mode" "<UNITMODE>")])
+
+;;
+;; ....................
+;;
+;; FLOATING POINT SCALE
+;;
+;; ....................
+(define_insn "ldexp<mode>3"
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
+ (match_operand:<IMODE> 2 "register_operand" "f")]
+ UNSPEC_FSCALEB))]
+ "TARGET_HARD_FLOAT"
+ "fscaleb.<fmt>\t%0,%1,%2"
+ [(set_attr "type" "fscaleb")
+ (set_attr "mode" "<UNITMODE>")])
;;
;; ...................
diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
new file mode 100644
index 000000000..f18470fbb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
@@ -0,0 +1,48 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
+/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
+/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
+/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
+
+double
+my_scalbln (double a, long b)
+{
+ return __builtin_scalbln (a, b);
+}
+
+double
+my_scalbn (double a, int b)
+{
+ return __builtin_scalbn (a, b);
+}
+
+double
+my_ldexp (double a, int b)
+{
+ return __builtin_ldexp (a, b);
+}
+
+float
+my_scalblnf (float a, long b)
+{
+ return __builtin_scalblnf (a, b);
+}
+
+float
+my_scalbnf (float a, int b)
+{
+ return __builtin_scalbnf (a, b);
+}
+
+float
+my_ldexpf (float a, int b)
+{
+ return __builtin_ldexpf (a, b);
+}
+
+/* b must be sign-extended */
+double
+my_ldexp_long (double a, long b)
+{
+ return __builtin_ldexp (a, b);
+}
--
2.33.0
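
fscaleb.{s,d} computes x * 2^n, which is exactly the ldexp/scalbn
operation, so with -fno-math-errno each call below should collapse to a
single instruction.  A small semantic check:

#include <stdio.h>

int
main (void)
{
  printf ("%f\n", __builtin_ldexp (3.0, 4));      /* 3 * 2^4  = 48.000000 */
  printf ("%f\n", __builtin_scalbnf (1.0f, -3));  /* 1 * 2^-3 = 0.125000  */
  return 0;
}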


@@ -0,0 +1,220 @@
From 76d599c6d8f9cf78b51cd76a7ca8fbe11e2cda2b Mon Sep 17 00:00:00 2001
From: Xi Ruoyao <xry111@xry111.site>
Date: Sun, 6 Nov 2022 23:16:49 +0800
Subject: [PATCH 028/124] LoongArch: Add ftint{,rm,rp}.{w,l}.{s,d} instructions
This allows the following built-ins to be optimized with -fno-math-errno:
- __builtin_lrint{,f}
- __builtin_lfloor{,f}
- __builtin_lceil{,f}
Inspired by
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605287.html.
ANYFI is added so the compiler won't try ftint.l.s with -mfpu=32.  If we
simply used a GPR here, an ICE would be triggered by __builtin_lrintf
with -mfpu=32.

The ftint{rm,rp} instructions may raise the inexact exception, so they
can only be used with -ffp-int-builtin-inexact or -fno-trapping-math.
Note that the .w.{s,d} variants are not tested because we don't support
ILP32 for now.
gcc/ChangeLog:
* config/loongarch/loongarch.md (UNSPEC_FTINT): New unspec.
(UNSPEC_FTINTRM): Likewise.
(UNSPEC_FTINTRP): Likewise.
(LRINT): New define_int_iterator.
(lrint_pattern): New define_int_attr.
(lrint_submenmonic): Likewise.
(lrint_allow_inexact): Likewise.
(ANYFI): New define_mode_iterator.
(lrint<ANYF><ANYFI>): New instruction template.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/ftint.c: New test.
* gcc.target/loongarch/ftint-no-inexact.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch.md | 34 ++++++++++++++
.../gcc.target/loongarch/ftint-no-inexact.c | 44 +++++++++++++++++++
gcc/testsuite/gcc.target/loongarch/ftint.c | 44 +++++++++++++++++++
3 files changed, 122 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint.c
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index a14ab14ac..eb127c346 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -38,6 +38,9 @@
UNSPEC_FMAX
UNSPEC_FMIN
UNSPEC_FCOPYSIGN
+ UNSPEC_FTINT
+ UNSPEC_FTINTRM
+ UNSPEC_FTINTRP
;; Override return address for exception handling.
UNSPEC_EH_RETURN
@@ -374,6 +377,11 @@
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
(DF "TARGET_DOUBLE_FLOAT")])
+;; Iterator for fixed-point modes which can be held in a hardware
+;; floating-point register.
+(define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
+ (DI "TARGET_DOUBLE_FLOAT")])
+
;; A mode for which moves involving FPRs may need to be split.
(define_mode_iterator SPLITF
[(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
@@ -515,6 +523,19 @@
(define_code_attr sel [(eq "masknez") (ne "maskeqz")])
(define_code_attr selinv [(eq "maskeqz") (ne "masknez")])
+;; Iterator and attributes for floating-point to fixed-point conversion
+;; instructions.
+(define_int_iterator LRINT [UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP])
+(define_int_attr lrint_pattern [(UNSPEC_FTINT "lrint")
+ (UNSPEC_FTINTRM "lfloor")
+ (UNSPEC_FTINTRP "lceil")])
+(define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
+ (UNSPEC_FTINTRM "rm")
+ (UNSPEC_FTINTRP "rp")])
+(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
+ (UNSPEC_FTINTRM "0")
+ (UNSPEC_FTINTRP "0")])
+
;;
;; ....................
;;
@@ -2022,6 +2043,19 @@
[(set_attr "type" "fcvt")
(set_attr "mode" "<MODE>")])
+;; Convert floating-point numbers to integers
+(define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2"
+ [(set (match_operand:ANYFI 0 "register_operand" "=f")
+ (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
+ LRINT))]
+ "TARGET_HARD_FLOAT &&
+ (<lrint_allow_inexact>
+ || flag_fp_int_builtin_inexact
+ || !flag_trapping_math)"
+ "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
+ [(set_attr "type" "fcvt")
+ (set_attr "mode" "<ANYF:MODE>")])
+
;; Load the low word of operand 0 with operand 1.
(define_insn "load_low<mode>"
[(set (match_operand:SPLITF 0 "register_operand" "=f,f")
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
new file mode 100644
index 000000000..88b83a9c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact" } */
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.s" } } */
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.d" } } */
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.s" } } */
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.d" } } */
+
+long
+my_lrint (double a)
+{
+ return __builtin_lrint (a);
+}
+
+long
+my_lrintf (float a)
+{
+ return __builtin_lrintf (a);
+}
+
+long
+my_lfloor (double a)
+{
+ return __builtin_lfloor (a);
+}
+
+long
+my_lfloorf (float a)
+{
+ return __builtin_lfloorf (a);
+}
+
+long
+my_lceil (double a)
+{
+ return __builtin_lceil (a);
+}
+
+long
+my_lceilf (float a)
+{
+ return __builtin_lceilf (a);
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint.c b/gcc/testsuite/gcc.target/loongarch/ftint.c
new file mode 100644
index 000000000..7a326a454
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/ftint.c
@@ -0,0 +1,44 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact" } */
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
+/* { dg-final { scan-assembler "ftintrm\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftintrm\\.l\\.d" } } */
+/* { dg-final { scan-assembler "ftintrp\\.l\\.s" } } */
+/* { dg-final { scan-assembler "ftintrp\\.l\\.d" } } */
+
+long
+my_lrint (double a)
+{
+ return __builtin_lrint (a);
+}
+
+long
+my_lrintf (float a)
+{
+ return __builtin_lrintf (a);
+}
+
+long
+my_lfloor (double a)
+{
+ return __builtin_lfloor (a);
+}
+
+long
+my_lfloorf (float a)
+{
+ return __builtin_lfloorf (a);
+}
+
+long
+my_lceil (double a)
+{
+ return __builtin_lceil (a);
+}
+
+long
+my_lceilf (float a)
+{
+ return __builtin_lceilf (a);
+}
--
2.33.0
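
The three unspecs correspond to three C-level rounding behaviours:
lrint follows the current rounding mode (round-to-nearest-even by
default), lfloor always rounds down, and lceil always rounds up.  A
semantic check under the default floating-point environment:

#include <stdio.h>

int
main (void)
{
  printf ("%ld\n", __builtin_lrint (2.5));   /* 2: ties-to-even (ftint)   */
  printf ("%ld\n", __builtin_lfloor (2.9));  /* 2: round down   (ftintrm) */
  printf ("%ld\n", __builtin_lceil (2.1));   /* 3: round up     (ftintrp) */
  return 0;
}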


@@ -0,0 +1,158 @@
From 52a41006c2e8141a42de93ffcc2c040e034244b2 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Wed, 16 Nov 2022 09:25:14 +0800
Subject: [PATCH 031/124] LoongArch: Add prefetch instructions.
Enable sw prefetching at -O3 and higher.
Co-Authored-By: xujiahao <xujiahao@loongson.cn>
gcc/ChangeLog:
* config/loongarch/constraints.md (ZD): New constraint.
* config/loongarch/loongarch-def.c: Initialize the number of parallel prefetches.
* config/loongarch/loongarch-tune.h (struct loongarch_cache):
Define the number of parallel prefetches.
* config/loongarch/loongarch.cc (loongarch_option_override_internal):
Set up parameters to be used in the prefetching algorithm.
* config/loongarch/loongarch.md (prefetch): New template.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/constraints.md | 10 ++++++++++
gcc/config/loongarch/loongarch-def.c | 2 ++
gcc/config/loongarch/loongarch-tune.h | 1 +
gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
5 files changed, 55 insertions(+)
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index 43cb7b5f0..46f7f63ae 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -86,6 +86,10 @@
;; "ZB"
;; "An address that is held in a general-purpose register.
;; The offset is zero"
+;; "ZD"
+;; "An address operand whose address is formed by a base register
+;; and offset that is suitable for use in instructions with the same
+;; addressing mode as @code{preld}."
;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
@@ -190,3 +194,9 @@
The offset is zero"
(and (match_code "mem")
(match_test "REG_P (XEXP (op, 0))")))
+
+(define_address_constraint "ZD"
+ "An address operand whose address is formed by a base register
+ and offset that is suitable for use in instructions with the same
+ addressing mode as @code{preld}."
+ (match_test "loongarch_12bit_offset_address_p (op, mode)"))
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index cbf995d81..80ab10a52 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
[CPU_LA464] = {
.l1d_line_size = 64,
.l1d_size = 64,
.l2d_size = 256,
+ .simultaneous_prefetches = 4,
},
};
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
index 6f3530f5c..8e3eb2947 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -45,6 +45,7 @@ struct loongarch_cache {
int l1d_line_size; /* bytes */
int l1d_size; /* KiB */
int l2d_size; /* kiB */
+ int simultaneous_prefetches; /* number of parallel prefetches */
};
#endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index d552b162a..622c9435b 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
+#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
@@ -6099,6 +6100,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
if (loongarch_branch_cost == 0)
loongarch_branch_cost = loongarch_cost->branch_cost;
+ /* Set up parameters to be used in prefetching algorithm. */
+ int simultaneous_prefetches
+ = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_simultaneous_prefetches,
+ simultaneous_prefetches);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l1_cache_line_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l1_cache_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
+
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
+ param_l2_cache_size,
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
+
+
+ /* Enable sw prefetching at -O3 and higher. */
+ if (opts->x_flag_prefetch_loop_arrays < 0
+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+ && !opts->x_optimize_size)
+ opts->x_flag_prefetch_loop_arrays = 1;
+
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
error ("%qs cannot be used for compiling a shared library",
"-mdirect-extern-access");
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 682ab9617..2fda53819 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3282,6 +3282,20 @@
;; ....................
;;
+(define_insn "prefetch"
+ [(prefetch (match_operand 0 "address_operand" "ZD")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))]
+ ""
+{
+ switch (INTVAL (operands[1]))
+ {
+ case 0: return "preld\t0,%a0";
+ case 1: return "preld\t8,%a0";
+ default: gcc_unreachable ();
+ }
+})
+
(define_insn "nop"
[(const_int 0)]
""
--
2.33.0
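
At the source level this template is reached through __builtin_prefetch:
operand 1 is the read/write flag, so a read hint emits preld 0 and a
write hint emits preld 8, while the locality argument (operand 2) does
not affect the chosen hint.  A sketch:

/* Prefetch a few iterations ahead of each stream; the distance of 8
   elements is illustrative, not tuned.  */
void
axpy_warm (double *dst, const double *src, int n)
{
  for (int i = 0; i < n; i++)
    {
      __builtin_prefetch (&src[i + 8], 0, 3);  /* read hint  -> preld 0 */
      __builtin_prefetch (&dst[i + 8], 1, 3);  /* write hint -> preld 8 */
      dst[i] += 2.0 * src[i];
    }
}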


@@ -0,0 +1,794 @@
From b1c92fb9dab678e4c9c23fa77185011494d145b9 Mon Sep 17 00:00:00 2001
From: Lulu Cheng <chenglulu@loongson.cn>
Date: Thu, 18 Aug 2022 17:26:13 +0800
Subject: [PATCH 011/124] LoongArch: Add support for code model extreme.
Use five instructions to calculate a signed 64-bit offset relative to the pc.
gcc/ChangeLog:
* config/loongarch/loongarch-opts.cc: Allow cmodel to be extreme.
* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
Add extreme support for TLS GD and LD types.
(loongarch_legitimize_tls_address): Add extreme support for TLS LE
and IE.
(loongarch_split_symbol): When compiling with -mcmodel=extreme,
the symbol address will be obtained through five instructions.
(loongarch_print_operand_reloc): Add support.
(loongarch_print_operand): Add support.
(loongarch_print_operand_address): Add support.
(loongarch_option_override_internal): Set '-mcmodel=extreme' option
incompatible with '-mno-explicit-relocs'.
* config/loongarch/loongarch.md (@lui_l_hi20<mode>):
Load bits 12-31 of the data into the register.
(lui_h_lo20): Load bits 32-51 of the data while preserving bits 0-31
of the source register.
(lui_h_hi12): Load bits 52-63 of the data while preserving bits 0-51
of the source register.
* config/loongarch/predicates.md: Symbols need to be decomposed
when the macro TARGET_CMODEL_EXTREME is defined.
* doc/invoke.texi: Update the description of cmodel in the document.
Document -W[no-]extreme-plt.
gcc/testsuite/ChangeLog:
* gcc.target/loongarch/func-call-1.c: Add option '-mcmodel=normal'.
* gcc.target/loongarch/func-call-2.c: Likewise.
* gcc.target/loongarch/func-call-3.c: Likewise.
* gcc.target/loongarch/func-call-4.c: Likewise.
* gcc.target/loongarch/func-call-5.c: Likewise.
* gcc.target/loongarch/func-call-6.c: Likewise.
* gcc.target/loongarch/func-call-7.c: Likewise.
* gcc.target/loongarch/func-call-8.c: Likewise.
* gcc.target/loongarch/relocs-symbol-noaddend.c: Likewise.
* gcc.target/loongarch/func-call-extreme-1.c: New test.
* gcc.target/loongarch/func-call-extreme-2.c: New test.
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/config/loongarch/loongarch-opts.cc | 3 +-
gcc/config/loongarch/loongarch.cc | 222 +++++++++++++++---
gcc/config/loongarch/loongarch.md | 34 ++-
gcc/config/loongarch/predicates.md | 9 +-
gcc/doc/invoke.texi | 50 +---
.../gcc.target/loongarch/func-call-1.c | 2 +-
.../gcc.target/loongarch/func-call-2.c | 2 +-
.../gcc.target/loongarch/func-call-3.c | 2 +-
.../gcc.target/loongarch/func-call-4.c | 2 +-
.../gcc.target/loongarch/func-call-5.c | 2 +-
.../gcc.target/loongarch/func-call-6.c | 2 +-
.../gcc.target/loongarch/func-call-7.c | 2 +-
.../gcc.target/loongarch/func-call-8.c | 2 +-
.../loongarch/func-call-extreme-1.c | 32 +++
.../loongarch/func-call-extreme-2.c | 32 +++
.../loongarch/relocs-symbol-noaddend.c | 2 +-
16 files changed, 318 insertions(+), 82 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
index 3f70943de..2ae89f234 100644
--- a/gcc/config/loongarch/loongarch-opts.cc
+++ b/gcc/config/loongarch/loongarch-opts.cc
@@ -376,14 +376,13 @@ fallback:
/* 5. Target code model */
t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
- if (t.cmodel != CMODEL_NORMAL)
+ if (t.cmodel != CMODEL_NORMAL && t.cmodel != CMODEL_EXTREME)
{
warning (0, "%qs is not supported, now cmodel is set to %qs",
loongarch_cmodel_strings[t.cmodel], "normal");
t.cmodel = CMODEL_NORMAL;
}
-
/* Cleanup and return. */
obstack_free (&msg_obstack, NULL);
*target = t;
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 76bf55ea4..1a33f668f 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -2436,7 +2436,19 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
/* Split tls symbol to high and low. */
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
high = loongarch_force_temporary (tmp, high);
- emit_insn (gen_tls_low (Pmode, a0, high, loc));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ rtx tmp1 = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
+ emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
+ }
+ else
+ emit_insn (gen_tls_low (Pmode, a0, high, loc));
}
else
{
@@ -2449,14 +2461,44 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
}
if (flag_plt)
- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
+ insn = emit_call_insn (gen_call_value_internal (v0,
+ loongarch_tls_symbol,
const0_rtx));
else
{
rtx dest = gen_reg_rtx (Pmode);
- rtx high = gen_reg_rtx (Pmode);
- loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
- emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ rtx tmp1 = gen_reg_rtx (Pmode);
+ rtx high = gen_reg_rtx (Pmode);
+
+ loongarch_emit_move (high,
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+ loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
+ gen_rtx_REG (Pmode, 0),
+ loongarch_tls_symbol));
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
+ loongarch_emit_move (dest,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode, high, tmp1)));
+ }
+ else
+ {
+ if (TARGET_EXPLICIT_RELOCS)
+ {
+ rtx high = gen_reg_rtx (Pmode);
+ loongarch_emit_move (high,
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
+ emit_insn (gen_ld_from_got (Pmode, dest, high,
+ loongarch_tls_symbol));
+ }
+ else
+ loongarch_emit_move (dest, loongarch_tls_symbol);
+ }
insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
}
@@ -2508,7 +2550,23 @@ loongarch_legitimize_tls_address (rtx loc)
tmp3 = gen_reg_rtx (Pmode);
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
high = loongarch_force_temporary (tmp3, high);
- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ rtx tmp3 = gen_reg_rtx (Pmode);
+ emit_insn (gen_tls_low (Pmode, tmp3,
+ gen_rtx_REG (Pmode, 0), tmp2));
+ emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
+ emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
+ emit_move_insn (tmp1,
+ gen_rtx_MEM (Pmode,
+ gen_rtx_PLUS (Pmode,
+ high, tmp3)));
+ }
+ else
+ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
}
else
emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
@@ -2530,11 +2588,18 @@ loongarch_legitimize_tls_address (rtx loc)
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
high = loongarch_force_temporary (tmp3, high);
emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
+
+ if (TARGET_CMODEL_EXTREME)
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
+ }
}
else
emit_insn (loongarch_got_load_tls_le (tmp1, loc));
emit_insn (gen_add3_insn (dest, tmp1, tp));
-
}
break;
@@ -2603,7 +2668,6 @@ bool
loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
{
enum loongarch_symbol_type symbol_type;
- rtx high;
/* If build with '-mno-explicit-relocs', don't split symbol. */
if (!TARGET_EXPLICIT_RELOCS)
@@ -2615,6 +2679,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|| !loongarch_split_symbol_type (symbol_type))
return false;
+ rtx high, temp1 = NULL;
+
if (temp == NULL)
temp = gen_reg_rtx (Pmode);
@@ -2622,20 +2688,42 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
high = loongarch_force_temporary (temp, high);
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+ {
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
+ temp1 = gen_reg_rtx (Pmode);
+ emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
+ addr));
+ emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
+ emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
+ }
+
if (low_out)
switch (symbol_type)
{
case SYMBOL_PCREL:
- *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
- break;
+ {
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+ *low_out = gen_rtx_PLUS (Pmode, high, temp1);
+ else
+ *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
+ break;
+ }
case SYMBOL_GOT_DISP:
/* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
{
- rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
- rtx mem = gen_rtx_MEM (Pmode, low);
- *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
- UNSPEC_LOAD_FROM_GOT);
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
+ *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
+ else
+ {
+ rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
+ rtx mem = gen_rtx_MEM (Pmode, low);
+ *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
+ UNSPEC_LOAD_FROM_GOT);
+ }
+
break;
}
@@ -4584,34 +4672,86 @@ loongarch_memmodel_needs_release_fence (enum memmodel model)
in context CONTEXT. HI_RELOC indicates a high-part reloc. */
static void
-loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
+loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
+ bool hi_reloc)
{
const char *reloc;
+ if (TARGET_CMODEL_EXTREME)
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
+
switch (loongarch_classify_symbolic_expression (op))
{
case SYMBOL_PCREL:
- reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
break;
case SYMBOL_GOT_DISP:
- reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
break;
case SYMBOL_TLS_IE:
- reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
break;
case SYMBOL_TLS_LE:
- reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
break;
case SYMBOL_TLSGD:
- reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
break;
case SYMBOL_TLSLDM:
- reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
+ if (hi64_part)
+ {
+ if (TARGET_CMODEL_EXTREME)
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
+ else
+ gcc_unreachable ();
+ }
+ else
+ reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
break;
default:
@@ -4637,6 +4777,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
'L' Print the low-part relocation associated with OP.
'm' Print one less than CONST_INT OP in decimal.
'N' Print the inverse of the integer branch condition for comparison OP.
+ 'r' Print address 12-31bit relocation associated with OP.
+ 'R' Print address 32-51bit relocation associated with OP.
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
'z' for (eq:?I ...), 'n' for (ne:?I ...).
't' Like 'T', but with the EQ/NE cases reversed
@@ -4694,7 +4836,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
case 'h':
if (code == HIGH)
op = XEXP (op, 0);
- loongarch_print_operand_reloc (file, op, true /* hi_reloc */);
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+ true /* hi_reloc */);
+ break;
+
+ case 'H':
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
+ true /* hi_reloc */);
break;
case 'i':
@@ -4703,7 +4851,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
break;
case 'L':
- loongarch_print_operand_reloc (file, op, false /* lo_reloc */);
+ loongarch_print_operand_reloc (file, op, false /* hi64_part*/,
+ false /* lo_reloc */);
break;
case 'm':
@@ -4718,6 +4867,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
letter);
break;
+ case 'r':
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
+ true /* lo_reloc */);
+ break;
+
+ case 'R':
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
+ false /* lo_reloc */);
+ break;
+
case 't':
case 'T':
{
@@ -4848,7 +5007,8 @@ loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
case ADDRESS_LO_SUM:
fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
- loongarch_print_operand_reloc (file, addr.offset, false /* hi_reloc */);
+ loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
+ false /* hi_reloc */);
return;
case ADDRESS_CONST_INT:
@@ -5821,13 +5981,21 @@ loongarch_option_override_internal (struct gcc_options *opts)
switch (la_target.cmodel)
{
- case CMODEL_TINY_STATIC:
case CMODEL_EXTREME:
+ if (!TARGET_EXPLICIT_RELOCS)
+ error ("code model %qs needs %s",
+ "extreme", "-mexplicit-relocs");
+
if (opts->x_flag_plt)
- error ("code model %qs and %qs not support %s mode",
- "tiny-static", "extreme", "plt");
+ {
+ if (global_options_set.x_flag_plt)
+ error ("code model %qs is not compatible with %s",
+ "extreme", "-fplt");
+ opts->x_flag_plt = 0;
+ }
break;
+ case CMODEL_TINY_STATIC:
case CMODEL_NORMAL:
case CMODEL_TINY:
case CMODEL_LARGE:
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 8e8868de9..8fc10444c 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -60,6 +60,9 @@
UNSPEC_LOAD_FROM_GOT
UNSPEC_ORI_L_LO12
+ UNSPEC_LUI_L_HI20
+ UNSPEC_LUI_H_LO20
+ UNSPEC_LUI_H_HI12
UNSPEC_TLS_LOW
])
@@ -1934,16 +1937,45 @@
[(set_attr "type" "move")]
)
+(define_insn "@lui_l_hi20<mode>"
+ [(set (match_operand:P 0 "register_operand" "=r")
+ (unspec:P [(match_operand:P 1 "symbolic_operand")]
+ UNSPEC_LUI_L_HI20))]
+ ""
+ "lu12i.w\t%0,%r1"
+ [(set_attr "type" "move")]
+)
+
(define_insn "@ori_l_lo12<mode>"
[(set (match_operand:P 0 "register_operand" "=r")
(unspec:P [(match_operand:P 1 "register_operand" "r")
- (match_operand:P 2 "symbolic_operand")]
+ (match_operand:P 2 "symbolic_operand")]
UNSPEC_ORI_L_LO12))]
""
"ori\t%0,%1,%L2"
[(set_attr "type" "move")]
)
+(define_insn "lui_h_lo20"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
+ (match_operand:DI 2 "symbolic_operand")]
+ UNSPEC_LUI_H_LO20))]
+ "TARGET_64BIT"
+ "lu32i.d\t%0,%R2"
+ [(set_attr "type" "move")]
+)
+
+(define_insn "lui_h_hi12"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "symbolic_operand")]
+ UNSPEC_LUI_H_HI12))]
+ "TARGET_64BIT"
+ "lu52i.d\t%0,%1,%H2"
+ [(set_attr "type" "move")]
+)
+
;; Convert floating-point numbers to integers
(define_insn "frint_<fmt>"
[(set (match_operand:ANYF 0 "register_operand" "=f")
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
index cd3528c7c..e38c6fbdd 100644
--- a/gcc/config/loongarch/predicates.md
+++ b/gcc/config/loongarch/predicates.md
@@ -111,7 +111,7 @@
(match_code "const,symbol_ref,label_ref")
{
/* Split symbol to high and low if return false.
- If defined TARGET_CMODEL_LARGE, all symbol would be splited,
+ If TARGET_CMODEL_EXTREME is defined, all symbols would be split,
else if the offset is not zero, the symbol would be split. */
enum loongarch_symbol_type symbol_type;
@@ -126,10 +126,13 @@
switch (symbol_type)
{
case SYMBOL_PCREL:
- return 1;
+ if (TARGET_CMODEL_EXTREME)
+ return false;
+ else
+ return 1;
case SYMBOL_GOT_DISP:
- if (TARGET_CMODEL_LARGE || !flag_plt)
+ if (TARGET_CMODEL_EXTREME || !flag_plt)
return false;
else
return 1;
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1de2b2bd4..c4f83e62a 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1006,6 +1006,7 @@ Objective-C and Objective-C++ Dialects}.
-mcond-move-float -mno-cond-move-float @gol
-memcpy -mno-memcpy -mstrict-align -mno-strict-align @gol
-mmax-inline-memcpy-size=@var{n} @gol
+-mexplicit-relocs -mno-explicit-relocs @gol
-mcmodel=@var{code-model}}
@emph{M32R/D Options}
@@ -24617,50 +24618,19 @@ less than or equal to @var{n} bytes. The default value of @var{n} is 1024.
@item -mcmodel=@var{code-model}
Set the code model to one of:
@table @samp
-@item tiny-static
-@itemize @bullet
-@item
-local symbol and global strong symbol: The data section must be within +/-2MiB addressing space.
-The text section must be within +/-128MiB addressing space.
-@item
-global weak symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
-
-@item tiny
-@itemize @bullet
-@item
-local symbol: The data section must be within +/-2MiB addressing space.
-The text section must be within +/-128MiB
-addressing space.
-@item
-global symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
+@item tiny-static (Not implemented yet)
+@item tiny (Not implemented yet)
@item normal
-@itemize @bullet
-@item
-local symbol: The data section must be within +/-2GiB addressing space.
-The text section must be within +/-128MiB addressing space.
-@item
-global symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
+The text segment must be within 128MB addressing space. The data segment must
+be within 2GB addressing space.
-@item large
-@itemize @bullet
-@item
-local symbol: The data section must be within +/-2GiB addressing space.
-The text section must be within +/-128GiB addressing space.
-@item
-global symbol: The got table must be within +/-2GiB addressing space.
-@end itemize
+@item large (Not implemented yet)
-@item extreme(Not implemented yet)
-@itemize @bullet
-@item
-local symbol: The data and text section must be within +/-8EiB addressing space.
-@item
-global symbol: The data got table must be within +/-8EiB addressing space.
-@end itemize
+@item extreme
+This mode does not limit the size of the code segment or the data segment.
+The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
+@option{-mno-explicit-relocs}.
@end table
The default code model is @code{normal}.
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
index 01b8ea23f..76bf11b0c 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-1.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
index 4565baaec..4b468fef8 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-2.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
index 4f669a029..dd3a4882d 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-3.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
index 943adb640..f8158ec34 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-4.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
index 2c2a1c8a1..37994af43 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-5.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
index 4b0e4266e..8e366e376 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-6.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
index 51792711f..4177c3d96 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-7.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
index 330140d88..4254eaa16 100644
--- a/gcc/testsuite/gcc.target/loongarch/func-call-8.c
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs" } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
new file mode 100644
index 000000000..db1e0f853
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+
+extern void g (void);
+void
+f (void)
+{}
+
+static void
+l (void)
+{}
+
+void
+test (void)
+{
+ g ();
+}
+
+void
+test1 (void)
+{
+ f ();
+}
+
+void
+test2 (void)
+{
+ l ();
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
new file mode 100644
index 000000000..21bf81ae8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
+
+extern void g (void);
+void
+f (void)
+{}
+
+static void
+l (void)
+{}
+
+void
+test (void)
+{
+ g ();
+}
+
+void
+test1 (void)
+{
+ f ();
+}
+
+void
+test2 (void)
+{
+ l ();
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
index bfcc9bc33..3ec8bd229 100644
--- a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
+++ b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2" } */
+/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2 -mcmodel=normal" } */
/* { dg-final { scan-assembler "pcalau12i.*%pc_hi20\\(\.LANCHOR0\\)\n" } } */
/* { dg-final { scan-assembler "addi\.d.*%pc_lo12\\(\.LANCHOR0\\)\n" } } */
/* { dg-final { scan-assembler "ldptr.d\t\\\$r4,.*,0\n" } } */
--
2.33.0
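Taken together, the -mcmodel=normal pins and the two new extreme-model tests cover three call flavors: an external callee, a locally defined global, and a static function. A minimal C sketch follows (illustrative only, not part of the patch); the comments paraphrase the dg-final scan-assembler expectations above and are not authoritative ABI documentation.

/* Call flavors exercised by the func-call tests; comments paraphrase
   the dg-final patterns above.  */

extern void g (void);   /* External: with -fno-plt the call goes through the
                           GOT (pcalau12i %got_pc_hi20 + ld.d %got_pc_lo12 +
                           jirl under -mcmodel=normal); -mcmodel=extreme adds
                           lu32i.d %got64_pc_lo20 + lu52i.d %got64_pc_hi12 and
                           loads the entry with ldx.d.  With -fplt it is a
                           plain "bl %plt(g)".  */

void f (void) {}        /* Global but locally defined: a direct "bl f" under
                           -mcmodel=normal (modulo PIC/PLT flags); the full
                           %pc_hi20/%pc_lo12/%pc64_lo20/%pc64_hi12 + add.d
                           sequence plus jirl under -mcmodel=extreme.  */

static void l (void) {} /* Static: always a direct "bl l" under normal; the
                           same four-instruction PC-relative sequence as f
                           under extreme.  */

void
call_all (void)
{
  g ();
  f ();
  l ();
}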

@@ -0,0 +1,65 @@
From 2ef90d604d7bae207d5b2067b4ce38d04d4835be Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Tue, 12 Sep 2023 16:00:48 +0800
Subject: [PATCH 110/124] LoongArch: Add tests for ASX xvldrepl/xvstelm
 instruction generation.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c: New test.
	* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: New test.

Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/vector/lasx/lasx-xvldrepl.c | 16 ++++++++++++++++
.../loongarch/vector/lasx/lasx-xvstelm.c | 14 ++++++++++++++
2 files changed, 30 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
new file mode 100644
index 000000000..105567951
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvldrepl.w" 2} } */
+
+#define N 258
+
+float a[N], b[N], c[N];
+
+void
+test ()
+{
+ for (int i = 0; i < 256; i++)
+ {
+ a[i] = c[0] * b[i] + c[1];
+ }
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
new file mode 100644
index 000000000..1a7b0e86f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mlasx" } */
+/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
+
+#define LEN 256
+
+float a[LEN], b[LEN], c[LEN];
+
+void
+test ()
+{
+ for (int i = 0; i < LEN; i += 2)
+ a[i] = b[i] + c[i];
+}
--
2.33.0
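Each test relies on one property: in lasx-xvldrepl.c the loop-invariant scalars c[0] and c[1] are each broadcast once (hence scan-assembler-times 2), and in lasx-xvstelm.c the stride-2 store pattern is emitted as per-element stores. Below is a scalar model of the two instructions, assuming 256-bit vectors of 32-bit floats; it is a sketch of the semantics, not GCC's implementation.

#include <stdio.h>

#define LANES 8 /* 256-bit LASX register = 8 x 32-bit lanes.  */

/* xvldrepl.w: load one word from memory, replicate it into every lane.  */
static void
xvldrepl_w_model (float dst[LANES], const float *src)
{
  for (int i = 0; i < LANES; i++)
    dst[i] = *src;
}

/* xvstelm.w: store a single selected lane back to memory.  */
static void
xvstelm_w_model (float *dst, const float src[LANES], int idx)
{
  *dst = src[idx];
}

int
main (void)
{
  float v[LANES], c = 3.0f, out;
  xvldrepl_w_model (v, &c);
  xvstelm_w_model (&out, v, 5);
  printf ("%g %g\n", v[0], out); /* prints "3 3" */
  return 0;
}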

@@ -0,0 +1,715 @@
From 243656b5b87a3125c2a885d11f022a79cca98b39 Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Mon, 11 Sep 2023 10:07:24 +0800
Subject: [PATCH 082/124] LoongArch: Add tests for SX vector addition vsadd
 instructions.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
	* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.

Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/vector/lsx/lsx-vsadd-1.c | 335 +++++++++++++++++
.../loongarch/vector/lsx/lsx-vsadd-2.c | 345 ++++++++++++++++++
2 files changed, 680 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
new file mode 100644
index 000000000..1bc27c983
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
@@ -0,0 +1,335 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
+ long int long_int_out, long_int_result;
+ unsigned int unsigned_int_out, unsigned_int_result;
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffff3c992b2e;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffff730f;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff3c992b2e;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffff730f;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007fff00007fff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
+ *((unsigned long *)&__m128i_result[1]) = 0x00007fff00007fff;
+ *((unsigned long *)&__m128i_result[0]) = 0x000000002bfd9461;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001021;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001021;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000001000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000001000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001000;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x80808080806b000b;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080806b000b;
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffff01ff01;
+ *((unsigned long *)&__m128i_op1[1]) = 0x3c600000ff800000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffe;
+ *((unsigned long *)&__m128i_result[1]) = 0x3c5fffffff7fffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xfffefffeff00feff;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00ff00ff00ff00ff;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00ff00ff;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x40f3fa0000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x3ff0000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x40f3fa0000000000;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000008a0000008a;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000008900000009;
+ *((unsigned long *)&__m128i_op1[1]) = 0x63637687636316bb;
+ *((unsigned long *)&__m128i_op1[0]) = 0x6363636363636363;
+ *((unsigned long *)&__m128i_result[1]) = 0x6363771163631745;
+ *((unsigned long *)&__m128i_result[0]) = 0x636363ec6363636c;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000004;
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000080000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000080000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000080000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000080000000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffefefe6a;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000c2bac2c2;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000001fffffffe;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000fefefe68;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000c2bac2c2;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x027c027c000027c0;
+ *((unsigned long *)&__m128i_op1[1]) = 0x001ffff0003ffff0;
+ *((unsigned long *)&__m128i_op1[0]) = 0x000fffefffefffef;
+ *((unsigned long *)&__m128i_result[1]) = 0x001ffff0003ffff0;
+ *((unsigned long *)&__m128i_result[0]) = 0x028c026bfff027af;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0007000000040000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0003000000010000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0007000000040000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0003000000010000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x3f8000003f800000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x3f8000003f800000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x3fffff0000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x3fffff0000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x7f7fff003f800000;
+ *((unsigned long *)&__m128i_result[0]) = 0x7f7fff003f800000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000820202020;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00fe01fc0005fff4;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000003a24;
+ *((unsigned long *)&__m128i_op1[0]) = 0x003dbe88077c78c1;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000820205a44;
+ *((unsigned long *)&__m128i_result[0]) = 0x013bc084078278b5;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000140001;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000140001;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x67eb85afb2ebb000;
+ *((unsigned long *)&__m128i_op0[0]) = 0xc8847ef6ed3f2000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000100000001;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x67eb85b0b2ebb001;
+ *((unsigned long *)&__m128i_result[0]) = 0xc8847ef6ed3f2000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffff00000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffff000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000014eb54ab;
+ *((unsigned long *)&__m128i_op1[0]) = 0x14eb6a002a406a00;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff14eb54ab;
+ *((unsigned long *)&__m128i_result[0]) = 0x14ea6a002a406a00;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xce9035c49ffff570;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000004;
+ *((unsigned long *)&__m128i_result[0]) = 0xce9035c49ffff574;
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000010;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000000d;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000400;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x000000000000040d;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001300000013;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000001300000013;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000001300000013;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000001300000013;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000100000100;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100000100;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000100000100;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000001000000ff;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000300000001;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100010001;
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffffffffffffffa;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffa;
+ *((unsigned long *)&__m128i_result[1]) = 0x00000002fffffffb;
+ *((unsigned long *)&__m128i_result[0]) = 0x000000010000fffb;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
new file mode 100644
index 000000000..67d189991
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
@@ -0,0 +1,345 @@
+/* { dg-do run } */
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
+#include "../simd_correctness_check.h"
+#include <lsxintrin.h>
+
+int
+main ()
+{
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
+
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
+ long int long_int_out, long_int_result;
+ unsigned int unsigned_int_out, unsigned_int_result;
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x10f917d72d3d01e4;
+ *((unsigned long *)&__m128i_op1[0]) = 0x203e16d116de012b;
+ *((unsigned long *)&__m128i_result[1]) = 0x10f917d72d3d01e4;
+ *((unsigned long *)&__m128i_result[0]) = 0x203e16d116de012b;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffebd06fffe820c;
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7ffe7fff3506;
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffebd06fffe820c;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7ffe7fff3506;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffff0cffffff18;
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffeff6a0c;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x4f804f804f804f80;
+ *((unsigned long *)&__m128i_op1[0]) = 0x4f804f804f804f80;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffff60ca7104649;
+ *((unsigned long *)&__m128i_op0[0]) = 0xfffff790a15db63d;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_result[1]) = 0xfffff60ca710464a;
+ *((unsigned long *)&__m128i_result[0]) = 0xfffff790a15db63e;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffffffffe;
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffff46;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00fe000100cf005f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7fff7fff7fff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x5f675e96e29a5a60;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff;
+ *((unsigned long *)&__m128i_result[1]) = 0x5fff5e97e2ff5abf;
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffefffeff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001000100010;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0001000100010058;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0001001100110068;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x7fffffff7fffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fffffff7fffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0x7fff010181010102;
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fffffff81010102;
+ *((unsigned long *)&__m128i_result[1]) = 0xfeffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xfeffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffb81a6f70;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000d48eaa1a2;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffb81ae0bf;
+ *((unsigned long *)&__m128i_result[0]) = 0x00012c9748eaffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0177fff0fffffff0;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000011ff8bc;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000200;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000200;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000200;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000200;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000001;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000d0000000d;
+ *((unsigned long *)&__m128i_op1[1]) = 0x8006000000040000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x8002000000000007;
+ *((unsigned long *)&__m128i_result[1]) = 0x8006000000040000;
+ *((unsigned long *)&__m128i_result[0]) = 0x8002000d00000014;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000014;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000014;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ int_out = __lsx_vpickve2gr_h (__m128i_op0, 0x1);
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000600007fff;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000008ffffa209;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000600007fff;
+ *((unsigned long *)&__m128i_result[0]) = 0x00000008ffffa209;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x636363633f3e47c1;
+ *((unsigned long *)&__m128i_op0[0]) = 0x41f8e080f1ef4eaa;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000807bf0a1f80;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000800ecedee68;
+ *((unsigned long *)&__m128i_result[1]) = 0x63636b6afe486741;
+ *((unsigned long *)&__m128i_result[0]) = 0x41f8e880ffffffff;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000ebd20000714f;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00012c8a0000a58a;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000ffff0000e29e;
+ *((unsigned long *)&__m128i_result[0]) = 0x000259140000ffff;
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffeffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[0]) = 0xfffffffeffffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0c03e17edd781b11;
+ *((unsigned long *)&__m128i_op0[0]) = 0x342caf9be55700b5;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00040003ff83ff84;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00040003ff4dffca;
+ *((unsigned long *)&__m128i_result[1]) = 0x0c07e181ffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x3430af9effffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffa8ff9f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000ffffffabff99;
+ *((unsigned long *)&__m128i_op1[1]) = 0x000100000002007d;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0001000000020001;
+ *((unsigned long *)&__m128i_result[1]) = 0x00010000ffab001c;
+ *((unsigned long *)&__m128i_result[0]) = 0x0001ffffffadff9a;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0800080008000800;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0800080008000800;
+ *((unsigned long *)&__m128i_result[1]) = 0x0800080008000800;
+ *((unsigned long *)&__m128i_result[0]) = 0x0800080008000800;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
+ *((unsigned long *)&__m128i_op0[0]) = 0x76f424887fffffff;
+ *((unsigned long *)&__m128i_op1[1]) = 0xc110000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0xc00d060000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0xc110000000000001;
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffff7fffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x000000000000002f;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029;
+ *((unsigned long *)&__m128i_op1[1]) = 0xfbfbfb17fbfb38ea;
+ *((unsigned long *)&__m128i_op1[0]) = 0xfbfb47fbfbfb0404;
+ *((unsigned long *)&__m128i_result[1]) = 0xfbfbfb17fbfb3919;
+ *((unsigned long *)&__m128i_result[0]) = 0xfbfb47fbfbfb042d;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x8080808080808081;
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080ffffffff;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00123fff00120012;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0012001200120012;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000000005003a;
+ *((unsigned long *)&__m128i_result[1]) = 0x00123fff00120012;
+ *((unsigned long *)&__m128i_result[0]) = 0x001200120017004c;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0xbfd10d0d7b6b6b73;
+ *((unsigned long *)&__m128i_op1[0]) = 0xc5c534920000c4ed;
+ *((unsigned long *)&__m128i_result[1]) = 0xbfd10d0d7b6b6b73;
+ *((unsigned long *)&__m128i_result[0]) = 0xc5c534920000c4ed;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x000aa822a79308f6;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000084d12ce;
+ *((unsigned long *)&__m128i_op1[1]) = 0x000aa822a79308f6;
+ *((unsigned long *)&__m128i_op1[0]) = 0x03aa558e1d37b5a1;
+ *((unsigned long *)&__m128i_result[1]) = 0x00155044ffffffff;
+ *((unsigned long *)&__m128i_result[0]) = 0x03aa558e2584c86f;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x021b7d24c9678a35;
+ *((unsigned long *)&__m128i_op0[0]) = 0x030298a6a1030a49;
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35;
+ *((unsigned long *)&__m128i_result[0]) = 0x030298a6a1030a49;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007a8000000480;
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000485000004cc;
+ *((unsigned long *)&__m128i_op1[1]) = 0x00007a8000000480;
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000485000004cc;
+ *((unsigned long *)&__m128i_result[1]) = 0x0000f50000000900;
+ *((unsigned long *)&__m128i_result[0]) = 0x0000090a00000998;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
+ *((unsigned long *)&__m128i_op1[1]) = 0x004eff6200d2ff76;
+ *((unsigned long *)&__m128i_op1[0]) = 0xff70002800be00a0;
+ *((unsigned long *)&__m128i_result[1]) = 0x004eff6200d2ff76;
+ *((unsigned long *)&__m128i_result[0]) = 0xff70002800be00a0;
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
+
+ return 0;
+}
--
2.33.0
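The vsadd expectations above follow from lane-wise saturating addition: vsadd.b/h/w/d clamp a signed sum to the lane's signed range, and vsadd.bu/hu/wu/du clamp an unsigned sum to the lane's unsigned maximum (the trailing vadd_d cases are plain modular addition). For instance, in the second vsadd_b case each byte computes (-1) + (-1) = -2, giving 0xfe per byte with no saturation. A per-byte scalar model (a sketch, not GCC's implementation):

#include <stdint.h>
#include <stdio.h>

/* Models of one byte lane of vsadd.b (signed) and vsadd.bu (unsigned).  */
static int8_t
sadd_b (int8_t a, int8_t b)
{
  int s = a + b;
  if (s > INT8_MAX) return INT8_MAX;
  if (s < INT8_MIN) return INT8_MIN;
  return (int8_t) s;
}

static uint8_t
sadd_bu (uint8_t a, uint8_t b)
{
  unsigned s = a + b;
  return s > UINT8_MAX ? UINT8_MAX : (uint8_t) s;
}

int
main (void)
{
  /* Signed: 0xff + 0xff is (-1) + (-1) = -2 = 0xfe (second vsadd_b case).  */
  printf ("0x%02x\n", (uint8_t) sadd_b (-1, -1)); /* 0xfe */
  /* Unsigned: 0xff + 0xff saturates to 0xff (vsadd_bu cases).  */
  printf ("0x%02x\n", sadd_bu (0xff, 0xff));      /* 0xff */
  return 0;
}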

@@ -0,0 +1,37 @@
From f07b91862055533d779fbf76c12cb7c0ae75b53d Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Mon, 11 Sep 2023 09:35:24 +0800
Subject: [PATCH 076/124] LoongArch: Add tests of -mstrict-align option.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/strict-align.c: New test.

Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 ++++++++++++
1 file changed, 12 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c
diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c b/gcc/testsuite/gcc.target/loongarch/strict-align.c
new file mode 100644
index 000000000..040d84958
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
+
+void
+foo (float *restrict x, float *restrict y)
+{
+ x[0] = x[0] + y[0];
+ x[1] = x[1] + y[1];
+ x[2] = x[2] + y[2];
+ x[3] = x[3] + y[3];
+}
--
2.33.0
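strict-align.c checks the negative case: with only float * restrict parameters the compiler cannot prove vector alignment, so under -mstrict-align no unaligned vector add (vfadd.s) may be emitted. A hypothetical counterpart is sketched below; whether GCC actually vectorizes this exact variant is an assumption, not something the patch establishes.

/* Hypothetical counterpart, not part of the patch: with provably aligned
   globals, -mstrict-align no longer forbids vector accesses.  Whether GCC
   emits vfadd.s for this function is an unverified assumption.  */
float x[4] __attribute__ ((aligned (16)));
float y[4] __attribute__ ((aligned (16)));

void
foo_aligned (void)
{
  x[0] += y[0];
  x[1] += y[1];
  x[2] += y[2];
  x[3] += y[3];
}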

@@ -0,0 +1,131 @@
From aebd03c944312be767f03d129eeebc0c4cdf5b4a Mon Sep 17 00:00:00 2001
From: Xiaolong Chen <chenxiaolong@loongson.cn>
Date: Mon, 11 Sep 2023 09:36:35 +0800
Subject: [PATCH 077/124] LoongArch: Add testsuite framework for Loongson
 SX/ASX.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
	* gcc.target/loongarch/vector/simd_correctness_check.h: New test.

Signed-off-by: Peng Fan <fanpeng@loongson.cn>
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
---
.../loongarch/vector/loongarch-vector.exp | 42 +++++++++++++++
.../loongarch/vector/simd_correctness_check.h | 54 +++++++++++++++++++
2 files changed, 96 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
new file mode 100644
index 000000000..2c37aa91d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
@@ -0,0 +1,42 @@
+#Copyright(C) 2023 Free Software Foundation, Inc.
+
+#This program is free software; you can redistribute it and / or modify
+#it under the terms of the GNU General Public License as published by
+#the Free Software Foundation; either version 3 of the License, or
+#(at your option) any later version.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with GCC; see the file COPYING3.If not see
+# <http: //www.gnu.org/licenses/>.
+
+#GCC testsuite that uses the `dg.exp' driver.
+
+#Exit immediately if this isn't a LoongArch target.
+if ![istarget loongarch*-*-*] then {
+ return
+}
+
+#Load support procs.
+load_lib gcc-dg.exp
+
+#If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS " "
+}
+
+#Initialize `dg'.
+dg-init
+
+#Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
+ " -mlsx" $DEFAULT_CFLAGS
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
+ " -mlasx" $DEFAULT_CFLAGS
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
new file mode 100644
index 000000000..eb7fbd59c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
@@ -0,0 +1,54 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define ASSERTEQ_64(line, ref, res) \
+ do \
+ { \
+ int fail = 0; \
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+ { \
+ long *temp_ref = &ref[i], *temp_res = &res[i]; \
+ if (abs (*temp_ref - *temp_res) > 0) \
+ { \
+ printf (" error: %s at line %ld , expected " #ref \
+ "[%ld]:0x%lx, got: 0x%lx\n", \
+ __FILE__, line, i, *temp_ref, *temp_res); \
+ fail = 1; \
+ } \
+ } \
+ if (fail == 1) \
+ abort (); \
+ } \
+ while (0)
+
+#define ASSERTEQ_32(line, ref, res) \
+ do \
+ { \
+ int fail = 0; \
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
+ { \
+ int *temp_ref = &ref[i], *temp_res = &res[i]; \
+ if (abs (*temp_ref - *temp_res) > 0) \
+ { \
+ printf (" error: %s at line %ld , expected " #ref \
+ "[%ld]:0x%x, got: 0x%x\n", \
+ __FILE__, line, i, *temp_ref, *temp_res); \
+ fail = 1; \
+ } \
+ } \
+ if (fail == 1) \
+ abort (); \
+ } \
+ while (0)
+
+#define ASSERTEQ_int(line, ref, res) \
+ do \
+ { \
+ if (ref != res) \
+ { \
+ printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
+ line, ref, res); \
+ } \
+ } \
+ while (0)
--
2.33.0
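ASSERTEQ_64 walks a __m128i value as two long lanes and reports every mismatching lane before aborting (the abs (...) > 0 test is simply an inequality check on the lane); ASSERTEQ_32 does the same with int lanes, and ASSERTEQ_int compares plain scalars without aborting. A minimal usage sketch in the style of the run tests above, assuming an LSX-capable target, compilation with -mlsx -w, and the header on the include path:

#include <lsxintrin.h>
#include "simd_correctness_check.h"

int
main (void)
{
  __m128i out, expected;

  /* 1 + 1 per 64-bit lane, checked the same way the lsx run tests do.  */
  *((unsigned long *) &expected[1]) = 0x0000000000000002;
  *((unsigned long *) &expected[0]) = 0x0000000000000002;
  out = __lsx_vadd_d (__lsx_vrepli_d (1), __lsx_vrepli_d (1));
  ASSERTEQ_64 (__LINE__, expected, out);
  return 0;
}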

Some files were not shown because too many files have changed in this diff.