!436 [Sync]Sync from openEuler-24.03-LTS
From: @jubo-run Reviewed-by: @huang-xiaoquan Signed-off-by: @huang-xiaoquan
This commit is contained in:
commit
14054239e6
1191
0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Normal file
1191
0029-Struct-Reorg-Add-Safe-Structure-Pointer-Compression.patch
Normal file
File diff suppressed because it is too large
Load Diff
1232
0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
Normal file
1232
0030-Struct-Reorg-Add-unsafe-structure-pointer-compressio.patch
Normal file
File diff suppressed because it is too large
Load Diff
550
0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Normal file
550
0031-AutoBOLT-Support-saving-feedback-count-info-to-ELF-s.patch
Normal file
@ -0,0 +1,550 @@
|
||||
From 72531376df5ed93c2d945469368ba5514eca8407 Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||
Date: Tue, 5 Dec 2023 15:33:08 +0800
|
||||
Subject: [PATCH] [AutoBOLT] Support saving feedback count info to ELF segment
|
||||
1/3
|
||||
|
||||
---
|
||||
gcc/common.opt | 8 +
|
||||
gcc/final.cc | 405 ++++++++++++++++++++++++++++++++++++++++++++++++-
|
||||
gcc/opts.cc | 61 ++++++++
|
||||
3 files changed, 473 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index b01df919e..e69947fc2 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2546,6 +2546,14 @@ freorder-functions
|
||||
Common Var(flag_reorder_functions) Optimization
|
||||
Reorder functions to improve code placement.
|
||||
|
||||
+fauto-bolt
|
||||
+Common Var(flag_auto_bolt)
|
||||
+Generate profile from AutoFDO or PGO and do BOLT optimization after linkage.
|
||||
+
|
||||
+fauto-bolt=
|
||||
+Common Joined RejectNegative
|
||||
+Specify the feedback data directory required by BOLT-plugin. The default is the current directory.
|
||||
+
|
||||
frerun-cse-after-loop
|
||||
Common Var(flag_rerun_cse_after_loop) Optimization
|
||||
Add a common subexpression elimination pass after loop optimizations.
|
||||
diff --git a/gcc/final.cc b/gcc/final.cc
|
||||
index a9868861b..d4c4fa08f 100644
|
||||
--- a/gcc/final.cc
|
||||
+++ b/gcc/final.cc
|
||||
@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "rtl-iter.h"
|
||||
#include "print-rtl.h"
|
||||
#include "function-abi.h"
|
||||
+#include "insn-codes.h"
|
||||
#include "common/common-target.h"
|
||||
|
||||
#ifdef XCOFF_DEBUGGING_INFO
|
||||
@@ -4266,7 +4267,403 @@ leaf_renumber_regs_insn (rtx in_rtx)
|
||||
}
|
||||
}
|
||||
#endif
|
||||
-
|
||||
+
|
||||
+#define ASM_FDO_SECTION_PREFIX ".text.fdo."
|
||||
+
|
||||
+#define ASM_FDO_CALLER_FLAG ".fdo.caller "
|
||||
+#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
|
||||
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
|
||||
+
|
||||
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
|
||||
+
|
||||
+/* Return the relative offset address of the start instruction of BB,
|
||||
+ return -1 if it is empty instruction. */
|
||||
+
|
||||
+static int
|
||||
+get_bb_start_addr (basic_block bb)
|
||||
+{
|
||||
+ rtx_insn *insn;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ {
|
||||
+ if (!INSN_P (insn))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ /* The jump target of call is not in this function, so
|
||||
+ it should be excluded. */
|
||||
+ if (CALL_P (insn))
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+
|
||||
+ int insn_code = recog_memoized (insn);
|
||||
+
|
||||
+ /* The instruction NOP in llvm-bolt belongs to the previous
|
||||
+ BB, so it needs to be skipped. */
|
||||
+ if (insn_code != CODE_FOR_nop)
|
||||
+ {
|
||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
||||
+ }
|
||||
+ }
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Return the relative offset address of the end instruction of BB,
|
||||
+ return -1 if it is empty or call instruction. */
|
||||
+
|
||||
+static int
|
||||
+get_bb_end_addr (basic_block bb)
|
||||
+{
|
||||
+ rtx_insn *insn;
|
||||
+ int num_succs = EDGE_COUNT (bb->succs);
|
||||
+ FOR_BB_INSNS_REVERSE (bb, insn)
|
||||
+ {
|
||||
+ if (!INSN_P (insn))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ /* The jump target of call is not in this function, so
|
||||
+ it should be excluded. */
|
||||
+ if (CALL_P (insn))
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+ if ((num_succs == 1)
|
||||
+ || ((num_succs == 2) && any_condjump_p (insn)))
|
||||
+ {
|
||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+ }
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Return the end address of cfun. */
|
||||
+
|
||||
+static int
|
||||
+get_function_end_addr ()
|
||||
+{
|
||||
+ rtx_insn *insn = get_last_insn ();
|
||||
+ for (; insn != get_insns (); insn = PREV_INSN (insn))
|
||||
+ {
|
||||
+ if (!INSN_P (insn))
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ return INSN_ADDRESSES (INSN_UID (insn));
|
||||
+ }
|
||||
+
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
+/* Return the function profile status string. */
|
||||
+
|
||||
+static const char *
|
||||
+get_function_profile_status ()
|
||||
+{
|
||||
+ const char *profile_status[] = {
|
||||
+ "PROFILE_ABSENT",
|
||||
+ "PROFILE_GUESSED",
|
||||
+ "PROFILE_READ",
|
||||
+ "PROFILE_LAST" /* Last value, used by profile streaming. */
|
||||
+ };
|
||||
+
|
||||
+ return profile_status[profile_status_for_fn (cfun)];
|
||||
+}
|
||||
+
|
||||
+/* Return the count from the feedback data, such as PGO or AutoFDO. */
|
||||
+
|
||||
+inline static gcov_type
|
||||
+get_fdo_count (profile_count count)
|
||||
+{
|
||||
+ return count.quality () >= GUESSED
|
||||
+ ? count.to_gcov_type () : 0;
|
||||
+}
|
||||
+
|
||||
+/* Return the profile quality string. */
|
||||
+
|
||||
+static const char *
|
||||
+get_fdo_count_quality (profile_count count)
|
||||
+{
|
||||
+ const char *profile_quality[] = {
|
||||
+ "UNINITIALIZED_PROFILE",
|
||||
+ "GUESSED_LOCAL",
|
||||
+ "GUESSED_GLOBAL0",
|
||||
+ "GUESSED_GLOBAL0_ADJUSTED",
|
||||
+ "GUESSED",
|
||||
+ "AFDO",
|
||||
+ "ADJUSTED",
|
||||
+ "PRECISE"
|
||||
+ };
|
||||
+
|
||||
+ return profile_quality[count.quality ()];
|
||||
+}
|
||||
+
|
||||
+static const char *
|
||||
+alias_local_functions (const char *fnname)
|
||||
+{
|
||||
+ if (TREE_PUBLIC (cfun->decl))
|
||||
+ {
|
||||
+ return fnname;
|
||||
+ }
|
||||
+ return concat (fnname, "/", lbasename (dump_base_name), NULL);
|
||||
+}
|
||||
+
|
||||
+/* Return function bind type string. */
|
||||
+
|
||||
+static const char *
|
||||
+simple_get_function_bind ()
|
||||
+{
|
||||
+ const char *function_bind[] = {
|
||||
+ "GLOBAL",
|
||||
+ "WEAK",
|
||||
+ "LOCAL",
|
||||
+ "UNKNOWN"
|
||||
+ };
|
||||
+
|
||||
+ if (TREE_PUBLIC (cfun->decl))
|
||||
+ {
|
||||
+ if (!(DECL_WEAK (cfun->decl)))
|
||||
+ {
|
||||
+ return function_bind[0];
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return function_bind[1];
|
||||
+ }
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ return function_bind[2];
|
||||
+ }
|
||||
+
|
||||
+ return function_bind[3];
|
||||
+}
|
||||
+
|
||||
+/* Dump the callee of each direct call insn in BB, found by CALL_P (insn). */
|
||||
+
|
||||
+static void
|
||||
+dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
||||
+{
|
||||
+ rtx_insn *insn;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ {
|
||||
+ if (insn && CALL_P (insn))
|
||||
+ {
|
||||
+ tree callee = get_call_fndecl (insn);
|
||||
+
|
||||
+ if (callee)
|
||||
+ {
|
||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n",
|
||||
+ INSN_ADDRESSES (INSN_UID (insn)));
|
||||
+
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
+ ASM_FDO_CALLEE_FLAG,
|
||||
+ alias_local_functions (get_fnname_from_decl (callee)));
|
||||
+
|
||||
+ fprintf (asm_out_file,
|
||||
+ "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
||||
+ call_count);
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "call: %x --> %s \n",
|
||||
+ INSN_ADDRESSES (INSN_UID (insn)),
|
||||
+ alias_local_functions
|
||||
+ (get_fnname_from_decl (callee)));
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump BB's outgoing edge counts and its direct-call counts into asm.  */
+static void
+dump_edge_jump_info_to_asm (basic_block bb, gcov_type bb_count)
+{
+  edge e;
+  edge_iterator ei;
+  gcov_type edge_total_count = 0;
+
+  FOR_EACH_EDGE (e, ei, bb->succs)
+    {
+      gcov_type edge_count = get_fdo_count (e->count ());
+      edge_total_count += edge_count;
+
+      int edge_start_addr = get_bb_end_addr (e->src);
+      int edge_end_addr = get_bb_start_addr (e->dest);
+
+      if (edge_start_addr == -1 || edge_end_addr == -1)
+	{
+	  continue;
+	}
+
+      /* This is a reserved assert for the original design.  If this
+	 assert fires, use the address of the previous instruction
+	 as edge_start_addr.  */
+      gcc_assert (edge_start_addr != edge_end_addr);
+
+      if (dump_file)
+	{
+	  fprintf (dump_file, "edge: %x --> %x = (" HOST_WIDE_INT_PRINT_DEC ")\n",
+		   edge_start_addr, edge_end_addr, edge_count);
+	}
+
+      if (edge_count > 0)
+	{
+	  fprintf (asm_out_file, "\t.string \"%x\"\n", edge_start_addr);
+	  fprintf (asm_out_file, "\t.string \"%x\"\n", edge_end_addr);
+	  fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
+		   edge_count);
+	}
+    }
+
+  gcov_type call_count = MAX (edge_total_count, bb_count);
+  if (call_count > 0)
+    {
+      dump_direct_callee_info_to_asm (bb, call_count);
+    }
+}
|
||||
+
|
||||
+/* Dump the bb info into asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
|
||||
+{
|
||||
+ int bb_start_addr = get_bb_start_addr (bb);
|
||||
+ if (bb_start_addr != -1)
|
||||
+ {
|
||||
+ fprintf (asm_out_file, "\t.string \"%x\"\n", bb_start_addr);
|
||||
+ fprintf (asm_out_file, "\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
||||
+ bb_count);
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump the function info into asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_function_info_to_asm (const char *fnname)
|
||||
+{
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
+ ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%d\"\n",
|
||||
+ ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
|
||||
+ fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
+ ASM_FDO_CALLER_BIND_FLAG, simple_get_function_bind ());
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "\n FUNC_NAME: %s\n",
|
||||
+ alias_local_functions (fnname));
|
||||
+ fprintf (dump_file, " file: %s\n",
|
||||
+ dump_base_name);
|
||||
+ fprintf (dump_file, "profile_status: %s\n",
|
||||
+ get_function_profile_status ());
|
||||
+ fprintf (dump_file, " size: %x\n",
|
||||
+ get_function_end_addr ());
|
||||
+ fprintf (dump_file, " function_bind: %s\n",
|
||||
+ simple_get_function_bind ());
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* Dump function profile info from AutoFDO or PGO to asm. */
|
||||
+
|
||||
+static void
|
||||
+dump_fdo_info_to_asm (const char *fnname)
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+
|
||||
+ dump_function_info_to_asm (fnname);
|
||||
+
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ gcov_type bb_count = get_fdo_count (bb->count);
|
||||
+ if (bb_count == 0)
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "BB: %x --> %x = (%ld) [%s]\n",
|
||||
+ get_bb_start_addr (bb), get_bb_end_addr (bb),
|
||||
+ bb_count, get_fdo_count_quality (bb->count));
|
||||
+ }
|
||||
+
|
||||
+ if (flag_profile_use)
|
||||
+ {
|
||||
+ dump_edge_jump_info_to_asm (bb, bb_count);
|
||||
+ }
|
||||
+ else if (flag_auto_profile)
|
||||
+ {
|
||||
+ dump_bb_info_to_asm (bb, bb_count);
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* When the -fauto-bolt option is turned on, the .text.fdo section
+   will be generated in the *.s file if there is feedback information
+   from PGO or AutoFDO.  This section will be parsed by the BOLT-plugin.  */
+
+static void
+dump_profile_to_elf_sections ()
+{
+  if (!flag_function_sections)
+    {
+      error ("-fauto-bolt should work with -ffunction-sections");
+      return;
+    }
+  if (!flag_ipa_ra)
+    {
+      error ("-fauto-bolt should work with -fipa-ra");
+      return;
+    }
+  if (flag_align_jumps)
+    {
+      error ("-fauto-bolt is not supported with -falign-jumps");
+      return;
+    }
+  if (flag_align_labels)
+    {
+      error ("-fauto-bolt is not supported with -falign-labels");
+      return;
+    }
+  if (flag_align_loops)
+    {
+      error ("-fauto-bolt is not supported with -falign-loops");
+      return;
+    }
+
+  /* Report an error and return if there is no feedback data.  */
+  if (!flag_profile_use && !flag_auto_profile)
+    {
+      error ("-fauto-bolt requires -fprofile-use or -fauto-profile");
+      return;
+    }
+
+  /* Skip anything that is not a function declaration.  */
+  if (TREE_CODE (cfun->decl) != FUNCTION_DECL)
+    {
+      return;
+    }
+  int flags = SECTION_DEBUG | SECTION_EXCLUDE;
+  const char *fnname = get_fnname_from_decl (current_function_decl);
+  /* Section name is ASM_FDO_SECTION_PREFIX followed by the function name.  */
+  char *profile_fnname = concat (ASM_FDO_SECTION_PREFIX, fnname, NULL);
+
+  switch_to_section (get_section (profile_fnname, flags, NULL));
+  dump_fdo_info_to_asm (fnname);
+
+  if (profile_fnname)
+    {
+      free (profile_fnname);
+      profile_fnname = NULL;
+    }
+}
|
||||
+
|
||||
/* Turn the RTL into assembly. */
|
||||
static unsigned int
|
||||
rest_of_handle_final (void)
|
||||
@@ -4334,6 +4731,12 @@ rest_of_handle_final (void)
|
||||
targetm.asm_out.destructor (XEXP (DECL_RTL (current_function_decl), 0),
|
||||
decl_fini_priority_lookup
|
||||
(current_function_decl));
|
||||
+
|
||||
+ if (flag_auto_bolt)
|
||||
+ {
|
||||
+ dump_profile_to_elf_sections ();
|
||||
+ }
|
||||
+
|
||||
return 0;
|
||||
}
|
||||
|
||||
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||
index b868d189e..6d57e7d69 100644
|
||||
--- a/gcc/opts.cc
|
||||
+++ b/gcc/opts.cc
|
||||
@@ -1279,6 +1279,10 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
||||
if (opts->x_flag_vtable_verify && opts->x_flag_lto)
|
||||
sorry ("vtable verification is not supported with LTO");
|
||||
|
||||
+ /* Currently -fauto-bolt is not supported for LTO. */
|
||||
+ if (opts->x_flag_auto_bolt && opts->x_flag_lto)
|
||||
+ sorry ("%<-fauto-bolt%> is not supported with LTO");
|
||||
+
|
||||
/* Control IPA optimizations based on different -flive-patching level. */
|
||||
if (opts->x_flag_live_patching)
|
||||
control_options_for_live_patching (opts, opts_set,
|
||||
@@ -1291,6 +1295,58 @@ finish_options (struct gcc_options *opts, struct gcc_options *opts_set,
|
||||
= (opts->x_flag_unroll_loops
|
||||
|| opts->x_flag_peel_loops
|
||||
|| opts->x_optimize >= 3);
|
||||
+
|
||||
+ if (opts->x_flag_auto_bolt)
|
||||
+ {
|
||||
+ /* Record the function section to facilitate the feedback
|
||||
+ data storage. */
|
||||
+ if (!opts->x_flag_function_sections)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should work with %<-ffunction-sections%>,"
|
||||
+ " enabling %<-ffunction-sections%>");
|
||||
+ opts->x_flag_function_sections = true;
|
||||
+ }
|
||||
+
|
||||
+ /* Cancel the internal alignment of the function. The binary
|
||||
+ optimizer bolt will cancel the internal alignment optimization
|
||||
+ of the function, so the alignment is meaningless at this time,
|
||||
+ and if not, it will bring trouble to the calculation of the
|
||||
+ offset address of the instruction. */
|
||||
+ if (opts->x_flag_align_jumps)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should not work with %<-falign-jumps%>,"
|
||||
+ " disabling %<-falign-jumps%>");
|
||||
+ opts->x_flag_align_jumps = false;
|
||||
+ }
|
||||
+
|
||||
+ if (opts->x_flag_align_labels)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should not work with %<-falign-labels%>,"
|
||||
+ " disabling %<-falign-labels%>");
|
||||
+ opts->x_flag_align_labels = false;
|
||||
+ }
|
||||
+
|
||||
+ if (opts->x_flag_align_loops)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should not work with %<-falign-loops%>,"
|
||||
+ " disabling %<-falign-loops%>");
|
||||
+ opts->x_flag_align_loops = false;
|
||||
+ }
|
||||
+
|
||||
+ /* When parsing instructions in RTL phase, we need to know
|
||||
+ the call information of instructions to avoid being optimized. */
|
||||
+ if (!opts->x_flag_ipa_ra)
|
||||
+ {
|
||||
+ inform (loc,
|
||||
+ "%<-fauto-bolt%> should work with %<-fipa-ra%>,"
|
||||
+ " enabling %<-fipa-ra%>");
|
||||
+ opts->x_flag_ipa_ra = true;
|
||||
+ }
|
||||
+ }
|
||||
|
||||
/* With -fcx-limited-range, we do cheap and quick complex arithmetic. */
|
||||
if (opts->x_flag_cx_limited_range)
|
||||
@@ -3226,6 +3282,11 @@ common_handle_option (struct gcc_options *opts,
|
||||
&opts->x_flag_align_functions,
|
||||
&opts->x_str_align_functions);
|
||||
break;
|
||||
+
|
||||
+ case OPT_fauto_bolt_:
|
||||
+ case OPT_fauto_bolt:
|
||||
+ /* Deferred. */
|
||||
+ break;
|
||||
|
||||
case OPT_ftabstop_:
|
||||
/* It is documented that we silently ignore silly values. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
34094
0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Normal file
34094
0032-AutoBOLT-Add-bolt-linker-plugin-2-3.patch
Normal file
File diff suppressed because it is too large
Load Diff
345
0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Normal file
345
0033-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
Normal file
@ -0,0 +1,345 @@
|
||||
From 94242286383a80e6ab83d824a4d7ea23ea311f75 Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||
Date: Mon, 22 Jan 2024 15:38:24 +0800
|
||||
Subject: [PATCH] [AutoBOLT] Enable BOLT linker plugin on aarch64 3/3
|
||||
|
||||
---
|
||||
Makefile.def | 10 ++++++++++
|
||||
configure | 27 ++++++++++++++++++++++++++-
|
||||
configure.ac | 22 +++++++++++++++++++++-
|
||||
gcc/config.host | 1 +
|
||||
gcc/config.in | 13 +++++++++++++
|
||||
gcc/configure | 10 ++++++++--
|
||||
gcc/configure.ac | 4 ++++
|
||||
gcc/gcc.cc | 23 +++++++++++++++++++++++
|
||||
8 files changed, 106 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/Makefile.def b/Makefile.def
|
||||
index 72d585496..0ba868890 100644
|
||||
--- a/Makefile.def
|
||||
+++ b/Makefile.def
|
||||
@@ -145,6 +145,9 @@ host_modules= { module= gnattools; };
|
||||
host_modules= { module= lto-plugin; bootstrap=true;
|
||||
extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
|
||||
extra_make_flags='@extra_linker_plugin_flags@'; };
|
||||
+host_modules= { module= bolt-plugin; bootstrap=true;
|
||||
+ extra_configure_flags='--enable-shared @extra_linker_plugin_flags@ @extra_linker_plugin_configure_flags@';
|
||||
+ extra_make_flags='@extra_linker_plugin_flags@'; };
|
||||
host_modules= { module= libcc1; extra_configure_flags=--enable-shared; };
|
||||
host_modules= { module= gotools; };
|
||||
host_modules= { module= libctf; bootstrap=true; };
|
||||
@@ -349,6 +352,7 @@ dependencies = { module=configure-gcc; on=all-mpfr; };
|
||||
dependencies = { module=configure-gcc; on=all-mpc; };
|
||||
dependencies = { module=configure-gcc; on=all-isl; };
|
||||
dependencies = { module=configure-gcc; on=all-lto-plugin; };
|
||||
+dependencies = { module=configure-gcc; on=all-bolt-plugin; };
|
||||
dependencies = { module=configure-gcc; on=all-binutils; };
|
||||
dependencies = { module=configure-gcc; on=all-gas; };
|
||||
dependencies = { module=configure-gcc; on=all-ld; };
|
||||
@@ -374,6 +378,7 @@ dependencies = { module=all-gcc; on=all-libdecnumber; hard=true; };
|
||||
dependencies = { module=all-gcc; on=all-libiberty; };
|
||||
dependencies = { module=all-gcc; on=all-fixincludes; };
|
||||
dependencies = { module=all-gcc; on=all-lto-plugin; };
|
||||
+dependencies = { module=all-gcc; on=all-bolt-plugin; };
|
||||
dependencies = { module=all-gcc; on=all-libiconv; };
|
||||
dependencies = { module=info-gcc; on=all-build-libiberty; };
|
||||
dependencies = { module=dvi-gcc; on=all-build-libiberty; };
|
||||
@@ -381,8 +386,10 @@ dependencies = { module=pdf-gcc; on=all-build-libiberty; };
|
||||
dependencies = { module=html-gcc; on=all-build-libiberty; };
|
||||
dependencies = { module=install-gcc ; on=install-fixincludes; };
|
||||
dependencies = { module=install-gcc ; on=install-lto-plugin; };
|
||||
+dependencies = { module=install-gcc ; on=install-bolt-plugin; };
|
||||
dependencies = { module=install-strip-gcc ; on=install-strip-fixincludes; };
|
||||
dependencies = { module=install-strip-gcc ; on=install-strip-lto-plugin; };
|
||||
+dependencies = { module=install-strip-gcc ; on=install-strip-bolt-plugin; };
|
||||
|
||||
dependencies = { module=configure-libcpp; on=configure-libiberty; hard=true; };
|
||||
dependencies = { module=configure-libcpp; on=configure-intl; };
|
||||
@@ -401,6 +408,9 @@ dependencies = { module=all-gnattools; on=all-target-libstdc++-v3; };
|
||||
dependencies = { module=all-lto-plugin; on=all-libiberty; };
|
||||
dependencies = { module=all-lto-plugin; on=all-libiberty-linker-plugin; };
|
||||
|
||||
+dependencies = { module=all-bolt-plugin; on=all-libiberty; };
|
||||
+dependencies = { module=all-bolt-plugin; on=all-libiberty-linker-plugin; };
|
||||
+
|
||||
dependencies = { module=configure-libcc1; on=configure-gcc; };
|
||||
dependencies = { module=all-libcc1; on=all-gcc; };
|
||||
|
||||
diff --git a/configure b/configure
|
||||
index 5dcaab14a..aff62c464 100755
|
||||
--- a/configure
|
||||
+++ b/configure
|
||||
@@ -826,6 +826,7 @@ with_isl
|
||||
with_isl_include
|
||||
with_isl_lib
|
||||
enable_isl_version_check
|
||||
+enable_bolt
|
||||
enable_lto
|
||||
enable_linker_plugin_configure_flags
|
||||
enable_linker_plugin_flags
|
||||
@@ -1550,6 +1551,7 @@ Optional Features:
|
||||
enable the PGO build
|
||||
--disable-isl-version-check
|
||||
disable check for isl version
|
||||
+ --enable-bolt enable bolt optimization support
|
||||
--enable-lto enable link time optimization support
|
||||
--enable-linker-plugin-configure-flags=FLAGS
|
||||
additional flags for configuring linker plugins
|
||||
@@ -8564,6 +8566,15 @@ fi
|
||||
|
||||
|
||||
|
||||
+# Check for BOLT support.
|
||||
+# Check whether --enable-bolt was given.
|
||||
+if test "${enable_bolt+set}" = set; then :
|
||||
+ enableval=$enable_bolt; enable_bolt=$enableval
|
||||
+else
|
||||
+ enable_bolt=no; default_enable_bolt=no
|
||||
+fi
|
||||
+
|
||||
+
|
||||
# Check for LTO support.
|
||||
# Check whether --enable-lto was given.
|
||||
if test "${enable_lto+set}" = set; then :
|
||||
@@ -8593,6 +8604,16 @@ if test $target_elf = yes; then :
|
||||
# ELF platforms build the lto-plugin always.
|
||||
build_lto_plugin=yes
|
||||
|
||||
+ # ELF platforms can build the bolt-plugin.
|
||||
+ # NOT BUILD BOLT BY DEFAULT.
|
||||
+ case $target in
|
||||
+ aarch64*-*-linux*)
|
||||
+ if test $enable_bolt = yes; then :
|
||||
+ build_bolt_plugin=yes
|
||||
+ fi
|
||||
+ ;;
|
||||
+ esac
|
||||
+
|
||||
else
|
||||
if test x"$default_enable_lto" = x"yes" ; then
|
||||
case $target in
|
||||
@@ -8780,6 +8801,10 @@ if test -d ${srcdir}/gcc; then
|
||||
fi
|
||||
fi
|
||||
|
||||
+ if test "${build_bolt_plugin}" = "yes" ; then
|
||||
+ configdirs="$configdirs bolt-plugin"
|
||||
+ fi
|
||||
+
|
||||
# If we're building an offloading compiler, add the LTO front end.
|
||||
if test x"$enable_as_accelerator_for" != x ; then
|
||||
case ,${enable_languages}, in
|
||||
@@ -9202,7 +9227,7 @@ fi
|
||||
extra_host_libiberty_configure_flags=
|
||||
extra_host_zlib_configure_flags=
|
||||
case " $configdirs " in
|
||||
- *" lto-plugin "* | *" libcc1 "*)
|
||||
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
|
||||
# When these are to be built as shared libraries, the same applies to
|
||||
# libiberty.
|
||||
extra_host_libiberty_configure_flags=--enable-shared
|
||||
diff --git a/configure.ac b/configure.ac
|
||||
index 85977482a..f310d75ca 100644
|
||||
--- a/configure.ac
|
||||
+++ b/configure.ac
|
||||
@@ -1863,6 +1863,12 @@ fi
|
||||
AC_SUBST(isllibs)
|
||||
AC_SUBST(islinc)
|
||||
|
||||
+# Check for BOLT support.
|
||||
+AC_ARG_ENABLE(bolt,
|
||||
+[AS_HELP_STRING([--enable-bolt], [enable bolt optimization support])],
|
||||
+enable_bolt=$enableval,
|
||||
+enable_bolt=no; default_enable_bolt=no)
|
||||
+
|
||||
# Check for LTO support.
|
||||
AC_ARG_ENABLE(lto,
|
||||
[AS_HELP_STRING([--enable-lto], [enable link time optimization support])],
|
||||
@@ -1871,6 +1877,16 @@ enable_lto=yes; default_enable_lto=yes)
|
||||
|
||||
ACX_ELF_TARGET_IFELSE([# ELF platforms build the lto-plugin always.
|
||||
build_lto_plugin=yes
|
||||
+
|
||||
+ # ELF platforms can build the bolt-plugin.
|
||||
+ # NOT BUILD BOLT BY DEFAULT.
|
||||
+ case $target in
|
||||
+ aarch64*-*-linux*)
|
||||
+ if test $enable_bolt = yes; then :
|
||||
+ build_bolt_plugin=yes
|
||||
+ fi
|
||||
+ ;;
|
||||
+ esac
|
||||
],[if test x"$default_enable_lto" = x"yes" ; then
|
||||
case $target in
|
||||
*-apple-darwin[[912]]* | *-cygwin* | *-mingw* | *djgpp*) ;;
|
||||
@@ -2049,6 +2065,10 @@ if test -d ${srcdir}/gcc; then
|
||||
fi
|
||||
fi
|
||||
|
||||
+ if test "${build_bolt_plugin}" = "yes" ; then
|
||||
+ configdirs="$configdirs bolt-plugin"
|
||||
+ fi
|
||||
+
|
||||
# If we're building an offloading compiler, add the LTO front end.
|
||||
if test x"$enable_as_accelerator_for" != x ; then
|
||||
case ,${enable_languages}, in
|
||||
@@ -2457,7 +2477,7 @@ fi
|
||||
extra_host_libiberty_configure_flags=
|
||||
extra_host_zlib_configure_flags=
|
||||
case " $configdirs " in
|
||||
- *" lto-plugin "* | *" libcc1 "*)
|
||||
+ *" lto-plugin "* | *" libcc1 "* | *" bolt-plugin "*)
|
||||
# When these are to be built as shared libraries, the same applies to
|
||||
# libiberty.
|
||||
extra_host_libiberty_configure_flags=--enable-shared
|
||||
diff --git a/gcc/config.host b/gcc/config.host
|
||||
index 4ca300f11..bf7dcb4cc 100644
|
||||
--- a/gcc/config.host
|
||||
+++ b/gcc/config.host
|
||||
@@ -75,6 +75,7 @@ out_host_hook_obj=host-default.o
|
||||
host_can_use_collect2=yes
|
||||
use_long_long_for_widest_fast_int=no
|
||||
host_lto_plugin_soname=liblto_plugin.so
|
||||
+host_bolt_plugin_soname=libbolt_plugin.so
|
||||
|
||||
# Unsupported hosts list. Generally, only include hosts known to fail here,
|
||||
# since we allow hosts not listed to be supported generically.
|
||||
diff --git a/gcc/config.in b/gcc/config.in
|
||||
index 64c27c9cf..6bb25b25b 100644
|
||||
--- a/gcc/config.in
|
||||
+++ b/gcc/config.in
|
||||
@@ -24,6 +24,13 @@
|
||||
#endif
|
||||
|
||||
|
||||
+/* Define to the name of the BOLT plugin DSO that must be passed to the
|
||||
+ linker's -plugin=LIB option. */
|
||||
+#ifndef USED_FOR_TARGET
|
||||
+#undef BOLTPLUGINSONAME
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
/* Define to the root for URLs about GCC changes. */
|
||||
#ifndef USED_FOR_TARGET
|
||||
#undef CHANGES_ROOT_URL
|
||||
@@ -2208,6 +2215,12 @@
|
||||
#endif
|
||||
|
||||
|
||||
+/* Define which stat syscall is able to handle 64bit indodes. */
|
||||
+#ifndef USED_FOR_TARGET
|
||||
+#undef HOST_STAT_FOR_64BIT_INODES
|
||||
+#endif
|
||||
+
|
||||
+
|
||||
/* Define as const if the declaration of iconv() needs const. */
|
||||
#ifndef USED_FOR_TARGET
|
||||
#undef ICONV_CONST
|
||||
diff --git a/gcc/configure b/gcc/configure
|
||||
index 98bbf0f85..30f386789 100755
|
||||
--- a/gcc/configure
|
||||
+++ b/gcc/configure
|
||||
@@ -13578,6 +13578,12 @@ case $use_collect2 in
|
||||
esac
|
||||
|
||||
|
||||
+cat >>confdefs.h <<_ACEOF
|
||||
+#define BOLTPLUGINSONAME "${host_bolt_plugin_soname}"
|
||||
+_ACEOF
|
||||
+
|
||||
+
|
||||
+
|
||||
cat >>confdefs.h <<_ACEOF
|
||||
#define LTOPLUGINSONAME "${host_lto_plugin_soname}"
|
||||
_ACEOF
|
||||
@@ -19668,7 +19674,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<_LT_EOF
|
||||
-#line 19671 "configure"
|
||||
+#line 19677 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
@@ -19774,7 +19780,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<_LT_EOF
|
||||
-#line 19777 "configure"
|
||||
+#line 19783 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
diff --git a/gcc/configure.ac b/gcc/configure.ac
|
||||
index c74f4b555..dd6cd60f8 100644
|
||||
--- a/gcc/configure.ac
|
||||
+++ b/gcc/configure.ac
|
||||
@@ -2531,6 +2531,10 @@ case $use_collect2 in
|
||||
;;
|
||||
esac
|
||||
|
||||
+AC_DEFINE_UNQUOTED(BOLTPLUGINSONAME,"${host_bolt_plugin_soname}",
|
||||
+[Define to the name of the BOLT plugin DSO that must be
|
||||
+ passed to the linker's -plugin=LIB option.])
|
||||
+
|
||||
AC_DEFINE_UNQUOTED(LTOPLUGINSONAME,"${host_lto_plugin_soname}",
|
||||
[Define to the name of the LTO plugin DSO that must be
|
||||
passed to the linker's -plugin=LIB option.])
|
||||
diff --git a/gcc/gcc.cc b/gcc/gcc.cc
|
||||
index fbcc9d033..b0d03430e 100644
|
||||
--- a/gcc/gcc.cc
|
||||
+++ b/gcc/gcc.cc
|
||||
@@ -1156,6 +1156,8 @@ proper position among the other output files. */
|
||||
%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
|
||||
%(linker) " \
|
||||
LINK_PLUGIN_SPEC \
|
||||
+ "%{fauto-bolt|fauto-bolt=*|fbolt-use|fbolt-use=*: \
|
||||
+ -plugin %(linker_auto_bolt_plugin_file) }"\
|
||||
"%{flto|flto=*:%<fcompare-debug*} \
|
||||
%{flto} %{fno-lto} %{flto=*} %l " LINK_PIE_SPEC \
|
||||
"%{fuse-ld=*:-fuse-ld=%*} " LINK_COMPRESS_DEBUG_SPEC \
|
||||
@@ -1210,6 +1212,7 @@ static const char *endfile_spec = ENDFILE_SPEC;
|
||||
static const char *startfile_spec = STARTFILE_SPEC;
|
||||
static const char *linker_name_spec = LINKER_NAME;
|
||||
static const char *linker_plugin_file_spec = "";
|
||||
+static const char *linker_auto_bolt_plugin_file_spec = "";
|
||||
static const char *lto_wrapper_spec = "";
|
||||
static const char *lto_gcc_spec = "";
|
||||
static const char *post_link_spec = POST_LINK_SPEC;
|
||||
@@ -1723,6 +1726,8 @@ static struct spec_list static_specs[] =
|
||||
INIT_STATIC_SPEC ("multilib_reuse", &multilib_reuse),
|
||||
INIT_STATIC_SPEC ("linker", &linker_name_spec),
|
||||
INIT_STATIC_SPEC ("linker_plugin_file", &linker_plugin_file_spec),
|
||||
+ INIT_STATIC_SPEC ("linker_auto_bolt_plugin_file",
|
||||
+ &linker_auto_bolt_plugin_file_spec),
|
||||
INIT_STATIC_SPEC ("lto_wrapper", &lto_wrapper_spec),
|
||||
INIT_STATIC_SPEC ("lto_gcc", &lto_gcc_spec),
|
||||
INIT_STATIC_SPEC ("post_link", &post_link_spec),
|
||||
@@ -9118,6 +9123,24 @@ driver::maybe_run_linker (const char *argv0) const
|
||||
}
|
||||
#endif
|
||||
set_static_spec_shared (&lto_gcc_spec, argv0);
|
||||
+
|
||||
+ /* Set bolt-plugin. */
|
||||
+ const char *fauto_bolt = "fauto-bolt";
|
||||
+ const char *fbolt_use = "fbolt-use";
|
||||
+ if (switch_matches (fauto_bolt, fauto_bolt + strlen (fauto_bolt), 1)
|
||||
+ || switch_matches (fbolt_use, fbolt_use + strlen (fbolt_use), 1))
|
||||
+ {
|
||||
+ linker_auto_bolt_plugin_file_spec = find_a_file (&exec_prefixes,
|
||||
+ BOLTPLUGINSONAME, X_OK, false);
|
||||
+ if (!linker_auto_bolt_plugin_file_spec)
|
||||
+ {
|
||||
+ fatal_error (input_location,
|
||||
+ "-fauto-bolt or -fbolt-use is used, but %s is not found",
|
||||
+ BOLTPLUGINSONAME);
|
||||
+
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
}
|
||||
|
||||
/* Rebuild the COMPILER_PATH and LIBRARY_PATH environment variables
|
||||
--
|
||||
2.33.0
|
||||
|
||||
312
0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
Normal file
312
0034-Autofdo-Enable-discrimibator-and-MCF-algorithm-on-Au.patch
Normal file
@ -0,0 +1,312 @@
|
||||
From b020447c840c6e22440a9b9063298a06333fd2f1 Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
|
||||
Date: Sat, 23 Mar 2024 22:56:09 +0800
|
||||
Subject: [PATCH] [Autofdo]Enable discrimibator and MCF algorithm on Autofdo
|
||||
|
||||
---
|
||||
gcc/auto-profile.cc | 171 +++++++++++++++++++++++++++++++++++++++++++-
|
||||
gcc/cfghooks.cc | 7 ++
|
||||
gcc/opts.cc | 5 +-
|
||||
gcc/tree-inline.cc | 14 ++++
|
||||
4 files changed, 193 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/auto-profile.cc b/gcc/auto-profile.cc
|
||||
index 2b34b80b8..f45f0ec66 100644
|
||||
--- a/gcc/auto-profile.cc
|
||||
+++ b/gcc/auto-profile.cc
|
||||
@@ -466,6 +466,17 @@ string_table::get_index (const char *name) const
|
||||
if (name == NULL)
|
||||
return -1;
|
||||
string_index_map::const_iterator iter = map_.find (name);
|
||||
+ /* Function name may be duplicate. Try to distinguish by the
|
||||
+ #file_name#function_name defined by the autofdo tool chain. */
|
||||
+ if (iter == map_.end ())
|
||||
+ {
|
||||
+ char* file_name = get_original_name (lbasename (dump_base_name));
|
||||
+ char* file_func_name
|
||||
+ = concat ("#", file_name, "#", name, NULL);
|
||||
+ iter = map_.find (file_func_name);
|
||||
+ free (file_name);
|
||||
+ free (file_func_name);
|
||||
+ }
|
||||
if (iter == map_.end ())
|
||||
return -1;
|
||||
|
||||
@@ -654,7 +665,7 @@ function_instance::read_function_instance (function_instance_stack *stack,
|
||||
|
||||
for (unsigned i = 0; i < num_pos_counts; i++)
|
||||
{
|
||||
- unsigned offset = gcov_read_unsigned () & 0xffff0000;
|
||||
+ unsigned offset = gcov_read_unsigned ();
|
||||
unsigned num_targets = gcov_read_unsigned ();
|
||||
gcov_type count = gcov_read_counter ();
|
||||
s->pos_counts[offset].count = count;
|
||||
@@ -733,6 +744,10 @@ autofdo_source_profile::get_count_info (gimple *stmt, count_info *info) const
|
||||
function_instance *s = get_function_instance_by_inline_stack (stack);
|
||||
if (s == NULL)
|
||||
return false;
|
||||
+ if (s->get_count_info (stack[0].second + stmt->bb->discriminator, info))
|
||||
+ {
|
||||
+ return true;
|
||||
+ }
|
||||
return s->get_count_info (stack[0].second, info);
|
||||
}
|
||||
|
||||
@@ -1395,6 +1410,66 @@ afdo_propagate (bb_set *annotated_bb)
|
||||
}
|
||||
}
|
||||
|
||||
+/* Process the following scene when the branch probability
|
||||
+ is inverted by function afdo_propagate (). E.g.
|
||||
+ BB_NUM (sample count)
|
||||
+ BB1 (1000)
|
||||
+ / \
|
||||
+ BB2 (10) BB3 (0)
|
||||
+ \ /
|
||||
+ BB4
|
||||
+ In afdo_propagate (), the count of BB3 is calculated by
|
||||
+ COUNT (BB3) = 990 (990 = COUNT (BB1) - COUNT (BB2) = 1000 - 10)
|
||||
+ In fact, BB3 may be colder than BB2 by sample count.
|
||||
+ This function allocates source BB count to each succ BB by sample
|
||||
+ rate, E.g.
|
||||
+ BB2_COUNT = BB1_COUNT * (BB2_COUNT / (BB2_COUNT + BB3_COUNT)) */
|
||||
+
|
||||
+static void
|
||||
+afdo_preprocess_bb_count ()
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+ FOR_ALL_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ if (bb->count.ipa_p () && EDGE_COUNT (bb->succs) > 1
|
||||
+ && bb->count > profile_count::zero ().afdo ())
|
||||
+ {
|
||||
+ basic_block bb1 = EDGE_SUCC (bb, 0)->dest;
|
||||
+ basic_block bb2 = EDGE_SUCC (bb, 1)->dest;
|
||||
+ if (single_succ_edge (bb1) && single_succ_edge (bb2)
|
||||
+ && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest)
|
||||
+ {
|
||||
+ gcov_type max_count = 0;
|
||||
+ gcov_type total_count = 0;
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ {
|
||||
+ if (!e->dest->count.ipa_p ())
|
||||
+ {
|
||||
+ continue;
|
||||
+ }
|
||||
+ max_count = MAX (max_count, e->dest->count.to_gcov_type ());
|
||||
+ total_count += e->dest->count.to_gcov_type ();
|
||||
+ }
|
||||
+ /* Only bb_count > max_count * 2, branch probability will
|
||||
+ be inverted. */
|
||||
+ if (max_count > 0 && bb->count.to_gcov_type () > max_count * 2)
|
||||
+ {
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ {
|
||||
+ gcov_type target_count = bb->count.to_gcov_type ()
|
||||
+ * e->dest->count.to_gcov_type ()/ total_count;
|
||||
+ e->dest->count
|
||||
+ = profile_count::from_gcov_type
|
||||
+ (target_count).afdo ();
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
/* Propagate counts on control flow graph and calculate branch
|
||||
probabilities. */
|
||||
|
||||
@@ -1420,6 +1495,7 @@ afdo_calculate_branch_prob (bb_set *annotated_bb)
|
||||
}
|
||||
|
||||
afdo_find_equiv_class (annotated_bb);
|
||||
+ afdo_preprocess_bb_count ();
|
||||
afdo_propagate (annotated_bb);
|
||||
|
||||
FOR_EACH_BB_FN (bb, cfun)
|
||||
@@ -1523,6 +1599,83 @@ afdo_vpt_for_early_inline (stmt_set *promoted_stmts)
|
||||
return false;
|
||||
}
|
||||
|
||||
+/* Preparation before executing MCF algorithm. */
|
||||
+
|
||||
+static void
|
||||
+afdo_init_mcf ()
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+ edge e;
|
||||
+ edge_iterator ei;
|
||||
+
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "\n init calling mcf_smooth_cfg (). \n");
|
||||
+ }
|
||||
+
|
||||
+ /* Step1: when using mcf, BB ids must be continuous,
|
||||
+ so we need compact_blocks (). */
|
||||
+ compact_blocks ();
|
||||
+
|
||||
+ /* Step2: allocate memory for MCF input data. */
|
||||
+ bb_gcov_counts.safe_grow_cleared (cfun->cfg->x_last_basic_block);
|
||||
+ edge_gcov_counts = new hash_map<edge, gcov_type>;
|
||||
+
|
||||
+ /* Step3: init MCF input data from cfg. */
|
||||
+ FOR_ALL_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ /* Init BB count for MCF. */
|
||||
+ bb_gcov_count (bb) = bb->count.to_gcov_type ();
|
||||
+
|
||||
+ gcov_type total_count = 0;
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ {
|
||||
+ total_count += e->dest->count.to_gcov_type ();
|
||||
+ }
|
||||
+
|
||||
+ /* If there is no sample in each successor blocks, source
|
||||
+ BB samples are allocated to each edge by branch static prob. */
|
||||
+
|
||||
+ FOR_EACH_EDGE (e, ei, bb->succs)
|
||||
+ {
|
||||
+ if (total_count == 0)
|
||||
+ {
|
||||
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
||||
+ * e->probability.to_reg_br_prob_base () / REG_BR_PROB_BASE;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ edge_gcov_count (e) = e->src->count.to_gcov_type ()
|
||||
+ * e->dest->count.to_gcov_type () / total_count;
|
||||
+ }
|
||||
+ }
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+
|
||||
+/* Free the resources used by MCF and reset BB count from MCF result.
|
||||
+ branch probability has been updated in mcf_smooth_cfg (). */
|
||||
+
|
||||
+static void
|
||||
+afdo_process_after_mcf ()
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+ /* Reset BB count from MCF result. */
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ if (bb_gcov_count (bb))
|
||||
+ {
|
||||
+ bb->count
|
||||
+ = profile_count::from_gcov_type (bb_gcov_count (bb)).afdo ();
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Clean up MCF resource. */
|
||||
+ bb_gcov_counts.release ();
|
||||
+ delete edge_gcov_counts;
|
||||
+ edge_gcov_counts = NULL;
|
||||
+}
|
||||
+
|
||||
/* Annotate auto profile to the control flow graph. Do not annotate value
|
||||
profile for stmts in PROMOTED_STMTS. */
|
||||
|
||||
@@ -1574,8 +1727,20 @@ afdo_annotate_cfg (const stmt_set &promoted_stmts)
|
||||
afdo_source_profile->mark_annotated (cfun->function_end_locus);
|
||||
if (max_count > profile_count::zero ())
|
||||
{
|
||||
- /* Calculate, propagate count and probability information on CFG. */
|
||||
- afdo_calculate_branch_prob (&annotated_bb);
|
||||
+ /* 1 means -fprofile-correction is enabled manually, and MCF
|
||||
+ algorithm will be used to calculate count and probability.
|
||||
+ Otherwise, use the default calculate algorithm. */
|
||||
+ if (flag_profile_correction == 1)
|
||||
+ {
|
||||
+ afdo_init_mcf ();
|
||||
+ mcf_smooth_cfg ();
|
||||
+ afdo_process_after_mcf ();
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ /* Calculate, propagate count and probability information on CFG. */
|
||||
+ afdo_calculate_branch_prob (&annotated_bb);
|
||||
+ }
|
||||
}
|
||||
update_max_bb_count ();
|
||||
profile_status_for_fn (cfun) = PROFILE_READ;
|
||||
diff --git a/gcc/cfghooks.cc b/gcc/cfghooks.cc
|
||||
index c0b7bdcd9..323663010 100644
|
||||
--- a/gcc/cfghooks.cc
|
||||
+++ b/gcc/cfghooks.cc
|
||||
@@ -542,6 +542,9 @@ split_block_1 (basic_block bb, void *i)
|
||||
return NULL;
|
||||
|
||||
new_bb->count = bb->count;
|
||||
+ /* Copy discriminator from original bb to distinguish among
|
||||
+ several basic blocks that share a common locus, allowing for
|
||||
+ more accurate autofdo. */
|
||||
new_bb->discriminator = bb->discriminator;
|
||||
|
||||
if (dom_info_available_p (CDI_DOMINATORS))
|
||||
@@ -1113,6 +1116,10 @@ duplicate_block (basic_block bb, edge e, basic_block after, copy_bb_data *id)
|
||||
move_block_after (new_bb, after);
|
||||
|
||||
new_bb->flags = (bb->flags & ~BB_DUPLICATED);
|
||||
+ /* Copy discriminator from original bb to distinguish among
|
||||
+ several basic blocks that share a common locus, allowing for
|
||||
+ more accurate autofdo. */
|
||||
+ new_bb->discriminator = bb->discriminator;
|
||||
FOR_EACH_EDGE (s, ei, bb->succs)
|
||||
{
|
||||
/* Since we are creating edges from a new block to successors
|
||||
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||||
index 2bba88140..4b4925331 100644
|
||||
--- a/gcc/opts.cc
|
||||
+++ b/gcc/opts.cc
|
||||
@@ -3014,7 +3014,10 @@ common_handle_option (struct gcc_options *opts,
|
||||
/* FALLTHRU */
|
||||
case OPT_fauto_profile:
|
||||
enable_fdo_optimizations (opts, opts_set, value);
|
||||
- SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction, value);
|
||||
+ /* 2 is special and means flag_profile_correction is turned on by
|
||||
+ -fauto-profile. */
|
||||
+ SET_OPTION_IF_UNSET (opts, opts_set, flag_profile_correction,
|
||||
+ (value ? 2 : 0));
|
||||
break;
|
||||
|
||||
case OPT_fipa_struct_reorg_:
|
||||
diff --git a/gcc/tree-inline.cc b/gcc/tree-inline.cc
|
||||
index f892cee3f..f50dbbc52 100644
|
||||
--- a/gcc/tree-inline.cc
|
||||
+++ b/gcc/tree-inline.cc
|
||||
@@ -2038,6 +2038,10 @@ copy_bb (copy_body_data *id, basic_block bb,
|
||||
basic_block_info automatically. */
|
||||
copy_basic_block = create_basic_block (NULL, (basic_block) prev->aux);
|
||||
copy_basic_block->count = bb->count.apply_scale (num, den);
|
||||
+ /* Copy discriminator from original bb to distinguish among
|
||||
+ several basic blocks that share a common locus, allowing for
|
||||
+ more accurate autofdo. */
|
||||
+ copy_basic_block->discriminator = bb->discriminator;
|
||||
|
||||
copy_gsi = gsi_start_bb (copy_basic_block);
|
||||
|
||||
@@ -3058,6 +3062,16 @@ copy_cfg_body (copy_body_data * id,
|
||||
den += e->count ();
|
||||
ENTRY_BLOCK_PTR_FOR_FN (cfun)->count = den;
|
||||
}
|
||||
+ /* When autofdo uses PMU as the sampling unit, the number of
|
||||
+ ENTRY_BLOCK_PTR_FOR_FN cannot be obtained directly and will
|
||||
+ be zero. It using for adjust_for_ipa_scaling will cause the
|
||||
+ inlined BB count incorrectly overestimated. So set den equal
|
||||
+ to num, which is the source inline BB count to avoid
|
||||
+ overestimated. */
|
||||
+ if (den == profile_count::zero ().afdo ())
|
||||
+ {
|
||||
+ den = num;
|
||||
+ }
|
||||
|
||||
profile_count::adjust_for_ipa_scaling (&num, &den);
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
194
0035-Add-insn-defs-and-correct-costs-for-cmlt-generation.patch
Normal file
194
0035-Add-insn-defs-and-correct-costs-for-cmlt-generation.patch
Normal file
@ -0,0 +1,194 @@
|
||||
From aa39a66f6029fe16a656d7c6339908b953fb1e04 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Thu, 22 Feb 2024 11:27:43 +0300
|
||||
Subject: [PATCH 01/18] Add insn defs and correct costs for cmlt generation
|
||||
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 48 +++++++++++++++++++++++++++++
|
||||
gcc/config/aarch64/aarch64.cc | 15 +++++++++
|
||||
gcc/config/aarch64/aarch64.opt | 4 +++
|
||||
gcc/config/aarch64/iterators.md | 3 +-
|
||||
gcc/config/aarch64/predicates.md | 25 +++++++++++++++
|
||||
gcc/testsuite/gcc.dg/combine-cmlt.c | 20 ++++++++++++
|
||||
6 files changed, 114 insertions(+), 1 deletion(-)
|
||||
create mode 100755 gcc/testsuite/gcc.dg/combine-cmlt.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index ee7f0b89c..82f73805f 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -6454,6 +6454,54 @@
|
||||
[(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
|
||||
)
|
||||
|
||||
+;; Use cmlt to replace vector arithmetic operations like this (SImode example):
|
||||
+;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
|
||||
+;; TODO: maybe extend to scalar operations or other cm** instructions.
|
||||
+
|
||||
+(define_insn "*aarch64_cmlt_as_arith<mode>"
|
||||
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
||||
+ (minus:<V_INT_EQUIV>
|
||||
+ (ashift:<V_INT_EQUIV>
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_operand:VDQHSD 1 "register_operand" "w")
|
||||
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
|
||||
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
|
||||
+ (match_operand:VDQHSD 4 "half_size_operand"))
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_dup 1)
|
||||
+ (match_dup 2))
|
||||
+ (match_dup 3))))]
|
||||
+ "TARGET_SIMD && flag_cmlt_arith"
|
||||
+ "cmlt\t%<v>0.<V2ntype>, %<v>1.<V2ntype>, #0"
|
||||
+ [(set_attr "type" "neon_compare_zero")]
|
||||
+)
|
||||
+
|
||||
+;; The helper definition that allows combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*arch64_cmlt_tmp<mode>"
|
||||
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_operand:VDQHSD 1 "register_operand" "w")
|
||||
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
|
||||
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
|
||||
+ "TARGET_SIMD && flag_cmlt_arith"
|
||||
+ "#"
|
||||
+ "&& reload_completed"
|
||||
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_operand:VDQHSD 1 "register_operand")
|
||||
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand")))
|
||||
+ (set (match_dup 0)
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "neon_compare_zero")]
|
||||
+)
|
||||
+
|
||||
(define_insn_and_split "aarch64_cm<optab>di"
|
||||
[(set (match_operand:DI 0 "register_operand" "=w,w,r")
|
||||
(neg:DI
|
||||
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||||
index a3da4ca30..04072ca25 100644
|
||||
--- a/gcc/config/aarch64/aarch64.cc
|
||||
+++ b/gcc/config/aarch64/aarch64.cc
|
||||
@@ -14064,6 +14064,21 @@ cost_minus:
|
||||
return true;
|
||||
}
|
||||
|
||||
+ /* Detect aarch64_cmlt_as_arith instruction. Now only this pattern
|
||||
+ matches the condition. The costs of cmlt and sub instructions
|
||||
+ are comparable, so we are not increasing the cost here. */
|
||||
+ if (flag_cmlt_arith && GET_CODE (op0) == ASHIFT
|
||||
+ && GET_CODE (op1) == AND)
|
||||
+ {
|
||||
+ rtx op0_subop0 = XEXP (op0, 0);
|
||||
+ if (rtx_equal_p (op0_subop0, op1))
|
||||
+ {
|
||||
+ rtx lshrt_op = XEXP (op0_subop0, 0);
|
||||
+ if (GET_CODE (lshrt_op) == LSHIFTRT)
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Look for SUB (extended register). */
|
||||
if (is_a <scalar_int_mode> (mode)
|
||||
&& aarch64_rtx_arith_op_extract_p (op1))
|
||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
index a64b927e9..101664c7c 100644
|
||||
--- a/gcc/config/aarch64/aarch64.opt
|
||||
+++ b/gcc/config/aarch64/aarch64.opt
|
||||
@@ -262,6 +262,10 @@ Use an immediate to offset from the stack protector guard register, sp_el0.
|
||||
This option is for use with fstack-protector-strong and not for use in
|
||||
user-land code.
|
||||
|
||||
+mcmlt-arith
|
||||
+Target Var(flag_cmlt_arith) Optimization Init(0)
|
||||
+Use SIMD cmlt instruction to perform some arithmetic/logic calculations.
|
||||
+
|
||||
TargetVariable
|
||||
long aarch64_stack_protector_guard_offset = 0
|
||||
|
||||
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
||||
index 26a840d7f..967e6b0b1 100644
|
||||
--- a/gcc/config/aarch64/iterators.md
|
||||
+++ b/gcc/config/aarch64/iterators.md
|
||||
@@ -1485,7 +1485,8 @@
|
||||
(V2DI "2s")])
|
||||
|
||||
;; Register suffix narrowed modes for VQN.
|
||||
-(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
|
||||
+(define_mode_attr V2ntype [(V4HI "8b") (V2SI "4h")
|
||||
+ (V8HI "16b") (V4SI "8h")
|
||||
(V2DI "4s")])
|
||||
|
||||
;; Widened modes of vector modes.
|
||||
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||||
index c308015ac..07c14aacb 100644
|
||||
--- a/gcc/config/aarch64/predicates.md
|
||||
+++ b/gcc/config/aarch64/predicates.md
|
||||
@@ -49,6 +49,31 @@
|
||||
return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3);
|
||||
})
|
||||
|
||||
+(define_predicate "half_size_minus_one_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == size - 1);
|
||||
+})
|
||||
+
|
||||
+(define_predicate "half_size_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
|
||||
+})
|
||||
+
|
||||
+(define_predicate "cmlt_arith_mask_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
|
||||
+ unsigned long long mask = ((unsigned long long) 1 << size) | 1;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == mask);
|
||||
+})
|
||||
+
|
||||
(define_predicate "subreg_lowpart_operator"
|
||||
(ior (match_code "truncate")
|
||||
(and (match_code "subreg")
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-cmlt.c b/gcc/testsuite/gcc.dg/combine-cmlt.c
|
||||
new file mode 100755
|
||||
index 000000000..b4c9a37ff
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-cmlt.c
|
||||
@@ -0,0 +1,20 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -mcmlt-arith" } */
|
||||
+
|
||||
+/* The test checks usage of cmlt insns for arithmetic/logic calculations
|
||||
+ * in foo (). It's inspired by sources of x264 codec. */
|
||||
+
|
||||
+typedef unsigned short int uint16_t;
|
||||
+typedef unsigned int uint32_t;
|
||||
+
|
||||
+void foo( uint32_t *a, uint32_t *b)
|
||||
+{
|
||||
+ for (unsigned i = 0; i < 4; i++)
|
||||
+ {
|
||||
+ uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
|
||||
+ &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
|
||||
+ b[i] = (a[i]+s)^s;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
560
0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch
Normal file
560
0036-rtl-ifcvt-introduce-rtl-ifcvt-enchancements.patch
Normal file
@ -0,0 +1,560 @@
|
||||
From 4cae948c1c00ad7a59f0f234f809fbd9a0208eb4 Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Wed, 28 Feb 2024 23:05:12 +0800
|
||||
Subject: [PATCH 02/18] [rtl-ifcvt] introduce rtl ifcvt enchancements new
|
||||
option: -fifcvt-allow-complicated-cmps: allows ifcvt to deal
|
||||
with complicated cmps like
|
||||
|
||||
cmp reg1 (reg2 + reg3)
|
||||
|
||||
can increase compilation time
|
||||
new param:
|
||||
-param=ifcvt-allow-register-renaming=[0,1,2]
|
||||
1 : allows ifcvt to rename registers in then and else bb
|
||||
2 : allows to rename registers in condition and else/then bb
|
||||
can increase compilation time and register pressure
|
||||
---
|
||||
gcc/common.opt | 4 +
|
||||
gcc/ifcvt.cc | 291 +++++++++++++++---
|
||||
gcc/params.opt | 4 +
|
||||
.../gcc.c-torture/execute/ifcvt-renaming-1.c | 35 +++
|
||||
gcc/testsuite/gcc.dg/ifcvt-6.c | 27 ++
|
||||
5 files changed, 311 insertions(+), 50 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index c7c6bc256..aa00fb7b0 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -3691,4 +3691,8 @@ fipa-ra
|
||||
Common Var(flag_ipa_ra) Optimization
|
||||
Use caller save register across calls if possible.
|
||||
|
||||
+fifcvt-allow-complicated-cmps
|
||||
+Common Var(flag_ifcvt_allow_complicated_cmps) Optimization
|
||||
+Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
|
||||
+
|
||||
; This comment is to ensure we retain the blank line above.
|
||||
diff --git a/gcc/ifcvt.cc b/gcc/ifcvt.cc
|
||||
index 2c1eba312..584db7b55 100644
|
||||
--- a/gcc/ifcvt.cc
|
||||
+++ b/gcc/ifcvt.cc
|
||||
@@ -886,7 +886,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
|
||||
}
|
||||
|
||||
/* Don't even try if the comparison operands or the mode of X are weird. */
|
||||
- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
|
||||
+ if (!flag_ifcvt_allow_complicated_cmps
|
||||
+ && (cond_complex
|
||||
+ || !SCALAR_INT_MODE_P (GET_MODE (x))))
|
||||
return NULL_RTX;
|
||||
|
||||
return emit_store_flag (x, code, XEXP (cond, 0),
|
||||
@@ -1965,7 +1967,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
|
||||
/* Currently support only simple single sets in test_bb. */
|
||||
if (!sset
|
||||
|| !noce_operand_ok (SET_DEST (sset))
|
||||
- || contains_ccmode_rtx_p (SET_DEST (sset))
|
||||
+ || (!flag_ifcvt_allow_complicated_cmps
|
||||
+ && contains_ccmode_rtx_p (SET_DEST (sset)))
|
||||
|| !noce_operand_ok (SET_SRC (sset)))
|
||||
return false;
|
||||
|
||||
@@ -1979,13 +1982,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
|
||||
in this function. */
|
||||
|
||||
static bool
|
||||
-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
+bbs_ok_for_cmove_arith (basic_block bb_a,
|
||||
+ basic_block bb_b,
|
||||
+ rtx to_rename,
|
||||
+ bitmap conflict_regs)
|
||||
{
|
||||
rtx_insn *a_insn;
|
||||
bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
|
||||
-
|
||||
+ bitmap intersections = BITMAP_ALLOC (&reg_obstack);
|
||||
df_ref def;
|
||||
df_ref use;
|
||||
+ rtx_insn *last_a = last_active_insn (bb_a, FALSE);
|
||||
|
||||
FOR_BB_INSNS (bb_a, a_insn)
|
||||
{
|
||||
@@ -1995,18 +2002,15 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
rtx sset_a = single_set (a_insn);
|
||||
|
||||
if (!sset_a)
|
||||
- {
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
- }
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
/* Record all registers that BB_A sets. */
|
||||
FOR_EACH_INSN_DEF (def, a_insn)
|
||||
- if (!(to_rename && DF_REF_REG (def) == to_rename))
|
||||
+ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
|
||||
bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
|
||||
}
|
||||
|
||||
+ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
|
||||
rtx_insn *b_insn;
|
||||
-
|
||||
FOR_BB_INSNS (bb_b, b_insn)
|
||||
{
|
||||
if (!active_insn_p (b_insn))
|
||||
@@ -2015,10 +2019,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
rtx sset_b = single_set (b_insn);
|
||||
|
||||
if (!sset_b)
|
||||
- {
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
- }
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
|
||||
/* Make sure this is a REG and not some instance
|
||||
of ZERO_EXTRACT or SUBREG or other dangerous stuff.
|
||||
@@ -2030,25 +2031,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
if (MEM_P (SET_DEST (sset_b)))
|
||||
gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
|
||||
else if (!REG_P (SET_DEST (sset_b)))
|
||||
- {
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
- }
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
|
||||
- /* If the insn uses a reg set in BB_A return false. */
|
||||
+ /* If the insn uses a reg set in BB_A return false
|
||||
+ or try to collect register list for renaming. */
|
||||
FOR_EACH_INSN_USE (use, b_insn)
|
||||
{
|
||||
- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
|
||||
+ if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
|
||||
{
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
+ if (param_ifcvt_allow_register_renaming < 1)
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
+
|
||||
+ /* Those regs should be renamed. We can't rename CC reg, but
|
||||
+ possibly we can provide combined comparison in the future. */
|
||||
+ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
+ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
|
||||
}
|
||||
}
|
||||
-
|
||||
}
|
||||
|
||||
BITMAP_FREE (bba_sets);
|
||||
+ BITMAP_FREE (intersections);
|
||||
return true;
|
||||
+
|
||||
+end_cmove_arith_check_and_fail:
|
||||
+ BITMAP_FREE (bba_sets);
|
||||
+ BITMAP_FREE (intersections);
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* Emit copies of all the active instructions in BB except the last.
|
||||
@@ -2103,6 +2113,142 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* This function tries to rename regs that intersect with considered bb
|
||||
+ inside condition expression. Condition expression will be moved down
|
||||
+ if the optimization will be applied, so it is essential to be sure that
|
||||
+ all intersected registers will be renamed otherwise transformation
|
||||
+ can't be applied. Function returns true if renaming was successful
|
||||
+ and optimization can proceed further. */
|
||||
+
|
||||
+static bool
|
||||
+noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
|
||||
+{
|
||||
+ bool success = true;
|
||||
+ if (bitmap_empty_p (cond_rename_regs))
|
||||
+ return true;
|
||||
+ if (param_ifcvt_allow_register_renaming < 2)
|
||||
+ return false;
|
||||
+ df_ref use;
|
||||
+ rtx_insn *cmp_insn = if_info->cond_earliest;
|
||||
+ /* A jump instruction as a condition is currently unsupported. */
|
||||
+ if (JUMP_P (cmp_insn))
|
||||
+ return false;
|
||||
+ rtx_insn *before_cmp = PREV_INSN (cmp_insn);
|
||||
+ start_sequence ();
|
||||
+ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
|
||||
+ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
|
||||
+ FOR_EACH_INSN_USE (use, cmp_insn)
|
||||
+ {
|
||||
+ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
|
||||
+ {
|
||||
+ rtx use_reg = DF_REF_REG (use);
|
||||
+ rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
|
||||
+ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
|
||||
+ {
|
||||
+ end_sequence ();
|
||||
+ return false;
|
||||
+ }
|
||||
+ noce_emit_move_insn (tmp, use_reg);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ emit_insn (PATTERN (copy_of_cmp));
|
||||
+ rtx_insn *seq = get_insns ();
|
||||
+ unshare_all_rtl_in_chain (seq);
|
||||
+ end_sequence ();
|
||||
+
|
||||
+ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn));
|
||||
+ delete_insn_and_edges (cmp_insn);
|
||||
+ rtx_insn *insn;
|
||||
+ FOR_BB_INSNS (cmp_block, insn)
|
||||
+ df_insn_rescan (insn);
|
||||
+
|
||||
+ if_info->cond = noce_get_condition (if_info->jump,
|
||||
+ &copy_of_cmp,
|
||||
+ if_info->then_else_reversed);
|
||||
+ if_info->cond_earliest = copy_of_cmp;
|
||||
+ if_info->rev_cond = NULL_RTX;
|
||||
+
|
||||
+ return success;
|
||||
+}
|
||||
+
|
||||
+/* This function tries to rename regs that intersect with considered bb.
|
||||
+ return true if the renaming was successful and optimization can
|
||||
+ proceed further, false otherwise. */
|
||||
+static bool
|
||||
+noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
+{
|
||||
+ if (bitmap_empty_p (rename_regs))
|
||||
+ return true;
|
||||
+ rtx_insn *insn;
|
||||
+ rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
|
||||
+ bool res = true;
|
||||
+ start_sequence ();
|
||||
+ FOR_BB_INSNS (test_bb, insn)
|
||||
+ {
|
||||
+ if (!active_insn_p (insn))
|
||||
+ continue;
|
||||
+ /* Only ssets are supported for now. */
|
||||
+ rtx sset = single_set (insn);
|
||||
+ gcc_assert (sset);
|
||||
+ rtx x = SET_DEST (sset);
|
||||
+ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
|
||||
+ continue;
|
||||
+ /* Do not need to rename dest in the last instruction
|
||||
+ it will be renamed anyway. */
|
||||
+ if (insn == last_insn)
|
||||
+ continue;
|
||||
+ machine_mode mode = GET_MODE (x);
|
||||
+ rtx tmp = gen_reg_rtx (mode);
|
||||
+ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
|
||||
+ {
|
||||
+ gcc_assert (insn != last_insn);
|
||||
+ /* We can generate additional move for such case,
|
||||
+ but it will increase register pressure.
|
||||
+ For now just stop transformation. */
|
||||
+ rtx result_rtx = SET_DEST (single_set (last_insn));
|
||||
+ if (REG_P (result_rtx) && (x != result_rtx))
|
||||
+ {
|
||||
+ res = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ if (!validate_replace_rtx (x, tmp, insn))
|
||||
+ gcc_unreachable ();
|
||||
+ noce_emit_move_insn (tmp,x);
|
||||
+ }
|
||||
+ set_used_flags (insn);
|
||||
+ rtx_insn *rename_candidate;
|
||||
+ for (rename_candidate = NEXT_INSN (insn);
|
||||
+ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb));
|
||||
+ rename_candidate = NEXT_INSN (rename_candidate))
|
||||
+ {
|
||||
+ if (!reg_overlap_mentioned_p (x, rename_candidate))
|
||||
+ continue;
|
||||
+
|
||||
+ int replace_res = TRUE;
|
||||
+ if (rename_candidate == last_insn)
|
||||
+ {
|
||||
+ validate_replace_src_group (x, tmp, rename_candidate);
|
||||
+ replace_res = apply_change_group ();
|
||||
+ }
|
||||
+ else
|
||||
+ replace_res = validate_replace_rtx (x, tmp, rename_candidate);
|
||||
+ gcc_assert (replace_res);
|
||||
+ set_used_flags (rename_candidate);
|
||||
+ }
|
||||
+ set_used_flags (x);
|
||||
+ set_used_flags (tmp);
|
||||
+ }
|
||||
+ rtx_insn *seq = get_insns ();
|
||||
+ unshare_all_rtl_in_chain (seq);
|
||||
+ end_sequence ();
|
||||
+ emit_insn_before_setloc (seq, first_active_insn (test_bb),
|
||||
+ INSN_LOCATION (first_active_insn (test_bb)));
|
||||
+ FOR_BB_INSNS (test_bb, insn)
|
||||
+ df_insn_rescan (insn);
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
/* Try more complex cases involving conditional_move. */
|
||||
|
||||
static int
|
||||
@@ -2185,11 +2331,30 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
std::swap (then_bb, else_bb);
|
||||
}
|
||||
}
|
||||
-
|
||||
+ bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
|
||||
+ bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
|
||||
if (then_bb && else_bb
|
||||
- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
|
||||
- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
|
||||
- return FALSE;
|
||||
+ && (!bbs_ok_for_cmove_arith (then_bb, else_bb,
|
||||
+ if_info->orig_x,
|
||||
+ then_bb_rename_regs)
|
||||
+ || !bbs_ok_for_cmove_arith (else_bb, then_bb,
|
||||
+ if_info->orig_x,
|
||||
+ else_bb_rename_regs)))
|
||||
+ {
|
||||
+ BITMAP_FREE (then_bb_rename_regs);
|
||||
+ BITMAP_FREE (else_bb_rename_regs);
|
||||
+ return FALSE;
|
||||
+ }
|
||||
+ bool prepass_renaming = noce_rename_regs_in_bb (then_bb,
|
||||
+ then_bb_rename_regs)
|
||||
+ && noce_rename_regs_in_bb (else_bb,
|
||||
+ else_bb_rename_regs);
|
||||
+
|
||||
+ BITMAP_FREE (then_bb_rename_regs);
|
||||
+ BITMAP_FREE (else_bb_rename_regs);
|
||||
+
|
||||
+ if (!prepass_renaming)
|
||||
+ return FALSE;
|
||||
|
||||
start_sequence ();
|
||||
|
||||
@@ -3072,7 +3237,8 @@ noce_operand_ok (const_rtx op)
|
||||
|
||||
static bool
|
||||
bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
- unsigned int *cost, bool *simple_p)
|
||||
+ unsigned int *cost, bool *simple_p,
|
||||
+ bitmap cond_rename_regs)
|
||||
{
|
||||
if (!test_bb)
|
||||
return false;
|
||||
@@ -3112,8 +3278,9 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
rtx_insn *prev_last_insn = PREV_INSN (last_insn);
|
||||
gcc_assert (prev_last_insn);
|
||||
|
||||
- /* For now, disallow setting x multiple times in test_bb. */
|
||||
- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn))
|
||||
+ if (REG_P (x)
|
||||
+ && reg_set_between_p (x, first_insn, prev_last_insn)
|
||||
+ && param_ifcvt_allow_register_renaming < 1)
|
||||
return false;
|
||||
|
||||
bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack);
|
||||
@@ -3125,25 +3292,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
rtx_insn *insn;
|
||||
FOR_BB_INSNS (test_bb, insn)
|
||||
{
|
||||
- if (insn != last_insn)
|
||||
- {
|
||||
- if (!active_insn_p (insn))
|
||||
- continue;
|
||||
+ if (insn == last_insn)
|
||||
+ continue;
|
||||
+ if (!active_insn_p (insn))
|
||||
+ continue;
|
||||
|
||||
- if (!insn_valid_noce_process_p (insn, cc))
|
||||
- goto free_bitmap_and_fail;
|
||||
+ if (!insn_valid_noce_process_p (insn, cc))
|
||||
+ goto free_bitmap_and_fail;
|
||||
|
||||
- rtx sset = single_set (insn);
|
||||
- gcc_assert (sset);
|
||||
+ rtx sset = single_set (insn);
|
||||
+ gcc_assert (sset);
|
||||
|
||||
- if (contains_mem_rtx_p (SET_SRC (sset))
|
||||
- || !REG_P (SET_DEST (sset))
|
||||
- || reg_overlap_mentioned_p (SET_DEST (sset), cond))
|
||||
- goto free_bitmap_and_fail;
|
||||
+ if (contains_mem_rtx_p (SET_SRC (sset))
|
||||
+ || !REG_P (SET_DEST (sset)))
|
||||
+ goto free_bitmap_and_fail;
|
||||
|
||||
- potential_cost += pattern_cost (sset, speed_p);
|
||||
- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
+ if (reg_overlap_mentioned_p (SET_DEST (sset), cond))
|
||||
+ {
|
||||
+ if (param_ifcvt_allow_register_renaming < 1)
|
||||
+ goto free_bitmap_and_fail;
|
||||
+ rtx sset_dest = SET_DEST (sset);
|
||||
+ if (REG_P (sset_dest)
|
||||
+ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC))
|
||||
+ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest));
|
||||
+ else
|
||||
+ goto free_bitmap_and_fail;
|
||||
}
|
||||
+ potential_cost += pattern_cost (sset, speed_p);
|
||||
+ if (SET_DEST (sset) != SET_DEST (last_set))
|
||||
+ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
}
|
||||
|
||||
/* If any of the intermediate results in test_bb are live after test_bb
|
||||
@@ -3777,15 +3954,29 @@ noce_process_if_block (struct noce_if_info *if_info)
|
||||
|
||||
bool speed_p = optimize_bb_for_speed_p (test_bb);
|
||||
unsigned int then_cost = 0, else_cost = 0;
|
||||
+ bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack);
|
||||
if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
|
||||
- &if_info->then_simple))
|
||||
- return false;
|
||||
+ &if_info->then_simple, cond_rename_regs))
|
||||
+ {
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ return false;
|
||||
+ }
|
||||
|
||||
if (else_bb
|
||||
&& !bb_valid_for_noce_process_p (else_bb, cond, &else_cost,
|
||||
- &if_info->else_simple))
|
||||
- return false;
|
||||
+ &if_info->else_simple, cond_rename_regs))
|
||||
+ {
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ return false;
|
||||
+ }
|
||||
|
||||
+ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs))
|
||||
+ {
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ return false;
|
||||
+ }
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ cond = if_info->cond;
|
||||
if (speed_p)
|
||||
if_info->original_cost += average_cost (then_cost, else_cost,
|
||||
find_edge (test_bb, then_bb));
|
||||
@@ -5823,12 +6014,13 @@ if_convert (bool after_combine)
|
||||
{
|
||||
basic_block bb;
|
||||
int pass;
|
||||
-
|
||||
if (optimize == 1)
|
||||
{
|
||||
df_live_add_problem ();
|
||||
df_live_set_all_dirty ();
|
||||
}
|
||||
+ free_dominance_info (CDI_DOMINATORS);
|
||||
+ cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
|
||||
/* Record whether we are after combine pass. */
|
||||
ifcvt_after_combine = after_combine;
|
||||
@@ -5933,7 +6125,6 @@ rest_of_handle_if_conversion (void)
|
||||
dump_reg_info (dump_file);
|
||||
dump_flow_info (dump_file, dump_flags);
|
||||
}
|
||||
- cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
if_convert (false);
|
||||
if (num_updated_if_blocks)
|
||||
/* Get rid of any dead CC-related instructions. */
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index d2196dc68..ba87f820b 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -669,6 +669,10 @@ Maximum permissible cost for the sequence that would be generated by the RTL if-
|
||||
Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization
|
||||
Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable.
|
||||
|
||||
+-param=ifcvt-allow-register-renaming=
|
||||
+Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization
|
||||
+Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created.
|
||||
+
|
||||
-param=max-sched-extend-regions-iters=
|
||||
Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization
|
||||
The maximum number of iterations through CFG to extend regions.
|
||||
diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
|
||||
new file mode 100644
|
||||
index 000000000..65c4d4140
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
|
||||
@@ -0,0 +1,35 @@
|
||||
+
|
||||
+extern void abort(void);
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+int foo (int x, int y, int z, int a, int b)
|
||||
+{
|
||||
+ if (a < 2) {
|
||||
+ if (a == 0) {
|
||||
+ if (x - y < 0)
|
||||
+ x = x - y + z;
|
||||
+ else
|
||||
+ x = x - y;
|
||||
+ }
|
||||
+ else {
|
||||
+ if (x + y >= z)
|
||||
+ x = x + y - z;
|
||||
+ else
|
||||
+ x = x + y;
|
||||
+ }
|
||||
+ }
|
||||
+ return x;
|
||||
+}
|
||||
+
|
||||
+int main(void) {
|
||||
+ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2
|
||||
+ abort ();
|
||||
+ if (foo (50,10,7,0,1) != 40) // x - y = 40
|
||||
+ abort ();
|
||||
+ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8
|
||||
+ abort ();
|
||||
+ if (foo (5,10,70,1,1) != 15) // x + y = 15
|
||||
+ abort ();
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c
|
||||
new file mode 100644
|
||||
index 000000000..be9a67b3f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/ifcvt-6.c
|
||||
@@ -0,0 +1,27 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
+/* { dg-options "-fdump-rtl-ce1 -O2 --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 -fifcvt-allow-complicated-cmps" } */
|
||||
+
|
||||
+typedef unsigned int uint16_t;
|
||||
+
|
||||
+uint16_t
|
||||
+foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a,
|
||||
+ uint16_t b, uint16_t c, uint16_t d) {
|
||||
+ int i = 1;
|
||||
+ int j = 1;
|
||||
+ if (a > b) {
|
||||
+ j = x;
|
||||
+ if (b > c)
|
||||
+ i = y;
|
||||
+ else
|
||||
+ i = z;
|
||||
+ }
|
||||
+ else {
|
||||
+ j = y;
|
||||
+ if (c > d)
|
||||
+ i = z;
|
||||
+ }
|
||||
+ return i * j;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */
|
||||
+
|
||||
--
|
||||
2.33.0
|
||||
|
||||
109
0037-Perform-early-if-conversion-of-simple-arithmetic.patch
Normal file
109
0037-Perform-early-if-conversion-of-simple-arithmetic.patch
Normal file
@ -0,0 +1,109 @@
|
||||
From 310eade1450995b55d9f8120561022fbf164b2ec Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Thu, 12 Jan 2023 14:52:49 +0300
|
||||
Subject: [PATCH 03/18] Perform early if-conversion of simple arithmetic
|
||||
|
||||
---
|
||||
gcc/common.opt | 4 ++++
|
||||
gcc/match.pd | 25 +++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/ifcvt-gimple.c | 37 +++++++++++++++++++++++++++++
|
||||
3 files changed, 66 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index aa00fb7b0..dac477c04 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1821,6 +1821,10 @@ fif-conversion2
|
||||
Common Var(flag_if_conversion2) Optimization
|
||||
Perform conversion of conditional jumps to conditional execution.
|
||||
|
||||
+fif-conversion-gimple
|
||||
+Common Var(flag_if_conversion_gimple) Optimization
|
||||
+Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
|
||||
+
|
||||
fstack-reuse=
|
||||
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
|
||||
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 6f24d5079..3cbaf2a5b 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -4278,6 +4278,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
)
|
||||
)
|
||||
+
|
||||
+(if (flag_if_conversion_gimple)
|
||||
+ (for simple_op (plus minus bit_and bit_ior bit_xor)
|
||||
+ (simplify
|
||||
+ (cond @0 (simple_op @1 INTEGER_CST@2) @1)
|
||||
+ (switch
|
||||
+ /* a = cond ? a + 1 : a -> a = a + ((int) cond) */
|
||||
+ (if (integer_onep (@2))
|
||||
+ (simple_op @1 (convert (convert:boolean_type_node @0))))
|
||||
+ /* a = cond ? a + powerof2cst : a ->
|
||||
+ a = a + (((int) cond) << log2 (powerof2cst)) */
|
||||
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
|
||||
+ (with
|
||||
+ {
|
||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
||||
+ }
|
||||
+ (simple_op @1 (lshift (convert (convert:boolean_type_node @0))
|
||||
+ { shift; })
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+)
|
||||
#endif
|
||||
|
||||
#if GIMPLE
|
||||
diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
new file mode 100644
|
||||
index 000000000..0f7c87e5c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
|
||||
+
|
||||
+int test_int (int optimizable_int) {
|
||||
+ if (optimizable_int > 5)
|
||||
+ ++optimizable_int;
|
||||
+ return optimizable_int;
|
||||
+}
|
||||
+
|
||||
+int test_int_pow2 (int optimizable_int_pow2) {
|
||||
+ if (optimizable_int_pow2 <= 4)
|
||||
+ optimizable_int_pow2 += 1024;
|
||||
+ return optimizable_int_pow2;
|
||||
+}
|
||||
+
|
||||
+int test_int_non_pow2 (int not_optimizable_int_non_pow2) {
|
||||
+ if (not_optimizable_int_non_pow2 == 1)
|
||||
+ not_optimizable_int_non_pow2 += 513;
|
||||
+ return not_optimizable_int_non_pow2;
|
||||
+}
|
||||
+
|
||||
+float test_float (float not_optimizable_float) {
|
||||
+ if (not_optimizable_float > 5)
|
||||
+ not_optimizable_float += 1;
|
||||
+ return not_optimizable_float;
|
||||
+}
|
||||
+
|
||||
+/* Expecting if-else block in test_float and test_int_non_pow2 only. */
|
||||
+/* { dg-final { scan-tree-dump-not "if \\(optimizable" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump "if \\(not_optimizable_int_non_pow2" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump "if \\(not_optimizable_float" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "if " 2 "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "else" 2 "optimized" } } */
|
||||
+
|
||||
+/* Expecting shifted result only for optimizable_int_pow2. */
|
||||
+/* { dg-final { scan-tree-dump-times " << " 1 "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump " << 10;" "optimized" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
252
0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
Normal file
252
0038-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
Normal file
@ -0,0 +1,252 @@
|
||||
From 6684509e81e4341675c73a7dc853180229a8abcb Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Tue, 24 Jan 2023 16:43:40 +0300
|
||||
Subject: [PATCH 04/18] Add option to allow matching uaddsub overflow for widen
|
||||
ops too.
|
||||
|
||||
---
|
||||
gcc/common.opt | 5 ++
|
||||
gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
|
||||
gcc/tree-ssa-math-opts.cc | 43 ++++++++--
|
||||
3 files changed, 184 insertions(+), 7 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index dac477c04..39c90604e 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -3106,6 +3106,11 @@ freciprocal-math
|
||||
Common Var(flag_reciprocal_math) SetByCombined Optimization
|
||||
Same as -fassociative-math for expressions which include division.
|
||||
|
||||
+fuaddsub-overflow-match-all
|
||||
+Common Var(flag_uaddsub_overflow_match_all)
|
||||
+Match unsigned add/sub overflow even if the target does not support
|
||||
+the corresponding instruction.
|
||||
+
|
||||
; Nonzero means that unsafe floating-point math optimizations are allowed
|
||||
; for the sake of speed. IEEE compliance is not guaranteed, and operations
|
||||
; are allowed to assume that their arguments and results are "normal"
|
||||
diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
|
||||
new file mode 100644
|
||||
index 000000000..96c26d308
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/uaddsub.c
|
||||
@@ -0,0 +1,143 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef unsigned __int128 uint128_t;
|
||||
+typedef struct uint256_t
|
||||
+{
|
||||
+ uint128_t lo;
|
||||
+ uint128_t hi;
|
||||
+} uint256_t;
|
||||
+
|
||||
+uint16_t add16 (uint8_t a, uint8_t b)
|
||||
+{
|
||||
+ uint8_t tmp = a + b;
|
||||
+ uint8_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint16_t res = overflow;
|
||||
+ res <<= 8;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint32_t add32 (uint16_t a, uint16_t b)
|
||||
+{
|
||||
+ uint16_t tmp = a + b;
|
||||
+ uint16_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint32_t res = overflow;
|
||||
+ res <<= 16;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t add64 (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t tmp = a + b;
|
||||
+ uint32_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint64_t res = overflow;
|
||||
+ res <<= 32;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t add128 (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t tmp = a + b;
|
||||
+ uint64_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint128_t res = overflow;
|
||||
+ res <<= 64;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint256_t add256 (uint128_t a, uint128_t b)
|
||||
+{
|
||||
+ uint128_t tmp = a + b;
|
||||
+ uint128_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint256_t res;
|
||||
+ res.hi = overflow;
|
||||
+ res.lo = tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint16_t sub16 (uint8_t a, uint8_t b)
|
||||
+{
|
||||
+ uint8_t tmp = a - b;
|
||||
+ uint8_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint16_t res = overflow;
|
||||
+ res <<= 8;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint32_t sub32 (uint16_t a, uint16_t b)
|
||||
+{
|
||||
+ uint16_t tmp = a - b;
|
||||
+ uint16_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint32_t res = overflow;
|
||||
+ res <<= 16;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t sub64 (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t tmp = a - b;
|
||||
+ uint32_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint64_t res = overflow;
|
||||
+ res <<= 32;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t sub128 (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t tmp = a - b;
|
||||
+ uint64_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint128_t res = overflow;
|
||||
+ res <<= 64;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint256_t sub256 (uint128_t a, uint128_t b)
|
||||
+{
|
||||
+ uint128_t tmp = a - b;
|
||||
+ uint128_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint256_t res;
|
||||
+ res.hi = overflow;
|
||||
+ res.lo = tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
|
||||
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
|
||||
index 232e903b0..55d6ee8ae 100644
|
||||
--- a/gcc/tree-ssa-math-opts.cc
|
||||
+++ b/gcc/tree-ssa-math-opts.cc
|
||||
@@ -3468,6 +3468,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
|
||||
}
|
||||
}
|
||||
|
||||
+/* Check if the corresponding operation has a wider equivalent on the target. */
|
||||
+
|
||||
+static bool
|
||||
+wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
|
||||
+{
|
||||
+ machine_mode wider_mode;
|
||||
+ FOR_EACH_WIDER_MODE (wider_mode, mode)
|
||||
+ {
|
||||
+ machine_mode next_mode;
|
||||
+ if (optab_handler (op, wider_mode) != CODE_FOR_nothing
|
||||
+ || (op == smul_optab
|
||||
+ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
|
||||
+ && (find_widening_optab_handler ((unsignedp
|
||||
+ ? umul_widen_optab
|
||||
+ : smul_widen_optab),
|
||||
+ next_mode, mode))))
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
|
||||
/* Helper function of match_arith_overflow. For MUL_OVERFLOW, if we have
|
||||
a check for non-zero like:
|
||||
@@ -3903,15 +3924,22 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|
||||
|| code == MINUS_EXPR
|
||||
|| code == MULT_EXPR
|
||||
|| code == BIT_NOT_EXPR);
|
||||
+ int unsignedp = TYPE_UNSIGNED (type);
|
||||
if (!INTEGRAL_TYPE_P (type)
|
||||
- || !TYPE_UNSIGNED (type)
|
||||
- || has_zero_uses (lhs)
|
||||
- || (code != PLUS_EXPR
|
||||
- && code != MULT_EXPR
|
||||
- && optab_handler (code == MINUS_EXPR ? usubv4_optab : uaddv4_optab,
|
||||
- TYPE_MODE (type)) == CODE_FOR_nothing))
|
||||
+ || !unsignedp
|
||||
+ || has_zero_uses (lhs))
|
||||
return false;
|
||||
|
||||
+ if (code == PLUS_EXPR || code == MINUS_EXPR)
|
||||
+ {
|
||||
+ machine_mode mode = TYPE_MODE (type);
|
||||
+ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab;
|
||||
+ if (optab_handler (op, mode) == CODE_FOR_nothing
|
||||
+ && (!flag_uaddsub_overflow_match_all
|
||||
+ || !wider_optab_check_p (op, mode, unsignedp)))
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
tree rhs1 = gimple_assign_rhs1 (stmt);
|
||||
tree rhs2 = gimple_assign_rhs2 (stmt);
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
|
||||
@@ -3986,7 +4014,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|
||||
|| (code != MULT_EXPR && (code == BIT_NOT_EXPR ? use_seen : !use_seen))
|
||||
|| (code == PLUS_EXPR
|
||||
&& optab_handler (uaddv4_optab,
|
||||
- TYPE_MODE (type)) == CODE_FOR_nothing)
|
||||
+ TYPE_MODE (type)) == CODE_FOR_nothing
|
||||
+ && !flag_uaddsub_overflow_match_all)
|
||||
|| (code == MULT_EXPR
|
||||
&& optab_handler (cast_stmt ? mulv4_optab : umulv4_optab,
|
||||
TYPE_MODE (type)) == CODE_FOR_nothing))
|
||||
--
|
||||
2.33.0
|
||||
|
||||
488
0039-Match-double-sized-mul-pattern.patch
Normal file
488
0039-Match-double-sized-mul-pattern.patch
Normal file
@ -0,0 +1,488 @@
|
||||
From e7b22f97f960b62e555dfd6f2e3ae43973fcbb3e Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Wed, 25 Jan 2023 15:04:07 +0300
|
||||
Subject: [PATCH 05/18] Match double sized mul pattern
|
||||
|
||||
---
|
||||
gcc/match.pd | 136 +++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 141 ++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 62 ++++++++++
|
||||
gcc/tree-ssa-math-opts.cc | 80 ++++++++++++
|
||||
4 files changed, 419 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 3cbaf2a5b..61866cb90 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -7895,3 +7895,139 @@ and,
|
||||
== TYPE_UNSIGNED (TREE_TYPE (@3))))
|
||||
&& single_use (@4)
|
||||
&& single_use (@5))))
|
||||
+
|
||||
+/* Match multiplication with double sized result.
|
||||
+
|
||||
+ Consider the following calculations:
|
||||
+ arg0 * arg1 = (2^(bit_size/2) * arg0_hi + arg0_lo)
|
||||
+ * (2^(bit_size/2) * arg1_hi + arg1_lo)
|
||||
+ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
|
||||
+ + 2^(bit_size/2) * (arg0_hi * arg1_lo + arg0_lo * arg1_hi)
|
||||
+ + arg0_lo * arg1_lo
|
||||
+
|
||||
+ The products of high and low parts fit in bit_size values, thus they are
|
||||
+ placed in high and low parts of result respectively.
|
||||
+
|
||||
+ The sum of the mixed products may overflow, so we need a detection for that.
|
||||
+ Also it has a bit_size/2 offset, thus it intersects with both high and low
|
||||
+ parts of result. Overflow detection constant is bit_size/2 due to this.
|
||||
+
|
||||
+ With this info:
|
||||
+ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
|
||||
+ + 2^(bit_size/2) * middle
|
||||
+ + 2^bit_size * possible_middle_overflow
|
||||
+ + arg0_lo * arg1_lo
|
||||
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow)
|
||||
+ + 2^(bit_size/2) * (2^(bit_size/2) * middle_hi + middle_lo)
|
||||
+ + arg0_lo * arg1_lo
|
||||
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + middle_hi
|
||||
+ + possible_middle_overflow)
|
||||
+ + 2^(bit_size/2) * middle_lo
|
||||
+ + arg0_lo * arg1_lo
|
||||
+
|
||||
+ The last sum can produce overflow for the high result part. With this:
|
||||
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow
|
||||
+ + possible_res_lo_overflow + middle_hi)
|
||||
+ + res_lo
|
||||
+ = res_hi + res_lo
|
||||
+
|
||||
+ This formula is quite big to fit into one match pattern with all of the
|
||||
+ combinations of terms inside it. There are many helpers for better code
|
||||
+ readability.
|
||||
+
|
||||
+ The simplification basis is res_hi: we assume that needing res_lo alone is
|
||||
+ not a realistic use case for such calculations.
|
||||
+
|
||||
+ Overflow handling is done via matching complex calculations:
|
||||
+ the realpart and imagpart are quite handy here. */
|
||||
+/* Match low and high parts of the argument. */
|
||||
+(match (double_size_mul_arg_lo @0 @1)
|
||||
+ (bit_and @0 INTEGER_CST@1)
|
||||
+ (if (wi::to_wide (@1)
|
||||
+ == wi::mask (TYPE_PRECISION (type) / 2, false, TYPE_PRECISION (type)))))
|
||||
+(match (double_size_mul_arg_hi @0 @1)
|
||||
+ (rshift @0 INTEGER_CST@1)
|
||||
+ (if (wi::to_wide (@1) == TYPE_PRECISION (type) / 2)))
|
||||
+
|
||||
+/* Match various argument parts products. */
|
||||
+(match (double_size_mul_lolo @0 @1)
|
||||
+ (mult@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_lo @1 @3))
|
||||
+ (if (single_use (@4))))
|
||||
+(match (double_size_mul_hihi @0 @1)
|
||||
+ (mult@4 (double_size_mul_arg_hi @0 @2) (double_size_mul_arg_hi @1 @3))
|
||||
+ (if (single_use (@4))))
|
||||
+(match (double_size_mul_lohi @0 @1)
|
||||
+ (mult:c@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_hi @1 @3))
|
||||
+ (if (single_use (@4))))
|
||||
+
|
||||
+/* Match complex middle sum. */
|
||||
+(match (double_size_mul_middle_complex @0 @1)
|
||||
+ (IFN_ADD_OVERFLOW@2 (double_size_mul_lohi @0 @1) (double_size_mul_lohi @1 @0))
|
||||
+ (if (num_imm_uses (@2) == 2)))
|
||||
+
|
||||
+/* Match real middle results. */
|
||||
+(match (double_size_mul_middle @0 @1)
|
||||
+ (realpart@2 (double_size_mul_middle_complex @0 @1))
|
||||
+ (if (num_imm_uses (@2) == 2)))
|
||||
+(match (double_size_mul_middleres_lo @0 @1)
|
||||
+ (lshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
|
||||
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
|
||||
+ && single_use (@3))))
|
||||
+(match (double_size_mul_middleres_hi @0 @1)
|
||||
+ (rshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
|
||||
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
|
||||
+ && single_use (@3))))
|
||||
+
|
||||
+/* Match low result part. */
|
||||
+/* Number of uses may be < 2 in case when we are interested in
|
||||
+ high part only. */
|
||||
+(match (double_size_mul_res_lo_complex @0 @1)
|
||||
+ (IFN_ADD_OVERFLOW:c@2
|
||||
+ (double_size_mul_lolo:c @0 @1) (double_size_mul_middleres_lo @0 @1))
|
||||
+ (if (num_imm_uses (@2) <= 2)))
|
||||
+(match (double_size_mul_res_lo @0 @1)
|
||||
+ (realpart (double_size_mul_res_lo_complex @0 @1)))
|
||||
+
|
||||
+/* Match overflow terms. */
|
||||
+(match (double_size_mul_overflow_check_lo @0 @1 @5)
|
||||
+ (convert@4 (ne@3
|
||||
+ (imagpart@2 (double_size_mul_res_lo_complex@5 @0 @1)) integer_zerop))
|
||||
+ (if (single_use (@2) && single_use (@3) && single_use (@4))))
|
||||
+(match (double_size_mul_overflow_check_hi @0 @1)
|
||||
+ (lshift@6 (convert@5 (ne@4
|
||||
+ (imagpart@3 (double_size_mul_middle_complex @0 @1)) integer_zerop))
|
||||
+ INTEGER_CST@2)
|
||||
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
|
||||
+ && single_use (@3) && single_use (@4) && single_use (@5)
|
||||
+ && single_use (@6))))
|
||||
+
|
||||
+/* Match all possible permutations for high result part calculations. */
|
||||
+(for op1 (double_size_mul_hihi
|
||||
+ double_size_mul_overflow_check_hi
|
||||
+ double_size_mul_middleres_hi)
|
||||
+ op2 (double_size_mul_overflow_check_hi
|
||||
+ double_size_mul_middleres_hi
|
||||
+ double_size_mul_hihi)
|
||||
+ op3 (double_size_mul_middleres_hi
|
||||
+ double_size_mul_hihi
|
||||
+ double_size_mul_overflow_check_hi)
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2
|
||||
+ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) (op1:c @0 @1))
|
||||
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))
|
||||
+ (if (single_use (@4) && single_use (@5))))
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2 (double_size_mul_overflow_check_lo @0 @1 @3)
|
||||
+ (plus:c@4 (op1:c @0 @1)
|
||||
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
|
||||
+ (if (single_use (@4) && single_use (@5))))
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2 (op1:c @0 @1)
|
||||
+ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3)
|
||||
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
|
||||
+ (if (single_use (@4) && single_use (@5))))
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2 (op1:c @0 @1)
|
||||
+ (plus:c@4 (op2:c @0 @1)
|
||||
+ (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
|
||||
+ (if (single_use (@4) && single_use (@5)))))
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
new file mode 100644
|
||||
index 000000000..4d475cc8a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
@@ -0,0 +1,141 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
|
||||
+ proper overflow detection in some cases. */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef unsigned __int128 uint128_t;
|
||||
+
|
||||
+uint16_t mul16 (uint8_t a, uint8_t b)
|
||||
+{
|
||||
+ uint8_t a_lo = a & 0xF;
|
||||
+ uint8_t b_lo = b & 0xF;
|
||||
+ uint8_t a_hi = a >> 4;
|
||||
+ uint8_t b_hi = b >> 4;
|
||||
+ uint8_t lolo = a_lo * b_lo;
|
||||
+ uint8_t lohi = a_lo * b_hi;
|
||||
+ uint8_t hilo = a_hi * b_lo;
|
||||
+ uint8_t hihi = a_hi * b_hi;
|
||||
+ uint8_t middle = hilo + lohi;
|
||||
+ uint8_t middle_hi = middle >> 4;
|
||||
+ uint8_t middle_lo = middle << 4;
|
||||
+ uint8_t res_lo = lolo + middle_lo;
|
||||
+ uint8_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x10 : 0);
|
||||
+ uint16_t res = ((uint16_t) res_hi) << 8;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint32_t mul32 (uint16_t a, uint16_t b)
|
||||
+{
|
||||
+ uint16_t a_lo = a & 0xFF;
|
||||
+ uint16_t b_lo = b & 0xFF;
|
||||
+ uint16_t a_hi = a >> 8;
|
||||
+ uint16_t b_hi = b >> 8;
|
||||
+ uint16_t lolo = a_lo * b_lo;
|
||||
+ uint16_t lohi = a_lo * b_hi;
|
||||
+ uint16_t hilo = a_hi * b_lo;
|
||||
+ uint16_t hihi = a_hi * b_hi;
|
||||
+ uint16_t middle = hilo + lohi;
|
||||
+ uint16_t middle_hi = middle >> 8;
|
||||
+ uint16_t middle_lo = middle << 8;
|
||||
+ uint16_t res_lo = lolo + middle_lo;
|
||||
+ uint16_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x100 : 0);
|
||||
+ uint32_t res = ((uint32_t) res_hi) << 16;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t mul64 (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t a_lo = a & 0xFFFF;
|
||||
+ uint32_t b_lo = b & 0xFFFF;
|
||||
+ uint32_t a_hi = a >> 16;
|
||||
+ uint32_t b_hi = b >> 16;
|
||||
+ uint32_t lolo = a_lo * b_lo;
|
||||
+ uint32_t lohi = a_lo * b_hi;
|
||||
+ uint32_t hilo = a_hi * b_lo;
|
||||
+ uint32_t hihi = a_hi * b_hi;
|
||||
+ uint32_t middle = hilo + lohi;
|
||||
+ uint32_t middle_hi = middle >> 16;
|
||||
+ uint32_t middle_lo = middle << 16;
|
||||
+ uint32_t res_lo = lolo + middle_lo;
|
||||
+ uint32_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x10000 : 0);
|
||||
+ uint64_t res = ((uint64_t) res_hi) << 32;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t mul128 (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t a_lo = a & 0xFFFFFFFF;
|
||||
+ uint64_t b_lo = b & 0xFFFFFFFF;
|
||||
+ uint64_t a_hi = a >> 32;
|
||||
+ uint64_t b_hi = b >> 32;
|
||||
+ uint64_t lolo = a_lo * b_lo;
|
||||
+ uint64_t lohi = a_lo * b_hi;
|
||||
+ uint64_t hilo = a_hi * b_lo;
|
||||
+ uint64_t hihi = a_hi * b_hi;
|
||||
+ uint64_t middle = hilo + lohi;
|
||||
+ uint64_t middle_hi = middle >> 32;
|
||||
+ uint64_t middle_lo = middle << 32;
|
||||
+ uint64_t res_lo = lolo + middle_lo;
|
||||
+ uint64_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x100000000 : 0);
|
||||
+ uint128_t res = ((uint128_t) res_hi) << 64;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t mul64_perm (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t a_lo = a & 0xFFFF;
|
||||
+ uint32_t b_lo = b & 0xFFFF;
|
||||
+ uint32_t a_hi = a >> 16;
|
||||
+ uint32_t b_hi = b >> 16;
|
||||
+ uint32_t lolo = a_lo * b_lo;
|
||||
+ uint32_t lohi = a_lo * b_hi;
|
||||
+ uint32_t hilo = a_hi * b_lo;
|
||||
+ uint32_t hihi = a_hi * b_hi;
|
||||
+ uint32_t middle = hilo + lohi;
|
||||
+ uint32_t middle_hi = middle >> 16;
|
||||
+ uint32_t middle_lo = middle << 16;
|
||||
+ uint32_t res_lo = lolo + middle_lo;
|
||||
+ uint32_t res_hi = hihi + middle_hi;
|
||||
+ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi;
|
||||
+ res_hi = middle < hilo ? res_hi + 0x10000 : res_hi;
|
||||
+ uint64_t res = ((uint64_t) res_hi) << 32;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t mul128_perm (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t a_lo = a & 0xFFFFFFFF;
|
||||
+ uint64_t b_lo = b & 0xFFFFFFFF;
|
||||
+ uint64_t a_hi = a >> 32;
|
||||
+ uint64_t b_hi = b >> 32;
|
||||
+ uint64_t lolo = a_lo * b_lo;
|
||||
+ uint64_t lohi = a_lo * b_hi;
|
||||
+ uint64_t hilo = a_hi * b_lo;
|
||||
+ uint64_t hihi = a_hi * b_hi;
|
||||
+ uint64_t middle = hilo + lohi;
|
||||
+ uint64_t middle_hi = middle >> 32;
|
||||
+ uint64_t middle_lo = middle << 32;
|
||||
+ uint64_t res_lo = lolo + middle_lo;
|
||||
+ uint64_t res_hi = hihi + middle_hi;
|
||||
+ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi;
|
||||
+ res_hi = middle < hilo ? res_hi + 0x100000000 : res_hi;
|
||||
+ uint128_t res = ((uint128_t) res_hi) << 64;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
new file mode 100644
|
||||
index 000000000..cc6e5af25
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
@@ -0,0 +1,62 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* fif-conversion-gimple is required for proper overflow detection
|
||||
+ in some cases. */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef unsigned __int128 uint128_t;
|
||||
+typedef struct uint256_t
|
||||
+{
|
||||
+ uint128_t lo;
|
||||
+ uint128_t hi;
|
||||
+} uint256_t;
|
||||
+
|
||||
+uint64_t mul64_double_use (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t a_lo = a & 0xFFFF;
|
||||
+ uint32_t b_lo = b & 0xFFFF;
|
||||
+ uint32_t a_hi = a >> 16;
|
||||
+ uint32_t b_hi = b >> 16;
|
||||
+ uint32_t lolo = a_lo * b_lo;
|
||||
+ uint32_t lohi = a_lo * b_hi;
|
||||
+ uint32_t hilo = a_hi * b_lo;
|
||||
+ uint32_t hihi = a_hi * b_hi;
|
||||
+ uint32_t middle = hilo + lohi;
|
||||
+ uint32_t middle_hi = middle >> 16;
|
||||
+ uint32_t middle_lo = middle << 16;
|
||||
+ uint32_t res_lo = lolo + middle_lo;
|
||||
+ uint32_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x10000 : 0);
|
||||
+ uint64_t res = ((uint64_t) res_hi) << 32;
|
||||
+ res += res_lo;
|
||||
+ return res + lolo;
|
||||
+}
|
||||
+
|
||||
+uint256_t mul256 (uint128_t a, uint128_t b)
|
||||
+{
|
||||
+ uint128_t a_lo = a & 0xFFFFFFFFFFFFFFFF;
|
||||
+ uint128_t b_lo = b & 0xFFFFFFFFFFFFFFFF;
|
||||
+ uint128_t a_hi = a >> 64;
|
||||
+ uint128_t b_hi = b >> 64;
|
||||
+ uint128_t lolo = a_lo * b_lo;
|
||||
+ uint128_t lohi = a_lo * b_hi;
|
||||
+ uint128_t hilo = a_hi * b_lo;
|
||||
+ uint128_t hihi = a_hi * b_hi;
|
||||
+ uint128_t middle = hilo + lohi;
|
||||
+ uint128_t middle_hi = middle >> 64;
|
||||
+ uint128_t middle_lo = middle << 64;
|
||||
+ uint128_t res_lo = lolo + middle_lo;
|
||||
+ uint128_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ /* Workaround (WA) for the "constant is too big" warning. */
|
||||
+ uint128_t overflow_tmp = (middle < hilo ? 1 : 0);
|
||||
+ overflow_tmp <<= 64;
|
||||
+ res_hi += overflow_tmp;
|
||||
+ uint256_t res;
|
||||
+ res.lo = res_lo;
|
||||
+ res.hi = res_hi;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-not "double sized mul optimized" "widening_mul" } } */
|
||||
diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc
|
||||
index 55d6ee8ae..2c06b8a60 100644
|
||||
--- a/gcc/tree-ssa-math-opts.cc
|
||||
+++ b/gcc/tree-ssa-math-opts.cc
|
||||
@@ -210,6 +210,9 @@ static struct
|
||||
|
||||
/* Number of highpart multiplication ops inserted. */
|
||||
int highpart_mults_inserted;
|
||||
+
|
||||
+ /* Number of optimized double sized multiplications. */
|
||||
+ int double_sized_mul_optimized;
|
||||
} widen_mul_stats;
|
||||
|
||||
/* The instance of "struct occurrence" representing the highest
|
||||
@@ -4893,6 +4896,78 @@ optimize_spaceship (gimple *stmt)
|
||||
}
|
||||
|
||||
|
||||
+/* Pattern matcher for double sized multiplication defined in match.pd. */
|
||||
+extern bool gimple_double_size_mul_candidate (tree, tree*, tree (*)(tree));
|
||||
+
|
||||
+static bool
|
||||
+convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt)
|
||||
+{
|
||||
+ gimple *use_stmt, *complex_res_lo;
|
||||
+ gimple_stmt_iterator insert_before;
|
||||
+ imm_use_iterator use_iter;
|
||||
+ tree match[4]; // arg0, arg1, res_hi, complex_res_lo
|
||||
+ tree arg0, arg1, widen_mult, new_type, tmp;
|
||||
+ tree lhs = gimple_assign_lhs (stmt);
|
||||
+ location_t loc = UNKNOWN_LOCATION;
|
||||
+ machine_mode mode;
|
||||
+
|
||||
+ if (!gimple_double_size_mul_candidate (lhs, match, NULL))
|
||||
+ return false;
|
||||
+
|
||||
+ new_type = build_nonstandard_integer_type (
|
||||
+ TYPE_PRECISION (TREE_TYPE (match[0])) * 2, 1);
|
||||
+ mode = TYPE_MODE (new_type);
|
||||
+
|
||||
+ /* Early return if the double-width multiplication doesn't exist on the target. */
|
||||
+ if (optab_handler (smul_optab, mode) == CODE_FOR_nothing
|
||||
+ && !wider_optab_check_p (smul_optab, mode, 1))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Determine the point where the wide multiplication
|
||||
+ should be inserted. Complex low res is OK since it is required
|
||||
+ by both high and low part getters, thus it dominates both of them. */
|
||||
+ complex_res_lo = SSA_NAME_DEF_STMT (match[3]);
|
||||
+ insert_before = gsi_for_stmt (complex_res_lo);
|
||||
+ gsi_next (&insert_before);
|
||||
+
|
||||
+ /* Create the widen multiplication. */
|
||||
+ arg0 = build_and_insert_cast (&insert_before, loc, new_type, match[0]);
|
||||
+ arg1 = build_and_insert_cast (&insert_before, loc, new_type, match[1]);
|
||||
+ widen_mult = build_and_insert_binop (&insert_before, loc, "widen_mult",
|
||||
+ MULT_EXPR, arg0, arg1);
|
||||
+
|
||||
+ /* Find the mult low part getter. */
|
||||
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3])
|
||||
+ if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
|
||||
+ break;
|
||||
+
|
||||
+ /* Create high and low (if needed) parts extractors. */
|
||||
+ /* Low part. */
|
||||
+ if (use_stmt)
|
||||
+ {
|
||||
+ loc = gimple_location (use_stmt);
|
||||
+ tmp = build_and_insert_cast (&insert_before, loc,
|
||||
+ TREE_TYPE (gimple_get_lhs (use_stmt)),
|
||||
+ widen_mult);
|
||||
+ gassign *new_stmt = gimple_build_assign (gimple_get_lhs (use_stmt),
|
||||
+ NOP_EXPR, tmp);
|
||||
+ gsi_replace (&insert_before, new_stmt, true);
|
||||
+ }
|
||||
+
|
||||
+ /* High part. */
|
||||
+ loc = gimple_location (stmt);
|
||||
+ tmp = build_and_insert_binop (gsi, loc, "widen_mult_hi",
|
||||
+ RSHIFT_EXPR, widen_mult,
|
||||
+ build_int_cst (new_type,
|
||||
+ TYPE_PRECISION (new_type) / 2));
|
||||
+ tmp = build_and_insert_cast (gsi, loc, TREE_TYPE (lhs), tmp);
|
||||
+ gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, tmp);
|
||||
+ gsi_replace (gsi, new_stmt, true);
|
||||
+
|
||||
+ widen_mul_stats.double_sized_mul_optimized++;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/* Find integer multiplications where the operands are extended from
|
||||
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
|
||||
or MULT_HIGHPART_EXPR where appropriate. */
|
||||
@@ -4987,6 +5062,9 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
|
||||
break;
|
||||
|
||||
case PLUS_EXPR:
|
||||
+ if (convert_double_size_mul (&gsi, stmt))
|
||||
+ break;
|
||||
+ __attribute__ ((fallthrough));
|
||||
case MINUS_EXPR:
|
||||
if (!convert_plusminus_to_widen (&gsi, stmt, code))
|
||||
match_arith_overflow (&gsi, stmt, code, m_cfg_changed_p);
|
||||
@@ -5091,6 +5169,8 @@ pass_optimize_widening_mul::execute (function *fun)
|
||||
widen_mul_stats.divmod_calls_inserted);
|
||||
statistics_counter_event (fun, "highpart multiplications inserted",
|
||||
widen_mul_stats.highpart_mults_inserted);
|
||||
+ statistics_counter_event (fun, "double sized mul optimized",
|
||||
+ widen_mul_stats.double_sized_mul_optimized);
|
||||
|
||||
return cfg_changed ? TODO_cleanup_cfg : 0;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2387
0040-Port-icp-patch-to-GCC-12.patch
Normal file
2387
0040-Port-icp-patch-to-GCC-12.patch
Normal file
File diff suppressed because it is too large
Load Diff
100
0041-Port-fixes-in-icp-to-GCC-12.patch
Normal file
100
0041-Port-fixes-in-icp-to-GCC-12.patch
Normal file
@ -0,0 +1,100 @@
|
||||
From aaa117a9ff58fb208e8c8859e075ca425f995f63 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Tue, 27 Feb 2024 07:43:57 +0800
|
||||
Subject: [PATCH 07/18] Port fixes in icp to GCC 12
|
||||
|
||||
---
|
||||
gcc/ipa-devirt.cc | 37 ++++++++++++++++++++++++++++++-------
|
||||
1 file changed, 30 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc
|
||||
index 383839189..318535d06 100644
|
||||
--- a/gcc/ipa-devirt.cc
|
||||
+++ b/gcc/ipa-devirt.cc
|
||||
@@ -4431,6 +4431,11 @@ print_type_set(unsigned ftype_uid, type_alias_map *map)
|
||||
if (!map->count (ftype_uid))
|
||||
return;
|
||||
type_set* s = (*map)[ftype_uid];
|
||||
+ if (!s)
|
||||
+ {
|
||||
+ fprintf (dump_file, "%d (no set)", ftype_uid);
|
||||
+ return;
|
||||
+ }
|
||||
for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
|
||||
fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
|
||||
}
|
||||
@@ -4696,12 +4701,19 @@ maybe_register_aliases (tree type1, tree type2)
|
||||
if (register_ailas_type (type1, type2, ta_map))
|
||||
analyze_pointees (type1, type2);
|
||||
}
|
||||
+ unsigned type1_uid = TYPE_UID (type1);
|
||||
+ unsigned type2_uid = TYPE_UID (type2);
|
||||
+ if (type_uid_map->count (type1_uid) == 0)
|
||||
+ (*type_uid_map)[type1_uid] = type1;
|
||||
+ if (type_uid_map->count (type2_uid) == 0)
|
||||
+ (*type_uid_map)[type2_uid] = type2;
|
||||
+
|
||||
/* If function and non-function type pointers alias,
|
||||
the function type is unsafe. */
|
||||
if (FUNCTION_POINTER_TYPE_P (type1) && !FUNCTION_POINTER_TYPE_P (type2))
|
||||
- unsafe_types->insert (TYPE_UID (type1));
|
||||
+ unsafe_types->insert (type1_uid);
|
||||
if (FUNCTION_POINTER_TYPE_P (type2) && !FUNCTION_POINTER_TYPE_P (type1))
|
||||
- unsafe_types->insert (TYPE_UID (type2));
|
||||
+ unsafe_types->insert (type2_uid);
|
||||
|
||||
/* Try to figure out with pointers to incomplete types. */
|
||||
if (POINTER_TYPE_P (type1) && POINTER_TYPE_P (type2))
|
||||
@@ -4825,10 +4837,12 @@ compare_block_and_init_type (tree block, tree t1)
|
||||
static void
|
||||
analyze_global_var (varpool_node *var)
|
||||
{
|
||||
- var->get_constructor();
|
||||
tree decl = var->decl;
|
||||
- if (TREE_CODE (decl) == SSA_NAME || !DECL_INITIAL (decl)
|
||||
- || integer_zerop (DECL_INITIAL (decl)))
|
||||
+ if (decl || !DECL_INITIAL (decl))
|
||||
+ return;
|
||||
+ var->get_constructor ();
|
||||
+ if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl))
|
||||
+ || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK)
|
||||
return;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
@@ -4998,7 +5012,9 @@ analyze_assign_stmt (gimple *stmt)
|
||||
{
|
||||
rhs = TREE_OPERAND (rhs, 0);
|
||||
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
|
||||
- || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL)
|
||||
+ || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
|
||||
+ || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL
|
||||
+ || TREE_CODE (rhs) == RESULT_DECL)
|
||||
rhs_type = build_pointer_type (TREE_TYPE (rhs));
|
||||
else if (TREE_CODE (rhs) == COMPONENT_REF)
|
||||
{
|
||||
@@ -5012,7 +5028,12 @@ analyze_assign_stmt (gimple *stmt)
|
||||
gcc_assert (POINTER_TYPE_P (rhs_type));
|
||||
}
|
||||
else
|
||||
- gcc_unreachable();
|
||||
+ {
|
||||
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
|
||||
+ get_tree_code_name (TREE_CODE (rhs)));
|
||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
}
|
||||
else
|
||||
rhs_type = TREE_TYPE (rhs);
|
||||
@@ -5710,6 +5731,8 @@ merge_fs_map_for_ftype_aliases ()
|
||||
decl_set *d_set = it1->second;
|
||||
tree type = (*type_uid_map)[it1->first];
|
||||
type_set *set = (*fta_map)[it1->first];
|
||||
+ if (!set)
|
||||
+ continue;
|
||||
for (type_set::const_iterator it2 = set->begin ();
|
||||
it2 != set->end (); it2++)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1245
0042-Add-split-complex-instructions-pass.patch
Normal file
1245
0042-Add-split-complex-instructions-pass.patch
Normal file
File diff suppressed because it is too large
Load Diff
1426
0043-Extending-and-refactoring-of-pass_split_complex_inst.patch
Normal file
1426
0043-Extending-and-refactoring-of-pass_split_complex_inst.patch
Normal file
File diff suppressed because it is too large
Load Diff
378
0044-Port-maxmin-patch-to-GCC-12.patch
Normal file
378
0044-Port-maxmin-patch-to-GCC-12.patch
Normal file
@ -0,0 +1,378 @@
|
||||
From a3013c074cd2ab5f71eb98a587a627f38c68656c Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Thu, 22 Feb 2024 17:07:24 +0800
|
||||
Subject: [PATCH 12/18] Port maxmin patch to GCC 12
|
||||
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 256 ++++++++++++++++++++++++++
|
||||
gcc/config/aarch64/predicates.md | 19 ++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin.c | 46 +++++
|
||||
3 files changed, 321 insertions(+)
|
||||
create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index 82f73805f..de92802f5 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -1138,6 +1138,82 @@
|
||||
[(set_attr "type" "neon_compare<q>,neon_shift_imm<q>")]
|
||||
)
|
||||
|
||||
+;; Simplify an extension followed by a truncation for the shift+neg operation.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_sshr_neg_v8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
|
||||
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_dup 1)
|
||||
+ (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
|
||||
+ (match_dup 2)))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (ashiftrt:V8HI
|
||||
+ (neg:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand" "w"))
|
||||
+ (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
|
||||
+ {
|
||||
+ /* Reduce the shift amount to smaller mode. */
|
||||
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
|
||||
+ - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
|
||||
+ operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
|
||||
+ }
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
+;; The helper definition that allows combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_operand:V4SI 1 "register_operand" "w"))
|
||||
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_operand:V4SI 3 "register_operand" "w"))
|
||||
+ (match_dup 2)))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_dup 1))
|
||||
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
|
||||
+ (set (match_operand:V4SI 3 "register_operand" "=w")
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_dup 3))
|
||||
+ (match_dup 2)))
|
||||
+ (set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 1))
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 3))))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
(define_insn "*aarch64_simd_sra<mode>"
|
||||
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
||||
(plus:VDQ_I
|
||||
@@ -1714,6 +1790,26 @@
|
||||
}
|
||||
)
|
||||
|
||||
+(define_insn "vec_pack_trunc_shifted_<mode>"
|
||||
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
|
||||
+ (vec_concat:<VNARROWQ2>
|
||||
+ (truncate:<VNARROWQ>
|
||||
+ (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
|
||||
+ (match_operand:VQN 2 "half_size_operand" "w")))
|
||||
+ (truncate:<VNARROWQ>
|
||||
+ (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
|
||||
+ (match_operand:VQN 4 "half_size_operand" "w")))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ {
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
|
||||
+ else
|
||||
+ return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
|
||||
+ }
|
||||
+ [(set_attr "type" "neon_permute<q>")
|
||||
+ (set_attr "length" "4")]
|
||||
+)
|
||||
+
|
||||
(define_insn "aarch64_shrn<mode>_insn_le"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
|
||||
(vec_concat:<VNARROWQ2>
|
||||
@@ -6652,6 +6748,166 @@
|
||||
[(set_attr "type" "neon_tst<q>")]
|
||||
)
|
||||
|
||||
+;; Simplify an extension followed by a truncation for a cmtst-like operation.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (plus:V4HI
|
||||
+ (truncate:V4HI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
|
||||
+ (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
|
||||
+ (plus:V4HI
|
||||
+ (truncate:V4HI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_dup 1)
|
||||
+ (match_dup 2))
|
||||
+ (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
|
||||
+ (match_dup 4)))
|
||||
+ (match_dup 5))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 6 "register_operand" "=w")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (plus:V8HI
|
||||
+ (eq:V8HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand" "w")
|
||||
+ (match_dup 6))
|
||||
+ (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
|
||||
+ (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ {
|
||||
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
|
||||
+ operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
|
||||
+ int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
|
||||
+ operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
|
||||
+
|
||||
+ operands[6] = gen_reg_rtx (V8HImode);
|
||||
+ }
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "neon_tst_q")]
|
||||
+)
|
||||
+
|
||||
+;; Three helper definitions that allow combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
|
||||
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
|
||||
+ (set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_dup 5)
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
|
||||
+ (set (match_dup 0)
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ operands[5] = gen_reg_rtx (V8HImode);
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_hi_v8hi"
|
||||
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half")))
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
|
||||
+ (set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_dup 5)
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half"))))
|
||||
+ (set (match_dup 0)
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ operands[5] = gen_reg_rtx (V8HImode);
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_tmpv8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (not:V4SI
|
||||
+ (match_operand:V4SI 1 "register_operand" "w")))
|
||||
+ (truncate:V4HI
|
||||
+ (not:V4SI
|
||||
+ (match_operand:V4SI 2 "register_operand" "w")))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
|
||||
+ (not:V4SI
|
||||
+ (match_dup 1)))
|
||||
+ (set (match_operand:V4SI 2 "register_operand" "=w")
|
||||
+ (not:V4SI
|
||||
+ (match_dup 2)))
|
||||
+ (set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 1))
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 2))))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
(define_insn_and_split "aarch64_cmtstdi"
|
||||
[(set (match_operand:DI 0 "register_operand" "=w,r")
|
||||
(neg:DI
|
||||
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||||
index 07c14aacb..1b8496c07 100644
|
||||
--- a/gcc/config/aarch64/predicates.md
|
||||
+++ b/gcc/config/aarch64/predicates.md
|
||||
@@ -118,6 +118,25 @@
|
||||
(match_test "aarch64_simd_valid_immediate (op, NULL,
|
||||
AARCH64_CHECK_ORR)"))))
|
||||
|
||||
+(define_predicate "aarch64_bic_imm_for_maxmin"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ if (!aarch64_simd_valid_immediate (op, NULL, AARCH64_CHECK_BIC))
|
||||
+ return false;
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode);
|
||||
+ return CONST_INT_P (op)
|
||||
+ && ((~UINTVAL (op)) < (((long unsigned int) 1 << size) - 1));
|
||||
+})
|
||||
+
|
||||
+(define_predicate "maxmin_arith_shift_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) - 1;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
|
||||
+})
|
||||
+
|
||||
(define_predicate "aarch64_reg_or_bic_imm"
|
||||
(ior (match_operand 0 "register_operand")
|
||||
(and (match_code "const_vector")
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
new file mode 100755
|
||||
index 000000000..06bce7029
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
@@ -0,0 +1,46 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -fdump-rtl-combine-all" } */
|
||||
+
|
||||
+/* The test checks usage of smax/smin insns for clip evaluation and
|
||||
+ * uzp1/uzp2 insns for vector element narrowing. It's inspired by
|
||||
+ * sources of x264 codec. */
|
||||
+
|
||||
+typedef unsigned char uint8_t;
|
||||
+typedef long int intptr_t;
|
||||
+typedef signed short int int16_t;
|
||||
+
|
||||
+static __attribute__((always_inline)) inline uint8_t clip (int x )
|
||||
+{
|
||||
+ return ( (x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x );
|
||||
+}
|
||||
+
|
||||
+void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
+ intptr_t stride, int width, int height, int16_t *buf)
|
||||
+{
|
||||
+ const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
|
||||
+ for( int y = 0; y < height; y++ ) {
|
||||
+ for( int x = -2; x < width+3; x++ ) {
|
||||
+ int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
|
||||
+ + (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
|
||||
+ dstv[x] = clip ( (v + 16) >> 5 );
|
||||
+ buf[x+2] = v + pad;
|
||||
+ }
|
||||
+ for( int x = 0; x < width; x++ )
|
||||
+ dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
|
||||
+ + (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
|
||||
+ - 32*pad + 512) >> 10);
|
||||
+ for( int x = 0; x < width; x++ )
|
||||
+ dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
|
||||
+ + (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
|
||||
+ + 16) >> 5);
|
||||
+ dsth += stride;
|
||||
+ dstv += stride;
|
||||
+ dstc += stride;
|
||||
+ src += stride;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
|
||||
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
|
||||
+/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
|
||||
+/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
239
0045-Port-moving-minmask-pattern-to-gimple-to-GCC-12.patch
Normal file
239
0045-Port-moving-minmask-pattern-to-gimple-to-GCC-12.patch
Normal file
@ -0,0 +1,239 @@
|
||||
From 11da40d18e35219961226d40f11b0702b8649044 Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Thu, 22 Feb 2024 17:13:27 +0800
|
||||
Subject: [PATCH 13/18] Port moving minmask pattern to gimple to GCC 12
|
||||
|
||||
---
|
||||
gcc/common.opt | 4 +
|
||||
gcc/match.pd | 104 ++++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin-1.c | 15 ++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin-2.c | 14 ++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin.c | 19 +++--
|
||||
5 files changed, 151 insertions(+), 5 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-2.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 6c6fabb31..3a5004271 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1846,6 +1846,10 @@ fif-conversion-gimple
|
||||
Common Var(flag_if_conversion_gimple) Optimization
|
||||
Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
|
||||
|
||||
+fconvert-minmax
|
||||
+Common Var(flag_convert_minmax) Optimization
|
||||
+Convert saturating clipping to min max.
|
||||
+
|
||||
fstack-reuse=
|
||||
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
|
||||
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 61866cb90..3a19e93b3 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -8031,3 +8031,107 @@ and,
|
||||
(plus:c@4 (op2:c @0 @1)
|
||||
(plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
|
||||
(if (single_use (@4) && single_use (@5)))))
|
||||
+
|
||||
+/* MinMax pattern matching helpers. More info on the transformation below. */
|
||||
+
|
||||
+/* Match (a & 0b11..100..0) pattern. */
|
||||
+(match (minmax_cmp_arg @0 @1)
|
||||
+ (bit_and @0 INTEGER_CST@1)
|
||||
+ (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
|
||||
+
|
||||
+/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
|
||||
+ This statement blocks the transformation for unsigned integers.
|
||||
+ Do type check here to avoid unnecessary duplications. */
|
||||
+(match (minmax_sat_arg @0)
|
||||
+ (rshift (negate @0) INTEGER_CST@1)
|
||||
+ (if (!TYPE_UNSIGNED (TREE_TYPE (@0))
|
||||
+ && wi::eq_p (wi::to_widest (@1), TYPE_PRECISION (TREE_TYPE (@0)) - 1))))
|
||||
+
|
||||
+/* Transform ((x & ~mask) ? (-x)>>31 & mask : x) to (min (max (x, 0), mask)).
|
||||
+ The matched pattern can be described as saturated clipping.
|
||||
+
|
||||
+ The pattern supports truncation via both casts and bit_and.
|
||||
+ Also there are patterns for possible inverted conditions. */
|
||||
+(if (flag_convert_minmax)
|
||||
+/* Truncation via casts. Unfortunately convert? cannot be applied here
|
||||
+ because convert and cond take different number of arguments. */
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (minmax_sat_arg @0))
|
||||
+ (convert? @0)))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (minmax_sat_arg @0))
|
||||
+ (convert? @0))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (minmax_sat_arg @0))))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (minmax_sat_arg @0)))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
+ /* Truncation via bit_and with mask. Same concerns on convert? here. */
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
|
||||
+ (convert? @0)))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
|
||||
+ (convert? @0))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2)))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; }))))))
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-1.c b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
|
||||
new file mode 100644
|
||||
index 000000000..859ff7df8
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -fconvert-minmax" } */
|
||||
+
|
||||
+#include <inttypes.h>
|
||||
+
|
||||
+__attribute__((noinline))
|
||||
+void test (int32_t *restrict a, int32_t *restrict x)
|
||||
+{
|
||||
+ for (int i = 0; i < 4; i++)
|
||||
+ a[i] = ((((-x[i]) >> 31) ^ x[i])
|
||||
+ & (-((int32_t)((x[i] & (~((1 << 8)-1))) == 0)))) ^ ((-x[i]) >> 31);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not {smax\t} } } */
|
||||
+/* { dg-final { scan-assembler-not {smin\t} } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-2.c b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
|
||||
new file mode 100644
|
||||
index 000000000..63d4d85b3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -fconvert-minmax" } */
|
||||
+
|
||||
+#include <inttypes.h>
|
||||
+
|
||||
+__attribute__((noinline))
|
||||
+void test (int8_t *restrict a, int32_t *restrict x)
|
||||
+{
|
||||
+ for (int i = 0; i < 8; i++)
|
||||
+ a[i] = ((x[i] & ~((1 << 9)-1)) ? (-x[i])>>31 & ((1 << 9)-1) : x[i]);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
|
||||
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
index 06bce7029..a984fa560 100755
|
||||
--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target aarch64-*-* } } */
|
||||
-/* { dg-options "-O3 -fdump-rtl-combine-all" } */
|
||||
+/* { dg-options "-O3 -fconvert-minmax" } */
|
||||
|
||||
/* The test checks usage of smax/smin insns for clip evaluation and
|
||||
* uzp1/uzp2 insns for vector element narrowing. It's inspired by
|
||||
@@ -19,20 +19,26 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
{
|
||||
const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
|
||||
for( int y = 0; y < height; y++ ) {
|
||||
+ /* This loop is not being vectorized now. */
|
||||
for( int x = -2; x < width+3; x++ ) {
|
||||
int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
|
||||
+ (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
|
||||
dstv[x] = clip ( (v + 16) >> 5 );
|
||||
buf[x+2] = v + pad;
|
||||
}
|
||||
+
|
||||
+ /* Produces two versions of the code: 3xUZP1/2xMAX/2xMIN + 1xUZP1/1xMAX/1xMIN. */
|
||||
for( int x = 0; x < width; x++ )
|
||||
dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
|
||||
+ (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
|
||||
- 32*pad + 512) >> 10);
|
||||
+
|
||||
+ /* Produces two versions of the code: 1xUZP1/2xMAX/2xMIN + 0xUZP1/1xMAX/1xMIN. */
|
||||
for( int x = 0; x < width; x++ )
|
||||
dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
|
||||
+ (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
|
||||
+ 16) >> 5);
|
||||
+
|
||||
dsth += stride;
|
||||
dstv += stride;
|
||||
dstc += stride;
|
||||
@@ -40,7 +46,10 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
}
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times {smax\t} 4 } } */
|
||||
-/* { dg-final { scan-assembler-times {smin\t} 4 } } */
|
||||
-/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
|
||||
-/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
|
||||
+/* Max is performed on 0 from signed values, match smax exactly. */
|
||||
+/* { dg-final { scan-assembler-times {smax\t} 6 } } */
|
||||
+/* Min is performed on signed val>0 and a mask, min sign doesn't matter. */
|
||||
+/* { dg-final { scan-assembler-times {[us]min\t} 6 } } */
|
||||
+/* All of the vectorized patterns are expected to be matched. */
|
||||
+/* { dg-final { scan-assembler-not {cmtst\t} } } */
|
||||
+/* { dg-final { scan-assembler-times {uzp1\t} 5 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
65
0046-Add-new-pattern-to-pass-the-maxmin-tests.patch
Normal file
65
0046-Add-new-pattern-to-pass-the-maxmin-tests.patch
Normal file
@ -0,0 +1,65 @@
|
||||
From dbcb2630c426c8dd2117b5ce625da8422dd8cd65 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Thu, 22 Feb 2024 17:20:17 +0800
|
||||
Subject: [PATCH 14/18] Add new pattern to pass the maxmin tests
|
||||
|
||||
---
|
||||
gcc/match.pd | 24 ++++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin.c | 2 +-
|
||||
2 files changed, 25 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 3a19e93b3..aee58e47b 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -8038,6 +8038,10 @@ and,
|
||||
(match (minmax_cmp_arg @0 @1)
|
||||
(bit_and @0 INTEGER_CST@1)
|
||||
(if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
|
||||
+/* Match ((unsigned) a > 0b0..01..1) pattern. */
|
||||
+(match (minmax_cmp_arg1 @0 @1)
|
||||
+ (gt @0 INTEGER_CST@1)
|
||||
+ (if (wi::popcount (wi::to_widest (@1) + 1) == 1)))
|
||||
|
||||
/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
|
||||
This statement is blocking for the transformation of unsigned integers.
|
||||
@@ -8095,6 +8099,26 @@ and,
|
||||
(convert (min (max @0 { integer_zero_node; })
|
||||
{ mask; })))))
|
||||
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
|
||||
+ (convert? (minmax_sat_arg @0))
|
||||
+ (convert? @0)))
|
||||
+ (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
|
||||
+ (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (minmax_cmp_arg1 (convert? @0) INTEGER_CST@1)
|
||||
+ (convert? (minmax_sat_arg @0))
|
||||
+ (convert? @0))
|
||||
+ (if (wi::geu_p (wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, tree_to_shwi (@1)); }
|
||||
+ (convert (min (max (convert:integer_type_node @0) { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
/* Truncation via bit_and with mask. Same concerns on convert? here. */
|
||||
(simplify
|
||||
(convert
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
index a984fa560..5c0c9cc49 100755
|
||||
--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
@@ -52,4 +52,4 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
/* { dg-final { scan-assembler-times {[us]min\t} 6 } } */
|
||||
/* All of the vectorized patterns are expected to be matched. */
|
||||
/* { dg-final { scan-assembler-not {cmtst\t} } } */
|
||||
-/* { dg-final { scan-assembler-times {uzp1\t} 5 } } */
|
||||
+/* { dg-final { scan-assembler-times {uzp1\t} 2 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
3968
0047-AES-Implement-AES-pattern-matching.patch
Normal file
3968
0047-AES-Implement-AES-pattern-matching.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,27 @@
|
||||
From 915d549b03c10ab403538888149facd417a02ebc Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Wed, 27 Dec 2023 23:31:26 +0800
|
||||
Subject: [PATCH 16/18] [crypto-accel] add optimization level requirement to
|
||||
the gate
|
||||
|
||||
fix issue (src-openEuler/gcc: I8RRDW)
|
||||
---
|
||||
gcc/crypto-accel.cc | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/crypto-accel.cc b/gcc/crypto-accel.cc
|
||||
index f4e810a6b..e7766a585 100644
|
||||
--- a/gcc/crypto-accel.cc
|
||||
+++ b/gcc/crypto-accel.cc
|
||||
@@ -2391,7 +2391,7 @@ public:
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
- if (flag_crypto_accel_aes <= 0)
|
||||
+ if (flag_crypto_accel_aes <= 0 || optimize < 1)
|
||||
return false;
|
||||
return targetm.get_v16qi_mode
|
||||
&& targetm.gen_rev32v16qi
|
||||
--
|
||||
2.33.0
|
||||
|
||||
239
0049-Add-more-flexible-check-for-pointer-aliasing-during-.patch
Normal file
239
0049-Add-more-flexible-check-for-pointer-aliasing-during-.patch
Normal file
@ -0,0 +1,239 @@
|
||||
From b5865aef36ebaac87ae30d51f08bfe081795ed67 Mon Sep 17 00:00:00 2001
|
||||
From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
|
||||
Date: Tue, 12 Mar 2024 23:30:56 +0800
|
||||
Subject: [PATCH 17/18] Add more flexible check for pointer aliasing during
|
||||
vectorization It takes minimum between number of iteration and segment length
|
||||
it helps to speed up loops with small number of iterations when only tail can
|
||||
be vectorized
|
||||
|
||||
---
|
||||
gcc/params.opt | 5 ++
|
||||
.../sve/var_stride_flexible_segment_len_1.c | 23 +++++++
|
||||
gcc/tree-data-ref.cc | 67 +++++++++++++------
|
||||
gcc/tree-data-ref.h | 11 ++-
|
||||
gcc/tree-vect-data-refs.cc | 14 +++-
|
||||
5 files changed, 95 insertions(+), 25 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
|
||||
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index 6176d4790..7e5c119cf 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -1180,6 +1180,11 @@ Maximum number of loop peels to enhance alignment of data references in a loop.
|
||||
Common Joined UInteger Var(param_vect_max_version_for_alias_checks) Init(10) Param Optimization
|
||||
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check.
|
||||
|
||||
+-param=vect-alias-flexible-segment-len=
|
||||
+Common Joined UInteger Var(param_flexible_seg_len) Init(0) IntegerRange(0, 1) Param Optimization
|
||||
+Use a minimum length of different segments. Currently the minimum between
|
||||
+iteration number and vectorization length is chosen by this param.
|
||||
+
|
||||
-param=vect-max-version-for-alignment-checks=
|
||||
Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
|
||||
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
|
||||
new file mode 100644
|
||||
index 000000000..894f075f3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
|
||||
@@ -0,0 +1,23 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -ftree-vectorize --param=vect-alias-flexible-segment-len=1" } */
|
||||
+
|
||||
+#define TYPE int
|
||||
+#define SIZE 257
|
||||
+
|
||||
+void __attribute__ ((weak))
|
||||
+f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
|
||||
+{
|
||||
+ for (int i = 0; i < SIZE; ++i)
|
||||
+ x[i * n] += y[i * n];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
|
||||
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
|
||||
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
|
||||
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
|
||||
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
|
||||
+ an overlap check that multiplies by (257-1)*4. */
|
||||
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
|
||||
+/* One range check and a check for n being zero. */
|
||||
+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 2 } } */
|
||||
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
|
||||
diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
|
||||
index 397792c35..e6ae9e847 100644
|
||||
--- a/gcc/tree-data-ref.cc
|
||||
+++ b/gcc/tree-data-ref.cc
|
||||
@@ -2329,31 +2329,15 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
|
||||
same arguments. Try to optimize cases in which the second access
|
||||
is a write and in which some overlap is valid. */
|
||||
|
||||
-static bool
|
||||
-create_waw_or_war_checks (tree *cond_expr,
|
||||
+static void
|
||||
+create_waw_or_war_checks2 (tree *cond_expr, tree seg_len_a,
|
||||
const dr_with_seg_len_pair_t &alias_pair)
|
||||
{
|
||||
const dr_with_seg_len& dr_a = alias_pair.first;
|
||||
const dr_with_seg_len& dr_b = alias_pair.second;
|
||||
|
||||
- /* Check for cases in which:
|
||||
-
|
||||
- (a) DR_B is always a write;
|
||||
- (b) the accesses are well-ordered in both the original and new code
|
||||
- (see the comment above the DR_ALIAS_* flags for details); and
|
||||
- (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
|
||||
- if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
|
||||
- return false;
|
||||
-
|
||||
- /* Check for equal (but possibly variable) steps. */
|
||||
tree step = DR_STEP (dr_a.dr);
|
||||
- if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
|
||||
- return false;
|
||||
-
|
||||
- /* Make sure that we can operate on sizetype without loss of precision. */
|
||||
tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
|
||||
- if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
|
||||
- return false;
|
||||
|
||||
/* All addresses involved are known to have a common alignment ALIGN.
|
||||
We can therefore subtract ALIGN from an exclusive endpoint to get
|
||||
@@ -2370,9 +2354,6 @@ create_waw_or_war_checks (tree *cond_expr,
|
||||
fold_convert (ssizetype, indicator),
|
||||
ssize_int (0));
|
||||
|
||||
- /* Get lengths in sizetype. */
|
||||
- tree seg_len_a
|
||||
- = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
|
||||
step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
|
||||
|
||||
/* Each access has the following pattern:
|
||||
@@ -2479,6 +2460,50 @@ create_waw_or_war_checks (tree *cond_expr,
|
||||
*cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
|
||||
if (dump_enabled_p ())
|
||||
dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
|
||||
+}
|
||||
+
|
||||
+/* This is a wrapper function for create_waw_or_war_checks2. */
|
||||
+static bool
|
||||
+create_waw_or_war_checks (tree *cond_expr,
|
||||
+ const dr_with_seg_len_pair_t &alias_pair)
|
||||
+{
|
||||
+ const dr_with_seg_len& dr_a = alias_pair.first;
|
||||
+ const dr_with_seg_len& dr_b = alias_pair.second;
|
||||
+
|
||||
+ /* Check for cases in which:
|
||||
+
|
||||
+ (a) DR_B is always a write;
|
||||
+ (b) the accesses are well-ordered in both the original and new code
|
||||
+ (see the comment above the DR_ALIAS_* flags for details); and
|
||||
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
|
||||
+ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Check for equal (but possibly variable) steps. */
|
||||
+ tree step = DR_STEP (dr_a.dr);
|
||||
+ if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Make sure that we can operate on sizetype without loss of precision. */
|
||||
+ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
|
||||
+ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Get lengths in sizetype. */
|
||||
+ tree seg_len_a
|
||||
+ = fold_convert (sizetype,
|
||||
+ rewrite_to_non_trapping_overflow (dr_a.seg_len));
|
||||
+ create_waw_or_war_checks2 (cond_expr, seg_len_a, alias_pair);
|
||||
+ if (param_flexible_seg_len && dr_a.seg_len != dr_a.seg_len2)
|
||||
+ {
|
||||
+ tree seg_len2_a
|
||||
+ = fold_convert (sizetype,
|
||||
+ rewrite_to_non_trapping_overflow (dr_a.seg_len2));
|
||||
+ tree cond_expr2;
|
||||
+ create_waw_or_war_checks2 (&cond_expr2, seg_len2_a, alias_pair);
|
||||
+ *cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
|
||||
+ *cond_expr, cond_expr2);
|
||||
+ }
|
||||
return true;
|
||||
}
|
||||
|
||||
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
|
||||
index f643a95b2..9bc5f16ee 100644
|
||||
--- a/gcc/tree-data-ref.h
|
||||
+++ b/gcc/tree-data-ref.h
|
||||
@@ -213,12 +213,19 @@ class dr_with_seg_len
|
||||
public:
|
||||
dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size,
|
||||
unsigned int a)
|
||||
- : dr (d), seg_len (len), access_size (size), align (a) {}
|
||||
-
|
||||
+ : dr (d), seg_len (len), seg_len2 (len), access_size (size), align (a)
|
||||
+ {}
|
||||
+ dr_with_seg_len (data_reference_p d, tree len, tree len2,
|
||||
+ unsigned HOST_WIDE_INT size, unsigned int a)
|
||||
+ : dr (d), seg_len (len), seg_len2 (len2), access_size (size), align (a)
|
||||
+ {}
|
||||
data_reference_p dr;
|
||||
/* The offset of the last access that needs to be checked minus
|
||||
the offset of the first. */
|
||||
tree seg_len;
|
||||
+ /* The second version of segment length. Currently this is used to
|
||||
+ soften checks for a small number of iterations. */
|
||||
+ tree seg_len2;
|
||||
/* A value that, when added to abs (SEG_LEN), gives the total number of
|
||||
bytes in the segment. */
|
||||
poly_uint64 access_size;
|
||||
diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc
|
||||
index 4e615b80b..04e68f621 100644
|
||||
--- a/gcc/tree-vect-data-refs.cc
|
||||
+++ b/gcc/tree-vect-data-refs.cc
|
||||
@@ -3646,6 +3646,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
{
|
||||
poly_uint64 lower_bound;
|
||||
tree segment_length_a, segment_length_b;
|
||||
+ tree segment_length2_a, segment_length2_b;
|
||||
unsigned HOST_WIDE_INT access_size_a, access_size_b;
|
||||
unsigned int align_a, align_b;
|
||||
|
||||
@@ -3751,6 +3752,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
{
|
||||
segment_length_a = size_zero_node;
|
||||
segment_length_b = size_zero_node;
|
||||
+ segment_length2_a = size_zero_node;
|
||||
+ segment_length2_b = size_zero_node;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -3759,8 +3762,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
length_factor = scalar_loop_iters;
|
||||
else
|
||||
length_factor = size_int (vect_factor);
|
||||
+ /* In any case we should remember scalar_loop_iters
|
||||
+ this helps to create flexible aliasing check
|
||||
+ for small number of iterations. */
|
||||
segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
|
||||
segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
|
||||
+ segment_length2_a
|
||||
+ = vect_vfa_segment_size (dr_info_a, scalar_loop_iters);
|
||||
+ segment_length2_b
|
||||
+ = vect_vfa_segment_size (dr_info_b, scalar_loop_iters);
|
||||
}
|
||||
access_size_a = vect_vfa_access_size (loop_vinfo, dr_info_a);
|
||||
access_size_b = vect_vfa_access_size (loop_vinfo, dr_info_b);
|
||||
@@ -3805,9 +3815,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
}
|
||||
|
||||
dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
|
||||
- access_size_a, align_a);
|
||||
+ segment_length2_a, access_size_a, align_a);
|
||||
dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
|
||||
- access_size_b, align_b);
|
||||
+ segment_length2_b, access_size_b, align_b);
|
||||
/* Canonicalize the order to be the one that's needed for accurate
|
||||
RAW, WAR and WAW flags, in cases where the data references are
|
||||
well-ordered. The order doesn't really matter otherwise,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2071
0050-Port-IPA-prefetch-to-GCC-12.patch
Normal file
2071
0050-Port-IPA-prefetch-to-GCC-12.patch
Normal file
File diff suppressed because it is too large
Load Diff
2216
0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
Normal file
2216
0051-Port-fixes-for-IPA-prefetch-to-GCC-12.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,94 @@
|
||||
From 0263daa1312d0cdcdf9c770bcf5d982a2d4fc16b Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Fri, 29 Mar 2024 17:15:41 +0800
|
||||
Subject: [PATCH 2/2] Fix fails in IPA prefetch (src-openEuler/gcc: I96ID7)
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.cc | 28 ++++++++++++++++++++++++++--
|
||||
1 file changed, 26 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
|
||||
index 9537e4835..1ceb5137f 100644
|
||||
--- a/gcc/ipa-prefetch.cc
|
||||
+++ b/gcc/ipa-prefetch.cc
|
||||
@@ -366,6 +366,7 @@ typedef std::map<memref_t *, memref_t *> memref_map;
|
||||
typedef std::map<memref_t *, tree> memref_tree_map;
|
||||
|
||||
typedef std::set<gimple *> stmt_set;
|
||||
+typedef std::set<tree> tree_set;
|
||||
typedef std::map<tree, tree> tree_map;
|
||||
|
||||
tree_memref_map *tm_map;
|
||||
@@ -1124,8 +1125,21 @@ analyse_loops ()
|
||||
}
|
||||
}
|
||||
|
||||
+/* Compare memrefs by IDs; helper for qsort. */
|
||||
+
|
||||
+static int
|
||||
+memref_id_cmp (const void *p1, const void *p2)
|
||||
+{
|
||||
+ const memref_t *mr1 = *(const memref_t **) p1;
|
||||
+ const memref_t *mr2 = *(const memref_t **) p2;
|
||||
+
|
||||
+ if ((unsigned) mr1->mr_id > (unsigned) mr2->mr_id)
|
||||
+ return 1;
|
||||
+ return -1;
|
||||
+}
|
||||
+
|
||||
/* Reduce the set filtering out memrefs with the same memory references,
|
||||
- return the result vector of memrefs. */
|
||||
+ sort and return the result vector of memrefs. */
|
||||
|
||||
static void
|
||||
reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
|
||||
@@ -1162,6 +1176,7 @@ reduce_memref_set (memref_set *set, vec<memref_t *> &vec)
|
||||
vec.safe_push (mr1);
|
||||
}
|
||||
}
|
||||
+ vec.qsort (memref_id_cmp);
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "MRs (%d) after filtering: ", vec.length ());
|
||||
@@ -1663,10 +1678,15 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
}
|
||||
|
||||
/* Create other new vars. Insert new stmts. */
|
||||
+ vec<memref_t *> used_mr_vec = vNULL;
|
||||
for (memref_set::const_iterator it = used_mrs.begin ();
|
||||
it != used_mrs.end (); it++)
|
||||
+ used_mr_vec.safe_push (*it);
|
||||
+ used_mr_vec.qsort (memref_id_cmp);
|
||||
+
|
||||
+ for (unsigned int j = 0; j < used_mr_vec.length (); j++)
|
||||
{
|
||||
- memref_t *mr = *it;
|
||||
+ memref_t *mr = used_mr_vec[j];
|
||||
if (mr == comp_mr)
|
||||
continue;
|
||||
gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
|
||||
@@ -1702,6 +1722,7 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
local = integer_three_node;
|
||||
break;
|
||||
}
|
||||
+ tree_set prefetched_addrs;
|
||||
for (unsigned int j = 0; j < vmrs.length (); j++)
|
||||
{
|
||||
memref_t *mr = vmrs[j];
|
||||
@@ -1714,10 +1735,13 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
|
||||
if (decl_map->count (addr))
|
||||
addr = (*decl_map)[addr];
|
||||
+ if (prefetched_addrs.count (addr))
|
||||
+ continue;
|
||||
last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
|
||||
3, addr, write_p, local);
|
||||
pcalls.safe_push (last_stmt);
|
||||
gimple_seq_add_stmt (&stmts, last_stmt);
|
||||
+ prefetched_addrs.insert (addr);
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1366
0053-struct-reorg-Add-Semi-Relayout.patch
Normal file
1366
0053-struct-reorg-Add-Semi-Relayout.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,28 @@
|
||||
From 9dc3df938b9ed2c27498c8548087fee1ce930366 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= <zhengchenhui1@huawei.com>
|
||||
Date: Tue, 2 Apr 2024 11:08:30 +0800
|
||||
Subject: [PATCH] [Struct Reorg] Bugfix for structure pointer compression
|
||||
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
index fa33f2d35..3922873f3 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
@@ -7541,9 +7541,11 @@ ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void)
|
||||
if (!type->has_legal_alloc_num)
|
||||
{
|
||||
if (current_layout_opt_level & POINTER_COMPRESSION_UNSAFE)
|
||||
+ {
|
||||
if (dump_file)
|
||||
fprintf (dump_file, " has unknown alloc size, but"
|
||||
" in unsafe mode, so");
|
||||
+ }
|
||||
else
|
||||
{
|
||||
if (dump_file)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
420
0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch
Normal file
420
0055-Struct-Reorg-Port-bugfixes-to-GCC-12.3.1.patch
Normal file
@ -0,0 +1,420 @@
|
||||
From 55c547748af36ffc3f2d5ed154a91fb3fcb8431c Mon Sep 17 00:00:00 2001
|
||||
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
||||
Date: Thu, 11 Apr 2024 15:49:59 +0800
|
||||
Subject: [PATCH] [Struct Reorg] Port bugfixes to GCC 12.3.1
|
||||
|
||||
Migrated from commits in GCC10.3.1:
|
||||
https://gitee.com/openeuler/gcc/commit/41af6d361a6d85ef4fce8a8438113d765596afdd
|
||||
https://gitee.com/openeuler/gcc/commit/25d74b98caeaae881e374924886ee664aa1af5bc
|
||||
https://gitee.com/openeuler/gcc/commit/b5a3bfe92f96cd0d2224d80ac4eaa80dab1bd6bf
|
||||
https://gitee.com/openeuler/gcc/commit/708ffe6f132ee39441b66b6ab6b98847d35916b7
|
||||
https://gitee.com/openeuler/gcc/commit/e875e4e7f3716aa268ffbbf55ee199ec82b6aeba
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 97 ++++++++++---------
|
||||
gcc/testsuite/gcc.dg/struct/dfe_escape.c | 50 ++++++++++
|
||||
gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c | 69 +++++++++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 2 +
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-10.c | 29 ++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-11.c | 16 +++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-12.c | 26 +++++
|
||||
7 files changed, 243 insertions(+), 46 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_escape.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
index 6a202b4bd..f03d1d875 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
@@ -466,10 +466,19 @@ srtype::has_dead_field (void)
|
||||
unsigned i;
|
||||
FOR_EACH_VEC_ELT (fields, i, this_field)
|
||||
{
|
||||
- if (!(this_field->field_access & READ_FIELD))
|
||||
- {
|
||||
- may_dfe = true;
|
||||
- break;
|
||||
+ /* Function pointer members are not processed, because DFE
|
||||
+ does not currently support accurate analysis of function
|
||||
+ pointers, and we have not identified specific use cases. */
|
||||
+ if (!(this_field->field_access & READ_FIELD)
|
||||
+ && !FUNCTION_POINTER_TYPE_P (this_field->fieldtype))
|
||||
+ {
|
||||
+ /* Fields with escape risks should not be processed. */
|
||||
+ if (this_field->type == NULL
|
||||
+ || (this_field->type->escapes == does_not_escape))
|
||||
+ {
|
||||
+ may_dfe = true;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
return may_dfe;
|
||||
@@ -1032,8 +1041,13 @@ srtype::create_new_type (void)
|
||||
{
|
||||
srfield *f = fields[i];
|
||||
if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
|
||||
- && !(f->field_access & READ_FIELD))
|
||||
- continue;
|
||||
+ && !(f->field_access & READ_FIELD)
|
||||
+ && !FUNCTION_POINTER_TYPE_P (f->fieldtype))
|
||||
+ {
|
||||
+ /* Fields with escape risks should not be processed. */
|
||||
+ if (f->type == NULL || (f->type->escapes == does_not_escape))
|
||||
+ continue;
|
||||
+ }
|
||||
f->create_new_fields (newtype, newfields, newlast);
|
||||
}
|
||||
|
||||
@@ -3815,9 +3829,17 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other,
|
||||
if (VOID_POINTER_P (TREE_TYPE (side))
|
||||
&& TREE_CODE (side) == SSA_NAME)
|
||||
{
|
||||
- /* The type is other, the declaration is side. */
|
||||
- current_function->record_decl (type, side, -1,
|
||||
- isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
|
||||
+ tree inner = SSA_NAME_VAR (side);
|
||||
+ if (inner)
|
||||
+ {
|
||||
+ srdecl *in = find_decl (inner);
|
||||
+ if (in && !in->type->has_escaped ())
|
||||
+ {
|
||||
+ /* The type is other, the declaration is side. */
|
||||
+ current_function->record_decl (type, side, -1,
|
||||
+ isptrptr (TREE_TYPE (other)) ? TREE_TYPE (other) : NULL);
|
||||
+ }
|
||||
+ }
|
||||
}
|
||||
else
|
||||
/* *_1 = &MEM[(void *)&x + 8B]. */
|
||||
@@ -3910,6 +3932,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
|
||||
maybe_mark_or_record_other_side (rhs, lhs, stmt);
|
||||
if (TREE_CODE (lhs) == SSA_NAME)
|
||||
maybe_mark_or_record_other_side (lhs, rhs, stmt);
|
||||
+
|
||||
+ /* Handle missing ARRAY_REF cases. */
|
||||
+ if (TREE_CODE (lhs) == ARRAY_REF)
|
||||
+ mark_type_as_escape (TREE_TYPE (lhs), escape_array, stmt);
|
||||
+ if (TREE_CODE (rhs) == ARRAY_REF)
|
||||
+ mark_type_as_escape (TREE_TYPE (rhs), escape_array, stmt);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5272,8 +5300,11 @@ ipa_struct_reorg::record_accesses (void)
|
||||
record_function (cnode);
|
||||
else
|
||||
{
|
||||
- tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
|
||||
- mark_type_as_escape (return_type, escape_return, NULL);
|
||||
+ if (cnode->externally_visible)
|
||||
+ {
|
||||
+ tree return_type = TREE_TYPE (TREE_TYPE (cnode->decl));
|
||||
+ mark_type_as_escape (return_type, escape_return, NULL);
|
||||
+ }
|
||||
}
|
||||
|
||||
}
|
||||
@@ -5889,6 +5920,7 @@ ipa_struct_reorg::rewrite_expr (tree expr,
|
||||
bool escape_from_base = false;
|
||||
|
||||
tree newbase[max_split];
|
||||
+ memset (newbase, 0, sizeof (tree[max_split]));
|
||||
memset (newexpr, 0, sizeof (tree[max_split]));
|
||||
|
||||
if (TREE_CODE (expr) == CONSTRUCTOR)
|
||||
@@ -6912,7 +6944,7 @@ create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr,
|
||||
}
|
||||
|
||||
tree
|
||||
-ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt,
|
||||
+ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt ATTRIBUTE_UNUSED,
|
||||
gimple_stmt_iterator *gsi,
|
||||
tree ptr, tree offset,
|
||||
srtype *type)
|
||||
@@ -7889,41 +7921,14 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt,
|
||||
should be removed. */
|
||||
|
||||
bool
|
||||
-ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
|
||||
+ipa_struct_reorg::rewrite_debug (gimple *, gimple_stmt_iterator *)
|
||||
{
|
||||
- if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
|
||||
- /* Delete debug gimple now. */
|
||||
- return true;
|
||||
- bool remove = false;
|
||||
- if (gimple_debug_bind_p (stmt))
|
||||
- {
|
||||
- tree var = gimple_debug_bind_get_var (stmt);
|
||||
- tree newvar[max_split];
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- if (gimple_debug_bind_has_value_p (stmt))
|
||||
- {
|
||||
- var = gimple_debug_bind_get_value (stmt);
|
||||
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
|
||||
- var = TREE_OPERAND (var, 0);
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- }
|
||||
- }
|
||||
- else if (gimple_debug_source_bind_p (stmt))
|
||||
- {
|
||||
- tree var = gimple_debug_source_bind_get_var (stmt);
|
||||
- tree newvar[max_split];
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- var = gimple_debug_source_bind_get_value (stmt);
|
||||
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
|
||||
- var = TREE_OPERAND (var, 0);
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- }
|
||||
-
|
||||
- return remove;
|
||||
+ /* In debug statements, there might be some statements that have
|
||||
+ been optimized out in gimple but left in debug gimple. Sometimes
|
||||
+ these statements need to be analyzed to escape, but in rewrite
|
||||
+ stage it shouldn't happen. It needs to care a lot to handle these
|
||||
+ cases but seems useless. So now we just delete debug gimple. */
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/* Rewrite PHI nodes, return true if the PHI was replaced. */
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_escape.c b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
|
||||
new file mode 100644
|
||||
index 000000000..09efe8027
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+/* { dg-do compile } */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+typedef struct arc arc_t;
|
||||
+typedef struct arc *arc_p;
|
||||
+
|
||||
+typedef struct network
|
||||
+{
|
||||
+ int x;
|
||||
+} network_t;
|
||||
+
|
||||
+struct arc
|
||||
+{
|
||||
+ int flow;
|
||||
+ network_t* net_add;
|
||||
+};
|
||||
+
|
||||
+const int MAX = 100;
|
||||
+
|
||||
+/* let it escape_array, "Type is used in an array [not handled yet]". */
|
||||
+network_t* net[2];
|
||||
+arc_p stop_arcs = NULL;
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ net[0] = (network_t*) calloc (1, sizeof(network_t));
|
||||
+ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
|
||||
+
|
||||
+ net[0]->x = 100;
|
||||
+
|
||||
+ for (unsigned i = 0; i < 3; i++)
|
||||
+ {
|
||||
+ net[0]->x = net[0]->x + 2;
|
||||
+ stop_arcs->flow = net[0]->x / 2;
|
||||
+ stop_arcs->flow = stop_arcs->flow + 20;
|
||||
+ stop_arcs->net_add = net[0];
|
||||
+ stop_arcs++;
|
||||
+ }
|
||||
+
|
||||
+ if( net[1] != 0 && stop_arcs != 0)
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
|
||||
new file mode 100644
|
||||
index 000000000..74ea93bbc
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_func_ptr.c
|
||||
@@ -0,0 +1,69 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-do run } */
|
||||
+
|
||||
+#include <stdlib.h>
|
||||
+#include <stdio.h>
|
||||
+
|
||||
+#ifdef STACK_SIZE
|
||||
+#if STACK_SIZE > 16000
|
||||
+#define N 1000
|
||||
+#else
|
||||
+#define N (STACK_SIZE/16)
|
||||
+#endif
|
||||
+#else
|
||||
+#define N 1000
|
||||
+#endif
|
||||
+
|
||||
+int num;
|
||||
+
|
||||
+int (*foo)(int d);
|
||||
+int f (int t);
|
||||
+
|
||||
+typedef struct str_t str_t1;
|
||||
+struct str_t
|
||||
+{
|
||||
+ int a;
|
||||
+ float b;
|
||||
+ int (*foo)(int d);
|
||||
+};
|
||||
+
|
||||
+int main ()
|
||||
+{
|
||||
+ int i, r;
|
||||
+ r = rand ();
|
||||
+ num = r > N ? N : r;
|
||||
+ str_t1 * p1 = calloc (num, sizeof (str_t1));
|
||||
+ if (p1 == NULL)
|
||||
+ return 0;
|
||||
+ for (i = 0; i < num; i++)
|
||||
+ {
|
||||
+ p1[i].foo = malloc (1 * sizeof (f));
|
||||
+ p1[i].foo = f;
|
||||
+ p1[i].foo (i);
|
||||
+ }
|
||||
+
|
||||
+ for (i = 0; i < num; i++)
|
||||
+ p1[i].a = 1;
|
||||
+
|
||||
+ for (i = 0; i < num; i++)
|
||||
+ p1[i].b = 2;
|
||||
+
|
||||
+ for (i = 0; i < num; i++)
|
||||
+ if (p1[i].a != 1)
|
||||
+ abort ();
|
||||
+
|
||||
+ for (i = 0; i < num; i++)
|
||||
+ if (abs (p1[i].b - 2) > 0.0001)
|
||||
+ abort ();
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+int f (int t)
|
||||
+{
|
||||
+ if ( t < 0)
|
||||
+ abort ();
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
|
||||
index c5a955b00..687f6609f 100644
|
||||
--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
|
||||
@@ -46,6 +46,8 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf_*.c]] \
|
||||
# -fipa-struct-reorg=3
|
||||
gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \
|
||||
"" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
||||
+gcc-dg-runtest $srcdir/$subdir/struct_reorg-7.c \
|
||||
+ "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program"
|
||||
|
||||
# -fipa-struct-reorg=4
|
||||
gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
|
||||
new file mode 100644
|
||||
index 000000000..ec422f76f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
|
||||
@@ -0,0 +1,29 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
|
||||
+
|
||||
+struct a {
|
||||
+ int b;
|
||||
+ char c;
|
||||
+};
|
||||
+struct {
|
||||
+ double d;
|
||||
+ _Bool e;
|
||||
+} * f;
|
||||
+struct g {
|
||||
+ struct a h;
|
||||
+} i;
|
||||
+long j;
|
||||
+void k();
|
||||
+void l() { k(i); }
|
||||
+void k(struct a m) {
|
||||
+ f->e = 0;
|
||||
+ for (;;)
|
||||
+ l();
|
||||
+}
|
||||
+int main() {
|
||||
+ for (; j; f = 0) {
|
||||
+ struct g *n = 0;
|
||||
+ char o = n->h.c;
|
||||
+ }
|
||||
+ l();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
|
||||
new file mode 100644
|
||||
index 000000000..3e42aa84a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
|
||||
+
|
||||
+struct a {
|
||||
+ int b;
|
||||
+ double c;
|
||||
+};
|
||||
+struct d {
|
||||
+ struct a e;
|
||||
+};
|
||||
+int f;
|
||||
+int main() {
|
||||
+ _Bool g;
|
||||
+ struct d **h = 0;
|
||||
+ g = *h += f;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
|
||||
new file mode 100644
|
||||
index 000000000..d434f9fe0
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
|
||||
@@ -0,0 +1,26 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
|
||||
+
|
||||
+struct foo {
|
||||
+ long element1;
|
||||
+ long element2;
|
||||
+};
|
||||
+
|
||||
+struct goo {
|
||||
+ struct foo element_foo;
|
||||
+};
|
||||
+
|
||||
+struct goo g1;
|
||||
+
|
||||
+void func () {
|
||||
+ struct foo (*local)[] = 0;
|
||||
+ long idx;
|
||||
+ (g1).element_foo = (*local)[idx];
|
||||
+}
|
||||
+
|
||||
+struct foo g2;
|
||||
+int main () {
|
||||
+ func ();
|
||||
+ g2 = g1.element_foo;
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,27 @@
|
||||
From fa6f80044dcebd28506e871e6e5d25e2dfd7e105 Mon Sep 17 00:00:00 2001
|
||||
From: tiancheng-bao <baotiancheng1@huawei.com>
|
||||
Date: Fri, 12 Apr 2024 15:09:28 +0800
|
||||
Subject: [PATCH 01/32] Fix bug that verifying gimple failed when reorg-level >
|
||||
5
|
||||
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
index f03d1d875..e08577c0c 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
@@ -7461,6 +7461,9 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
|
||||
continue;
|
||||
tree lhs_expr = newlhs[i] ? newlhs[i] : lhs;
|
||||
tree rhs_expr = newrhs[i] ? newrhs[i] : rhs;
|
||||
+ if (!useless_type_conversion_p (TREE_TYPE (lhs_expr),
|
||||
+ TREE_TYPE (rhs_expr)))
|
||||
+ rhs_expr = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (lhs_expr), rhs_expr);
|
||||
gimple *newstmt = gimple_build_assign (lhs_expr, rhs_expr);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
90
0057-AutoFdo-Fix-memory-leaks-in-autofdo.patch
Normal file
90
0057-AutoFdo-Fix-memory-leaks-in-autofdo.patch
Normal file
@ -0,0 +1,90 @@
|
||||
From 13e82fccba781b29e55a6e1934986514019b728d Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao <zhaozhenyu17@huawei.com>
|
||||
Date: Sun, 24 Mar 2024 20:42:27 +0800
|
||||
Subject: [PATCH 02/32] [AutoFdo] Fix memory leaks in autofdo
|
||||
|
||||
---
|
||||
gcc/final.cc | 22 ++++++++++++++--------
|
||||
1 file changed, 14 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/gcc/final.cc b/gcc/final.cc
|
||||
index d4c4fa08f..af4e529bb 100644
|
||||
--- a/gcc/final.cc
|
||||
+++ b/gcc/final.cc
|
||||
@@ -4402,12 +4402,15 @@ get_fdo_count_quality (profile_count count)
|
||||
return profile_quality[count.quality ()];
|
||||
}
|
||||
|
||||
-static const char *
|
||||
+/* If the function is not public, return the function_name/file_name for
|
||||
+ disambiguation of local symbols since there could be identical function
|
||||
+ names coming from identical file names. The caller needs to free memory. */
|
||||
+static char *
|
||||
alias_local_functions (const char *fnname)
|
||||
{
|
||||
if (TREE_PUBLIC (cfun->decl))
|
||||
{
|
||||
- return fnname;
|
||||
+ return concat (fnname, NULL);
|
||||
}
|
||||
return concat (fnname, "/", lbasename (dump_base_name), NULL);
|
||||
}
|
||||
@@ -4457,12 +4460,13 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
||||
|
||||
if (callee)
|
||||
{
|
||||
+ char *func_name =
|
||||
+ alias_local_functions (get_fnname_from_decl (callee));
|
||||
fprintf (asm_out_file, "\t.string \"%x\"\n",
|
||||
INSN_ADDRESSES (INSN_UID (insn)));
|
||||
|
||||
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
- ASM_FDO_CALLEE_FLAG,
|
||||
- alias_local_functions (get_fnname_from_decl (callee)));
|
||||
+ ASM_FDO_CALLEE_FLAG, func_name);
|
||||
|
||||
fprintf (asm_out_file,
|
||||
"\t.string \"" HOST_WIDE_INT_PRINT_DEC "\"\n",
|
||||
@@ -4472,9 +4476,9 @@ dump_direct_callee_info_to_asm (basic_block bb, gcov_type call_count)
|
||||
{
|
||||
fprintf (dump_file, "call: %x --> %s \n",
|
||||
INSN_ADDRESSES (INSN_UID (insn)),
|
||||
- alias_local_functions
|
||||
- (get_fnname_from_decl (callee)));
|
||||
+ func_name);
|
||||
}
|
||||
+ free (func_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4547,8 +4551,9 @@ dump_bb_info_to_asm (basic_block bb, gcov_type bb_count)
|
||||
static void
|
||||
dump_function_info_to_asm (const char *fnname)
|
||||
{
|
||||
+ char *func_name = alias_local_functions (fnname);
|
||||
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
- ASM_FDO_CALLER_FLAG, alias_local_functions (fnname));
|
||||
+ ASM_FDO_CALLER_FLAG, func_name);
|
||||
fprintf (asm_out_file, "\t.string \"%s%d\"\n",
|
||||
ASM_FDO_CALLER_SIZE_FLAG, get_function_end_addr ());
|
||||
fprintf (asm_out_file, "\t.string \"%s%s\"\n",
|
||||
@@ -4557,7 +4562,7 @@ dump_function_info_to_asm (const char *fnname)
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "\n FUNC_NAME: %s\n",
|
||||
- alias_local_functions (fnname));
|
||||
+ func_name);
|
||||
fprintf (dump_file, " file: %s\n",
|
||||
dump_base_name);
|
||||
fprintf (dump_file, "profile_status: %s\n",
|
||||
@@ -4567,6 +4572,7 @@ dump_function_info_to_asm (const char *fnname)
|
||||
fprintf (dump_file, " function_bind: %s\n",
|
||||
simple_get_function_bind ());
|
||||
}
|
||||
+ free (func_name);
|
||||
}
|
||||
|
||||
/* Dump function profile into form AutoFDO or PGO to asm. */
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
321
0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
Normal file
321
0086-Modfify-cost-calculation-for-dealing-with-equivalenc.patch
Normal file
@ -0,0 +1,321 @@
|
||||
From c546aad5d38165e2962456525a0f6a427e03583b Mon Sep 17 00:00:00 2001
|
||||
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
|
||||
Date: Thu, 26 Oct 2023 09:50:40 -0400
|
||||
Subject: [PATCH 31/32] Modify cost calculation for dealing with equivalences
|
||||
|
||||
RISCV target developers reported that pseudos with equivalence used in
|
||||
a loop can be spilled. Simple changes of heuristics of cost
|
||||
calculation of pseudos with equivalence or even ignoring equivalences
|
||||
resulted in numerous testsuite failures on different targets or worse
|
||||
spec2017 performance. This patch implements more sophisticated cost
|
||||
calculations of pseudos with equivalences. The patch does not change
|
||||
RA behaviour for targets still using the old reload pass instead of
|
||||
LRA. The patch solves the reported problem and improves x86-64
|
||||
specint2017 a bit (specfp2017 performance stays the same). The patch
|
||||
takes into account how the equivalence will be used: will it be
|
||||
integrated into the user insns or require an input reload insn. It
|
||||
requires additional pass over insns. To compensate RA slow down, the
|
||||
patch removes a pass over insns in the reload pass used by IRA before.
|
||||
This also decouples IRA from reload more and will help to remove the
|
||||
reload pass in the future if it ever happens.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* dwarf2out.cc (reg_loc_descriptor): Use lra_eliminate_regs when
|
||||
LRA is used.
|
||||
* ira-costs.cc: Include regset.h.
|
||||
(equiv_can_be_consumed_p, get_equiv_regno, calculate_equiv_gains):
|
||||
New functions.
|
||||
(find_costs_and_classes): Call calculate_equiv_gains and redefine
|
||||
mem_cost of pseudos with equivs when LRA is used.
|
||||
* var-tracking.cc: Include ira.h and lra.h.
|
||||
(vt_initialize): Use lra_eliminate_regs when LRA is used.
|
||||
---
|
||||
gcc/dwarf2out.cc | 4 +-
|
||||
gcc/ira-costs.cc | 169 ++++++++++++++++++++++++++++++++++++++++++--
|
||||
gcc/var-tracking.cc | 14 +++-
|
||||
3 files changed, 179 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/gcc/dwarf2out.cc b/gcc/dwarf2out.cc
|
||||
index 0a5c081d8..f0f6f4fd4 100644
|
||||
--- a/gcc/dwarf2out.cc
|
||||
+++ b/gcc/dwarf2out.cc
|
||||
@@ -14263,7 +14263,9 @@ reg_loc_descriptor (rtx rtl, enum var_init_status initialized)
|
||||
argument pointer and soft frame pointer rtx's.
|
||||
Use DW_OP_fbreg offset DW_OP_stack_value in this case. */
|
||||
if ((rtl == arg_pointer_rtx || rtl == frame_pointer_rtx)
|
||||
- && eliminate_regs (rtl, VOIDmode, NULL_RTX) != rtl)
|
||||
+ && (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (rtl, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (rtl, VOIDmode, NULL_RTX)) != rtl)
|
||||
{
|
||||
dw_loc_descr_ref result = NULL;
|
||||
|
||||
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
|
||||
index 642fda529..c79311783 100644
|
||||
--- a/gcc/ira-costs.cc
|
||||
+++ b/gcc/ira-costs.cc
|
||||
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tm_p.h"
|
||||
#include "insn-config.h"
|
||||
#include "regs.h"
|
||||
+#include "regset.h"
|
||||
#include "ira.h"
|
||||
#include "ira-int.h"
|
||||
#include "addresses.h"
|
||||
@@ -1750,6 +1751,145 @@ process_bb_node_for_costs (ira_loop_tree_node_t loop_tree_node)
|
||||
process_bb_for_costs (bb);
|
||||
}
|
||||
|
||||
+/* Check that reg REGNO can be changed by TO in INSN. Return true in case the
|
||||
+ result insn would be valid one. */
|
||||
+static bool
|
||||
+equiv_can_be_consumed_p (int regno, rtx to, rtx_insn *insn)
|
||||
+{
|
||||
+ validate_replace_src_group (regno_reg_rtx[regno], to, insn);
|
||||
+ bool res = verify_changes (0);
|
||||
+ cancel_changes (0);
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* Return true if X contains a pseudo with equivalence. In this case also
|
||||
+ return the pseudo through parameter REG. If the pseudo is a part of subreg,
|
||||
+ return the subreg through parameter SUBREG. */
|
||||
+
|
||||
+static bool
|
||||
+get_equiv_regno (rtx x, int &regno, rtx &subreg)
|
||||
+{
|
||||
+ subreg = NULL_RTX;
|
||||
+ if (GET_CODE (x) == SUBREG)
|
||||
+ {
|
||||
+ subreg = x;
|
||||
+ x = SUBREG_REG (x);
|
||||
+ }
|
||||
+ if (REG_P (x)
|
||||
+ && (ira_reg_equiv[REGNO (x)].memory != NULL
|
||||
+ || ira_reg_equiv[REGNO (x)].constant != NULL))
|
||||
+ {
|
||||
+ regno = REGNO (x);
|
||||
+ return true;
|
||||
+ }
|
||||
+ RTX_CODE code = GET_CODE (x);
|
||||
+ const char *fmt = GET_RTX_FORMAT (code);
|
||||
+
|
||||
+ for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
||||
+ if (fmt[i] == 'e')
|
||||
+ {
|
||||
+ if (get_equiv_regno (XEXP (x, i), regno, subreg))
|
||||
+ return true;
|
||||
+ }
|
||||
+ else if (fmt[i] == 'E')
|
||||
+ {
|
||||
+ for (int j = 0; j < XVECLEN (x, i); j++)
|
||||
+ if (get_equiv_regno (XVECEXP (x, i, j), regno, subreg))
|
||||
+ return true;
|
||||
+ }
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
+/* A pass through the current function insns. Calculate costs of using
|
||||
+ equivalences for pseudos and store them in regno_equiv_gains. */
|
||||
+
|
||||
+static void
|
||||
+calculate_equiv_gains (void)
|
||||
+{
|
||||
+ basic_block bb;
|
||||
+ int regno, freq, cost;
|
||||
+ rtx subreg;
|
||||
+ rtx_insn *insn;
|
||||
+ machine_mode mode;
|
||||
+ enum reg_class rclass;
|
||||
+ bitmap_head equiv_pseudos;
|
||||
+
|
||||
+ ira_assert (allocno_p);
|
||||
+ bitmap_initialize (&equiv_pseudos, &reg_obstack);
|
||||
+ for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
|
||||
+ if (ira_reg_equiv[regno].init_insns != NULL
|
||||
+ && (ira_reg_equiv[regno].memory != NULL
|
||||
+ || (ira_reg_equiv[regno].constant != NULL
|
||||
+ /* Ignore complicated constants which probably will be placed
|
||||
+ in memory: */
|
||||
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_DOUBLE
|
||||
+ && GET_CODE (ira_reg_equiv[regno].constant) != CONST_VECTOR
|
||||
+ && GET_CODE (ira_reg_equiv[regno].constant) != LABEL_REF)))
|
||||
+ {
|
||||
+ rtx_insn_list *x;
|
||||
+ for (x = ira_reg_equiv[regno].init_insns; x != NULL; x = x->next ())
|
||||
+ {
|
||||
+ insn = x->insn ();
|
||||
+ rtx set = single_set (insn);
|
||||
+
|
||||
+ if (set == NULL_RTX || SET_DEST (set) != regno_reg_rtx[regno])
|
||||
+ break;
|
||||
+ bb = BLOCK_FOR_INSN (insn);
|
||||
+ ira_curr_regno_allocno_map
|
||||
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
|
||||
+ mode = PSEUDO_REGNO_MODE (regno);
|
||||
+ rclass = pref[COST_INDEX (regno)];
|
||||
+ ira_init_register_move_cost_if_necessary (mode);
|
||||
+ if (ira_reg_equiv[regno].memory != NULL)
|
||||
+ cost = ira_memory_move_cost[mode][rclass][1];
|
||||
+ else
|
||||
+ cost = ira_register_move_cost[mode][rclass][rclass];
|
||||
+ freq = REG_FREQ_FROM_BB (bb);
|
||||
+ regno_equiv_gains[regno] += cost * freq;
|
||||
+ }
|
||||
+ if (x != NULL)
|
||||
+ /* We found complicated equiv or reverse equiv mem=reg. Ignore
|
||||
+ them. */
|
||||
+ regno_equiv_gains[regno] = 0;
|
||||
+ else
|
||||
+ bitmap_set_bit (&equiv_pseudos, regno);
|
||||
+ }
|
||||
+
|
||||
+ FOR_EACH_BB_FN (bb, cfun)
|
||||
+ {
|
||||
+ freq = REG_FREQ_FROM_BB (bb);
|
||||
+ ira_curr_regno_allocno_map
|
||||
+ = ira_bb_nodes[bb->index].parent->regno_allocno_map;
|
||||
+ FOR_BB_INSNS (bb, insn)
|
||||
+ {
|
||||
+ if (!INSN_P (insn) || !get_equiv_regno (PATTERN (insn), regno, subreg)
|
||||
+ || !bitmap_bit_p (&equiv_pseudos, regno))
|
||||
+ continue;
|
||||
+ rtx subst = ira_reg_equiv[regno].memory;
|
||||
+
|
||||
+ if (subst == NULL)
|
||||
+ subst = ira_reg_equiv[regno].constant;
|
||||
+ ira_assert (subst != NULL);
|
||||
+ mode = PSEUDO_REGNO_MODE (regno);
|
||||
+ ira_init_register_move_cost_if_necessary (mode);
|
||||
+ bool consumed_p = equiv_can_be_consumed_p (regno, subst, insn);
|
||||
+
|
||||
+ rclass = pref[COST_INDEX (regno)];
|
||||
+ if (MEM_P (subst)
|
||||
+ /* If it is a change of constant into double for example, the
|
||||
+ result constant probably will be placed in memory. */
|
||||
+ || (subreg != NULL_RTX && !INTEGRAL_MODE_P (GET_MODE (subreg))))
|
||||
+ cost = ira_memory_move_cost[mode][rclass][1] + (consumed_p ? 0 : 1);
|
||||
+ else if (consumed_p)
|
||||
+ continue;
|
||||
+ else
|
||||
+ cost = ira_register_move_cost[mode][rclass][rclass];
|
||||
+ regno_equiv_gains[regno] -= cost * freq;
|
||||
+ }
|
||||
+ }
|
||||
+ bitmap_clear (&equiv_pseudos);
|
||||
+}
|
||||
+
|
||||
/* Find costs of register classes and memory for allocnos or pseudos
|
||||
and their best costs. Set up preferred, alternative and allocno
|
||||
classes for pseudos. */
|
||||
@@ -1848,6 +1988,12 @@ find_costs_and_classes (FILE *dump_file)
|
||||
if (pass == 0)
|
||||
pref = pref_buffer;
|
||||
|
||||
+ if (ira_use_lra_p && allocno_p && pass == 1)
|
||||
+ /* It is a pass through all insns. So do it once and only for RA (not
|
||||
+ for insn scheduler) when we already found preferable pseudo register
|
||||
+ classes on the previous pass. */
|
||||
+ calculate_equiv_gains ();
|
||||
+
|
||||
/* Now for each allocno look at how desirable each class is and
|
||||
find which class is preferred. */
|
||||
for (i = max_reg_num () - 1; i >= FIRST_PSEUDO_REGISTER; i--)
|
||||
@@ -1940,6 +2086,17 @@ find_costs_and_classes (FILE *dump_file)
|
||||
}
|
||||
if (i >= first_moveable_pseudo && i < last_moveable_pseudo)
|
||||
i_mem_cost = 0;
|
||||
+ else if (ira_use_lra_p)
|
||||
+ {
|
||||
+ if (equiv_savings > 0)
|
||||
+ {
|
||||
+ i_mem_cost = 0;
|
||||
+ if (ira_dump_file != NULL && internal_flag_ira_verbose > 5)
|
||||
+ fprintf (ira_dump_file,
|
||||
+ " Use MEM for r%d as the equiv savings is %d\n",
|
||||
+ i, equiv_savings);
|
||||
+ }
|
||||
+ }
|
||||
else if (equiv_savings < 0)
|
||||
i_mem_cost = -equiv_savings;
|
||||
else if (equiv_savings > 0)
|
||||
@@ -2378,7 +2535,10 @@ ira_costs (void)
|
||||
total_allocno_costs = (struct costs *) ira_allocate (max_struct_costs_size
|
||||
* ira_allocnos_num);
|
||||
initiate_regno_cost_classes ();
|
||||
- calculate_elim_costs_all_insns ();
|
||||
+ if (!ira_use_lra_p)
|
||||
+ /* Process equivs in reload to update costs through hook
|
||||
+ ira_adjust_equiv_reg_cost. */
|
||||
+ calculate_elim_costs_all_insns ();
|
||||
find_costs_and_classes (ira_dump_file);
|
||||
setup_allocno_class_and_costs ();
|
||||
finish_regno_cost_classes ();
|
||||
@@ -2503,13 +2663,14 @@ ira_tune_allocno_costs (void)
|
||||
}
|
||||
}
|
||||
|
||||
-/* Add COST to the estimated gain for eliminating REGNO with its
|
||||
- equivalence. If COST is zero, record that no such elimination is
|
||||
- possible. */
|
||||
+/* A hook from the reload pass. Add COST to the estimated gain for eliminating
|
||||
+ REGNO with its equivalence. If COST is zero, record that no such
|
||||
+ elimination is possible. */
|
||||
|
||||
void
|
||||
ira_adjust_equiv_reg_cost (unsigned regno, int cost)
|
||||
{
|
||||
+ ira_assert (!ira_use_lra_p);
|
||||
if (cost == 0)
|
||||
regno_equiv_gains[regno] = 0;
|
||||
else
|
||||
diff --git a/gcc/var-tracking.cc b/gcc/var-tracking.cc
|
||||
index 7c3ad0a55..b10c8c1eb 100644
|
||||
--- a/gcc/var-tracking.cc
|
||||
+++ b/gcc/var-tracking.cc
|
||||
@@ -107,6 +107,8 @@
|
||||
#include "cfgrtl.h"
|
||||
#include "cfganal.h"
|
||||
#include "reload.h"
|
||||
+#include "ira.h"
|
||||
+#include "lra.h"
|
||||
#include "calls.h"
|
||||
#include "tree-dfa.h"
|
||||
#include "tree-ssa.h"
|
||||
@@ -10133,7 +10135,9 @@ vt_initialize (void)
|
||||
#else
|
||||
reg = arg_pointer_rtx;
|
||||
#endif
|
||||
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
|
||||
+ elim = (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
|
||||
if (elim != reg)
|
||||
{
|
||||
if (GET_CODE (elim) == PLUS)
|
||||
@@ -10153,7 +10157,9 @@ vt_initialize (void)
|
||||
reg = arg_pointer_rtx;
|
||||
fp_cfa_offset = ARG_POINTER_CFA_OFFSET (current_function_decl);
|
||||
#endif
|
||||
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
|
||||
+ elim = (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
|
||||
if (elim != reg)
|
||||
{
|
||||
if (GET_CODE (elim) == PLUS)
|
||||
@@ -10185,7 +10191,9 @@ vt_initialize (void)
|
||||
#else
|
||||
reg = arg_pointer_rtx;
|
||||
#endif
|
||||
- elim = eliminate_regs (reg, VOIDmode, NULL_RTX);
|
||||
+ elim = (ira_use_lra_p
|
||||
+ ? lra_eliminate_regs (reg, VOIDmode, NULL_RTX)
|
||||
+ : eliminate_regs (reg, VOIDmode, NULL_RTX));
|
||||
if (elim != reg)
|
||||
{
|
||||
if (GET_CODE (elim) == PLUS)
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
@ -0,0 +1,49 @@
|
||||
From 4965473a4211a9feb46a0d168180ab450cb18bcc Mon Sep 17 00:00:00 2001
|
||||
From: "Vladimir N. Makarov" <vmakarov@redhat.com>
|
||||
Date: Fri, 27 Oct 2023 08:28:24 -0400
|
||||
Subject: [PATCH 32/32] Add cost calculation for reg equivalence invariants
|
||||
|
||||
My recent patch improving cost calculation for pseudos with equivalence
|
||||
resulted in failure of gcc.target/arm/eliminate.c on aarch64. This patch
|
||||
fixes this failure.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* ira-costs.cc (get_equiv_regno, calculate_equiv_gains):
|
||||
Process reg equivalence invariants.
|
||||
---
|
||||
gcc/ira-costs.cc | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/gcc/ira-costs.cc b/gcc/ira-costs.cc
|
||||
index c79311783..d33104a30 100644
|
||||
--- a/gcc/ira-costs.cc
|
||||
+++ b/gcc/ira-costs.cc
|
||||
@@ -1777,6 +1777,7 @@ get_equiv_regno (rtx x, int &regno, rtx &subreg)
|
||||
}
|
||||
if (REG_P (x)
|
||||
&& (ira_reg_equiv[REGNO (x)].memory != NULL
|
||||
+ || ira_reg_equiv[REGNO (x)].invariant != NULL
|
||||
|| ira_reg_equiv[REGNO (x)].constant != NULL))
|
||||
{
|
||||
regno = REGNO (x);
|
||||
@@ -1819,6 +1820,7 @@ calculate_equiv_gains (void)
|
||||
for (regno = max_reg_num () - 1; regno >= FIRST_PSEUDO_REGISTER; regno--)
|
||||
if (ira_reg_equiv[regno].init_insns != NULL
|
||||
&& (ira_reg_equiv[regno].memory != NULL
|
||||
+ || ira_reg_equiv[regno].invariant != NULL
|
||||
|| (ira_reg_equiv[regno].constant != NULL
|
||||
/* Ignore complicated constants which probably will be placed
|
||||
in memory: */
|
||||
@@ -1869,6 +1871,8 @@ calculate_equiv_gains (void)
|
||||
|
||||
if (subst == NULL)
|
||||
subst = ira_reg_equiv[regno].constant;
|
||||
+ if (subst == NULL)
|
||||
+ subst = ira_reg_equiv[regno].invariant;
|
||||
ira_assert (subst != NULL);
|
||||
mode = PSEUDO_REGNO_MODE (regno);
|
||||
ira_init_register_move_cost_if_necessary (mode);
|
||||
--
|
||||
2.28.0.windows.1
|
||||
|
||||
30102
0088-BUGFIX-Fix-the-configure-file-of-BOLT.patch
Normal file
30102
0088-BUGFIX-Fix-the-configure-file-of-BOLT.patch
Normal file
File diff suppressed because it is too large
Load Diff
48
0089-StructReorderFields-Fix-gimple-call-not-rewritten.patch
Normal file
48
0089-StructReorderFields-Fix-gimple-call-not-rewritten.patch
Normal file
@ -0,0 +1,48 @@
|
||||
From 302b7e15d6308c29c215db4c9901342e1106381a Mon Sep 17 00:00:00 2001
|
||||
From: huang-xiaoquan <huangxiaoquan1@huawei.com>
|
||||
Date: Mon, 29 Apr 2024 11:00:12 +0800
|
||||
Subject: [PATCH] [StructReorderFields] Fix gimple call not rewritten due to
|
||||
empty function node
|
||||
|
||||
Add parameter type escape for empty functions or inline functions.
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 16 ++++++++++++++++
|
||||
1 file changed, 16 insertions(+)
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
index e08577c0c..2257d3528 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
@@ -4366,6 +4366,17 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
|
||||
|
||||
argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE;
|
||||
}
|
||||
+
|
||||
+ /* Types escape via an argument to an empty or inlined function. */
|
||||
+ cgraph_node *callee = node->get_edge (stmt)->callee;
|
||||
+ if (!gimple_call_builtin_p (stmt, BUILT_IN_FREE)
|
||||
+ && gimple_call_num_args (stmt)
|
||||
+ && callee && (!callee->has_gimple_body_p () || callee->inlined_to))
|
||||
+ {
|
||||
+ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
|
||||
+ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)),
|
||||
+ escape_var_arg_function);
|
||||
+ }
|
||||
}
|
||||
|
||||
void
|
||||
@@ -8068,6 +8079,11 @@ ipa_struct_reorg::rewrite_functions (void)
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "\nNo rewrite:\n");
|
||||
+ if (current_function_decl == NULL)
|
||||
+ {
|
||||
+ fprintf (dump_file, "\ncurrent_function_decl == NULL\n");
|
||||
+ continue;
|
||||
+ }
|
||||
if (current_function_decl)
|
||||
dump_function_to_file (current_function_decl, dump_file,
|
||||
dump_flags | TDF_VOPS);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,40 @@
|
||||
From 01517aa2397f854ffa96128a0fb23dd5542be709 Mon Sep 17 00:00:00 2001
|
||||
From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
|
||||
Date: Tue, 30 Apr 2024 18:43:32 +0800
|
||||
Subject: [PATCH 1/4] [double-sized-mul][testsuite] Add march armv8.2-a for dg
|
||||
tests
|
||||
|
||||
---
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 2 +-
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 2 +-
|
||||
2 files changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
index 4d475cc8a..d32a25223 100644
|
||||
--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
|
||||
proper overflow detection in some cases. */
|
||||
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
#include <stdint.h>
|
||||
|
||||
typedef unsigned __int128 uint128_t;
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
index cc6e5af25..ff35902b7 100644
|
||||
--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* fif-conversion-gimple is required for proper overflow detection
|
||||
in some cases. */
|
||||
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
#include <stdint.h>
|
||||
|
||||
typedef unsigned __int128 uint128_t;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
From b84a896e2df214b08d6519a097cc410d3e582add Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Wed, 8 May 2024 21:28:32 +0800
|
||||
Subject: [PATCH 2/4] [IPA][Bugfix] Fix fails in IPA prefetch
|
||||
(src-openEuler/gcc: I9J6N6)
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.cc | 10 ++++++++--
|
||||
1 file changed, 8 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc
|
||||
index 1ceb5137f..94290ea9c 100644
|
||||
--- a/gcc/ipa-prefetch.cc
|
||||
+++ b/gcc/ipa-prefetch.cc
|
||||
@@ -1432,8 +1432,14 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
|
||||
TREE_THIS_VOLATILE (*tp) = TREE_THIS_VOLATILE (old);
|
||||
TREE_SIDE_EFFECTS (*tp) = TREE_SIDE_EFFECTS (old);
|
||||
TREE_NO_WARNING (*tp) = TREE_NO_WARNING (old);
|
||||
- /* TODO: maybe support this case. */
|
||||
- gcc_assert (MR_DEPENDENCE_CLIQUE (old) == 0);
|
||||
+ if (MR_DEPENDENCE_CLIQUE (old) != 0)
|
||||
+ {
|
||||
+ MR_DEPENDENCE_CLIQUE (*tp) = MR_DEPENDENCE_CLIQUE (old);
|
||||
+ MR_DEPENDENCE_BASE (*tp) = MR_DEPENDENCE_BASE (old);
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file, "Copy clique=%d base=%d info.\n",
|
||||
+ MR_DEPENDENCE_CLIQUE (old), MR_DEPENDENCE_BASE (old));
|
||||
+ }
|
||||
/* We cannot propagate the TREE_THIS_NOTRAP flag if we have
|
||||
remapped a parameter as the property might be valid only
|
||||
for the parameter itself. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
From acb6bbf0612aead00a879892ba8ed816c90fe788 Mon Sep 17 00:00:00 2001
|
||||
From: Chernonog Viacheslav <chernonog.vyacheslav@huawei.com>
|
||||
Date: Wed, 8 May 2024 19:24:27 +0800
|
||||
Subject: [PATCH 3/4] [AES][Bugfix] Change set_of to reg_set_p, and add check
|
||||
for global_regs fix for I9JDHE
|
||||
|
||||
---
|
||||
gcc/rtl-matcher.h | 5 +++--
|
||||
1 file changed, 3 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/rtl-matcher.h b/gcc/rtl-matcher.h
|
||||
index 6aed8d98d..5310f6266 100644
|
||||
--- a/gcc/rtl-matcher.h
|
||||
+++ b/gcc/rtl-matcher.h
|
||||
@@ -56,8 +56,9 @@ check_def_chain_ref (df_ref ref, rtx reg)
|
||||
if (!ref || !DF_REF_INSN_INFO (ref))
|
||||
return false;
|
||||
|
||||
- return !global_regs[REGNO (reg)]
|
||||
- || set_of (reg, DF_REF_INSN (ref));
|
||||
+ return !(REGNO (reg) < FIRST_PSEUDO_REGISTER
|
||||
+ && global_regs[REGNO (reg)])
|
||||
+ || reg_set_p (reg, DF_REF_INSN (ref));
|
||||
}
|
||||
|
||||
/* Get the single def instruction of the reg being used in the insn. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
26
0093-fix-bugs-within-pointer-compression-and-DFE.patch
Normal file
26
0093-fix-bugs-within-pointer-compression-and-DFE.patch
Normal file
@ -0,0 +1,26 @@
|
||||
From 48724ee73cd58b67d59962ee4d56ac85db797e61 Mon Sep 17 00:00:00 2001
|
||||
From: tiancheng-bao <baotiancheng1@huawei.com>
|
||||
Date: Fri, 10 May 2024 17:52:27 +0800
|
||||
Subject: [PATCH 4/4] fix bugs within pointer compression and DFE
|
||||
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.cc | 3 ---
|
||||
1 file changed, 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
index 2257d3528..1a169c635 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.cc
|
||||
@@ -7472,9 +7472,6 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
|
||||
continue;
|
||||
tree lhs_expr = newlhs[i] ? newlhs[i] : lhs;
|
||||
tree rhs_expr = newrhs[i] ? newrhs[i] : rhs;
|
||||
- if (!useless_type_conversion_p (TREE_TYPE (lhs_expr),
|
||||
- TREE_TYPE (rhs_expr)))
|
||||
- rhs_expr = gimplify_build1 (gsi, NOP_EXPR, TREE_TYPE (lhs_expr), rhs_expr);
|
||||
gimple *newstmt = gimple_build_assign (lhs_expr, rhs_expr);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
28
0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch
Normal file
28
0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch
Normal file
@ -0,0 +1,28 @@
|
||||
From 4861c3db991e947060de54a4d20c1a13747a6024 Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||
Date: Wed, 15 May 2024 14:41:45 +0800
|
||||
Subject: [PATCH] [BUGFIX] AutoBOLT function miss bind type
|
||||
|
||||
---
|
||||
gcc/final.cc | 4 ++--
|
||||
1 file changed, 2 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/final.cc b/gcc/final.cc
|
||||
index af4e529bb..c440846f7 100644
|
||||
--- a/gcc/final.cc
|
||||
+++ b/gcc/final.cc
|
||||
@@ -4272,9 +4272,9 @@ leaf_renumber_regs_insn (rtx in_rtx)
|
||||
|
||||
#define ASM_FDO_CALLER_FLAG ".fdo.caller "
|
||||
#define ASM_FDO_CALLER_SIZE_FLAG ".fdo.caller.size "
|
||||
-#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind"
|
||||
+#define ASM_FDO_CALLER_BIND_FLAG ".fdo.caller.bind "
|
||||
|
||||
-#define ASM_FDO_CALLEE_FLAG ".fdo.callee"
|
||||
+#define ASM_FDO_CALLEE_FLAG ".fdo.callee "
|
||||
|
||||
/* Return the relative offset address of the start instruction of BB,
|
||||
return -1 if it is empty instruction. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
9044
0095-STABS-remove-gstabs-and-gxcoff-functionality.patch
Normal file
9044
0095-STABS-remove-gstabs-and-gxcoff-functionality.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,45 @@
|
||||
From 06e86b362f74ba0706fb5d8377f78d24b658c300 Mon Sep 17 00:00:00 2001
|
||||
From: zhenyu--zhao_admin <zhaozhenyu17@huawei.com>
|
||||
Date: Sat, 18 May 2024 12:22:23 +0800
|
||||
Subject: [PATCH] [Bugfix] Autofdo use PMU sampling set num equals den
|
||||
|
||||
---
|
||||
gcc/final.cc | 2 +-
|
||||
gcc/tree-cfg.cc | 8 ++++++++
|
||||
2 files changed, 9 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/final.cc b/gcc/final.cc
|
||||
index f66c9d155..e4bfceabc 100644
|
||||
--- a/gcc/final.cc
|
||||
+++ b/gcc/final.cc
|
||||
@@ -4604,7 +4604,7 @@ dump_profile_to_elf_sections ()
|
||||
/* Return if no feedback data. */
|
||||
if (!flag_profile_use && !flag_auto_profile)
|
||||
{
|
||||
- error ("-fauto-bolt should use with -profile-use or -fauto-profile");
|
||||
+ error ("-fauto-bolt should use with -fprofile-use or -fauto-profile");
|
||||
return;
|
||||
}
|
||||
|
||||
diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc
|
||||
index 05fc45147..48b52f785 100644
|
||||
--- a/gcc/tree-cfg.cc
|
||||
+++ b/gcc/tree-cfg.cc
|
||||
@@ -9741,6 +9741,14 @@ execute_fixup_cfg (void)
|
||||
/* Same scaling is also done by ipa_merge_profiles. */
|
||||
profile_count num = node->count;
|
||||
profile_count den = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count;
|
||||
+ /* When autofdo uses PMU as the sampling unit, the number of
|
||||
+ node can not be obtained directly, sometimes it will be zero,
|
||||
+ but the execution number for function should at least be 1. We
|
||||
+ set num be den here to make sure the num will not decrease. */
|
||||
+ if (num == profile_count::zero ().afdo () && den.quality () == profile_quality::AFDO)
|
||||
+ {
|
||||
+ num = den;
|
||||
+ }
|
||||
bool scale = num.initialized_p () && !(num == den);
|
||||
auto_bitmap dce_ssa_names;
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
59
Libvtv-Add-loongarch-support.patch
Normal file
59
Libvtv-Add-loongarch-support.patch
Normal file
@ -0,0 +1,59 @@
|
||||
From 62ea18c632200edbbf46b4e957bc4d997f1c66f0 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Tue, 27 Sep 2022 15:28:43 +0800
|
||||
Subject: [PATCH 024/124] Libvtv: Add loongarch support.
|
||||
|
||||
The loongarch64 specification permits page sizes of 4KiB, 16KiB and 64KiB,
|
||||
but only 16KiB pages are supported for now.
|
||||
|
||||
Co-Authored-By: qijingwen <qijingwen@loongson.cn>
|
||||
|
||||
include/ChangeLog:
|
||||
|
||||
* vtv-change-permission.h (defined): Determines whether the macro
|
||||
__loongarch_lp64 is defined
|
||||
(VTV_PAGE_SIZE): Set VTV_PAGE_SIZE to 16KiB for loongarch64.
|
||||
|
||||
libvtv/ChangeLog:
|
||||
|
||||
* configure.tgt: Add loongarch support.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
include/vtv-change-permission.h | 4 ++++
|
||||
libvtv/configure.tgt | 3 +++
|
||||
2 files changed, 7 insertions(+)
|
||||
|
||||
diff --git a/include/vtv-change-permission.h b/include/vtv-change-permission.h
|
||||
index 70bdad92b..e7b9294a0 100644
|
||||
--- a/include/vtv-change-permission.h
|
||||
+++ b/include/vtv-change-permission.h
|
||||
@@ -48,6 +48,10 @@ extern void __VLTChangePermission (int);
|
||||
#else
|
||||
#if defined(__sun__) && defined(__svr4__) && defined(__sparc__)
|
||||
#define VTV_PAGE_SIZE 8192
|
||||
+#elif defined(__loongarch_lp64)
|
||||
+/* The page size is configurable by the kernel to be 4, 16 or 64 KiB.
|
||||
+ For now, only the default page size of 16KiB is supported. */
|
||||
+#define VTV_PAGE_SIZE 16384
|
||||
#else
|
||||
#define VTV_PAGE_SIZE 4096
|
||||
#endif
|
||||
diff --git a/libvtv/configure.tgt b/libvtv/configure.tgt
|
||||
index aa2a3f675..6cdd1e97a 100644
|
||||
--- a/libvtv/configure.tgt
|
||||
+++ b/libvtv/configure.tgt
|
||||
@@ -50,6 +50,9 @@ case "${target}" in
|
||||
;;
|
||||
x86_64-*-darwin[1]* | i?86-*-darwin[1]*)
|
||||
;;
|
||||
+ loongarch*-*-linux*)
|
||||
+ VTV_SUPPORTED=yes
|
||||
+ ;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
--
|
||||
2.33.0
|
||||
|
||||
332
LoongArch-Add-LA664-support.patch
Normal file
332
LoongArch-Add-LA664-support.patch
Normal file
@ -0,0 +1,332 @@
|
||||
From c68463abbab98aa7f5a9b91e71ed6f6834c723df Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 16 Nov 2023 20:43:53 +0800
|
||||
Subject: [PATCH] LoongArch: Add LA664 support.
|
||||
|
||||
Define ISA_BASE_LA64V110, which represents the base instruction set defined in LoongArch1.1.
|
||||
Support the configure setting --with-arch=la664, and support -march=la664 and -mtune=la664.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config.gcc: Support LA664.
|
||||
* config/loongarch/genopts/loongarch-strings: Likewise.
|
||||
* config/loongarch/genopts/loongarch.opt.in: Likewise.
|
||||
* config/loongarch/loongarch-cpu.cc (fill_native_cpu_config): Likewise.
|
||||
* config/loongarch/loongarch-def.c: Likewise.
|
||||
* config/loongarch/loongarch-def.h (N_ISA_BASE_TYPES): Likewise.
|
||||
(ISA_BASE_LA64V110): Define macro.
|
||||
(N_ARCH_TYPES): Update value.
|
||||
(N_TUNE_TYPES): Update value.
|
||||
(CPU_LA664): New macro.
|
||||
* config/loongarch/loongarch-opts.cc (isa_default_abi): Likewise.
|
||||
(isa_base_compat_p): Likewise.
|
||||
* config/loongarch/loongarch-opts.h (TARGET_64BIT): This parameter is enabled
|
||||
when la_target.isa.base is equal to ISA_BASE_LA64V100 or ISA_BASE_LA64V110.
|
||||
(TARGET_uARCH_LA664): Define macro.
|
||||
* config/loongarch/loongarch-str.h (STR_CPU_LA664): Likewise.
|
||||
* config/loongarch/loongarch.cc (loongarch_cpu_sched_reassociation_width):
|
||||
Add LA664 support.
|
||||
* config/loongarch/loongarch.opt: Regenerate.
|
||||
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config.gcc | 10 ++++-----
|
||||
.../loongarch/genopts/loongarch-strings | 1 +
|
||||
gcc/config/loongarch/genopts/loongarch.opt.in | 3 +++
|
||||
gcc/config/loongarch/loongarch-cpu.cc | 4 ++++
|
||||
gcc/config/loongarch/loongarch-def.c | 21 +++++++++++++++++++
|
||||
gcc/config/loongarch/loongarch-def.h | 8 ++++---
|
||||
gcc/config/loongarch/loongarch-opts.cc | 8 +++----
|
||||
gcc/config/loongarch/loongarch-opts.h | 4 +++-
|
||||
gcc/config/loongarch/loongarch-str.h | 1 +
|
||||
gcc/config/loongarch/loongarch.cc | 1 +
|
||||
gcc/config/loongarch/loongarch.opt | 3 +++
|
||||
11 files changed, 51 insertions(+), 13 deletions(-)
|
||||
|
||||
diff --git a/gcc/config.gcc b/gcc/config.gcc
|
||||
index 6d51bd93f3f..b88591b6fd8 100644
|
||||
--- a/gcc/config.gcc
|
||||
+++ b/gcc/config.gcc
|
||||
@@ -5039,7 +5039,7 @@ case "${target}" in
|
||||
|
||||
# Perform initial sanity checks on --with-* options.
|
||||
case ${with_arch} in
|
||||
- "" | abi-default | loongarch64 | la464) ;; # OK, append here.
|
||||
+ "" | abi-default | loongarch64 | la[46]64) ;; # OK, append here.
|
||||
native)
|
||||
if test x${host} != x${target}; then
|
||||
echo "--with-arch=native is illegal for cross-compiler." 1>&2
|
||||
@@ -5088,7 +5088,7 @@ case "${target}" in
|
||||
case ${abi_base}/${abi_ext} in
|
||||
lp64*/base)
|
||||
# architectures that support lp64* ABI
|
||||
- arch_pattern="native|abi-default|loongarch64|la464"
|
||||
+ arch_pattern="native|abi-default|loongarch64|la[46]64"
|
||||
# default architecture for lp64* ABI
|
||||
arch_default="abi-default"
|
||||
;;
|
||||
@@ -5163,7 +5163,7 @@ case "${target}" in
|
||||
# Check default with_tune configuration using with_arch.
|
||||
case ${with_arch} in
|
||||
loongarch64)
|
||||
- tune_pattern="native|abi-default|loongarch64|la464"
|
||||
+ tune_pattern="native|abi-default|loongarch64|la[46]64"
|
||||
;;
|
||||
*)
|
||||
# By default, $with_tune == $with_arch
|
||||
@@ -5219,7 +5219,7 @@ case "${target}" in
|
||||
# Fixed: use the default gcc configuration for all multilib
|
||||
# builds by default.
|
||||
with_multilib_default="" ;;
|
||||
- arch,native|arch,loongarch64|arch,la464) # OK, append here.
|
||||
+ arch,native|arch,loongarch64|arch,la[46]64) # OK, append here.
|
||||
with_multilib_default="/march=${component}" ;;
|
||||
arch,*)
|
||||
with_multilib_default="/march=abi-default"
|
||||
@@ -5307,7 +5307,7 @@ case "${target}" in
|
||||
if test x${parse_state} = x"arch"; then
|
||||
# -march option
|
||||
case ${component} in
|
||||
- native | abi-default | loongarch64 | la464) # OK, append here.
|
||||
+ native | abi-default | loongarch64 | la[46]64) # OK, append here.
|
||||
# Append -march spec for each multilib variant.
|
||||
loongarch_multilib_list_make="${loongarch_multilib_list_make}/march=${component}"
|
||||
parse_state="opts"
|
||||
diff --git a/gcc/config/loongarch/genopts/loongarch-strings b/gcc/config/loongarch/genopts/loongarch-strings
|
||||
index 8e412f7536e..7bc4824007e 100644
|
||||
--- a/gcc/config/loongarch/genopts/loongarch-strings
|
||||
+++ b/gcc/config/loongarch/genopts/loongarch-strings
|
||||
@@ -26,6 +26,7 @@ STR_CPU_NATIVE native
|
||||
STR_CPU_ABI_DEFAULT abi-default
|
||||
STR_CPU_LOONGARCH64 loongarch64
|
||||
STR_CPU_LA464 la464
|
||||
+STR_CPU_LA664 la664
|
||||
|
||||
# Base architecture
|
||||
STR_ISA_BASE_LA64V100 la64
|
||||
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||
index 158701d327a..00b4733d75b 100644
|
||||
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
|
||||
@@ -107,6 +107,9 @@ Enum(cpu_type) String(@@STR_CPU_LOONGARCH64@@) Value(CPU_LOONGARCH64)
|
||||
EnumValue
|
||||
Enum(cpu_type) String(@@STR_CPU_LA464@@) Value(CPU_LA464)
|
||||
|
||||
+EnumValue
|
||||
+Enum(cpu_type) String(@@STR_CPU_LA664@@) Value(CPU_LA664)
|
||||
+
|
||||
m@@OPTSTR_ARCH@@=
|
||||
Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
|
||||
-m@@OPTSTR_ARCH@@=PROCESSOR Generate code for the given PROCESSOR ISA.
|
||||
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
|
||||
index 7a2866f60f9..f3a13414143 100644
|
||||
--- a/gcc/config/loongarch/loongarch-cpu.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-cpu.cc
|
||||
@@ -106,6 +106,10 @@ fill_native_cpu_config (struct loongarch_target *tgt)
|
||||
native_cpu_type = CPU_LA464;
|
||||
break;
|
||||
|
||||
+ case 0x0014d000: /* LA664 */
|
||||
+ native_cpu_type = CPU_LA664;
|
||||
+ break;
|
||||
+
|
||||
default:
|
||||
/* Unknown PRID. */
|
||||
if (tune_native_p)
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||
index 430ef8b2d95..067629141b6 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.c
|
||||
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||
@@ -28,6 +28,7 @@ loongarch_cpu_strings[N_TUNE_TYPES] = {
|
||||
[CPU_ABI_DEFAULT] = STR_CPU_ABI_DEFAULT,
|
||||
[CPU_LOONGARCH64] = STR_CPU_LOONGARCH64,
|
||||
[CPU_LA464] = STR_CPU_LA464,
|
||||
+ [CPU_LA664] = STR_CPU_LA664,
|
||||
};
|
||||
|
||||
struct loongarch_isa
|
||||
@@ -42,6 +43,11 @@ loongarch_cpu_default_isa[N_ARCH_TYPES] = {
|
||||
.fpu = ISA_EXT_FPU64,
|
||||
.simd = ISA_EXT_SIMD_LASX,
|
||||
},
|
||||
+ [CPU_LA664] = {
|
||||
+ .base = ISA_BASE_LA64V110,
|
||||
+ .fpu = ISA_EXT_FPU64,
|
||||
+ .simd = ISA_EXT_SIMD_LASX,
|
||||
+ },
|
||||
};
|
||||
|
||||
struct loongarch_cache
|
||||
@@ -58,6 +64,12 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
|
||||
.l2d_size = 256,
|
||||
.simultaneous_prefetches = 4,
|
||||
},
|
||||
+ [CPU_LA664] = {
|
||||
+ .l1d_line_size = 64,
|
||||
+ .l1d_size = 64,
|
||||
+ .l2d_size = 256,
|
||||
+ .simultaneous_prefetches = 4,
|
||||
+ },
|
||||
};
|
||||
|
||||
struct loongarch_align
|
||||
@@ -70,6 +82,10 @@ loongarch_cpu_align[N_TUNE_TYPES] = {
|
||||
.function = "32",
|
||||
.label = "16",
|
||||
},
|
||||
+ [CPU_LA664] = {
|
||||
+ .function = "32",
|
||||
+ .label = "16",
|
||||
+ },
|
||||
};
|
||||
|
||||
|
||||
@@ -104,6 +120,9 @@ loongarch_cpu_rtx_cost_data[N_TUNE_TYPES] = {
|
||||
[CPU_LA464] = {
|
||||
DEFAULT_COSTS
|
||||
},
|
||||
+ [CPU_LA664] = {
|
||||
+ DEFAULT_COSTS
|
||||
+ },
|
||||
};
|
||||
|
||||
/* RTX costs to use when optimizing for size. */
|
||||
@@ -127,6 +146,7 @@ loongarch_cpu_issue_rate[N_TUNE_TYPES] = {
|
||||
[CPU_NATIVE] = 4,
|
||||
[CPU_LOONGARCH64] = 4,
|
||||
[CPU_LA464] = 4,
|
||||
+ [CPU_LA664] = 6,
|
||||
};
|
||||
|
||||
int
|
||||
@@ -134,6 +154,7 @@ loongarch_cpu_multipass_dfa_lookahead[N_TUNE_TYPES] = {
|
||||
[CPU_NATIVE] = 4,
|
||||
[CPU_LOONGARCH64] = 4,
|
||||
[CPU_LA464] = 4,
|
||||
+ [CPU_LA664] = 6,
|
||||
};
|
||||
|
||||
/* Wiring string definitions from loongarch-str.h to global arrays
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h
|
||||
index 6e2a6987910..db497f3ffe2 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.h
|
||||
+++ b/gcc/config/loongarch/loongarch-def.h
|
||||
@@ -55,7 +55,8 @@ extern "C" {
|
||||
/* enum isa_base */
|
||||
extern const char* loongarch_isa_base_strings[];
|
||||
#define ISA_BASE_LA64V100 0
|
||||
-#define N_ISA_BASE_TYPES 1
|
||||
+#define ISA_BASE_LA64V110 1
|
||||
+#define N_ISA_BASE_TYPES 2
|
||||
|
||||
/* enum isa_ext_* */
|
||||
extern const char* loongarch_isa_ext_strings[];
|
||||
@@ -141,8 +142,9 @@ struct loongarch_target
|
||||
#define CPU_ABI_DEFAULT 1
|
||||
#define CPU_LOONGARCH64 2
|
||||
#define CPU_LA464 3
|
||||
-#define N_ARCH_TYPES 4
|
||||
-#define N_TUNE_TYPES 4
|
||||
+#define CPU_LA664 4
|
||||
+#define N_ARCH_TYPES 5
|
||||
+#define N_TUNE_TYPES 5
|
||||
|
||||
/* parallel tables. */
|
||||
extern const char* loongarch_cpu_strings[];
|
||||
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||
index e5921189a06..67a59152a01 100644
|
||||
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||
@@ -552,17 +552,17 @@ isa_default_abi (const struct loongarch_isa *isa)
|
||||
switch (isa->fpu)
|
||||
{
|
||||
case ISA_EXT_FPU64:
|
||||
- if (isa->base == ISA_BASE_LA64V100)
|
||||
+ if (isa->base >= ISA_BASE_LA64V100)
|
||||
abi.base = ABI_BASE_LP64D;
|
||||
break;
|
||||
|
||||
case ISA_EXT_FPU32:
|
||||
- if (isa->base == ISA_BASE_LA64V100)
|
||||
+ if (isa->base >= ISA_BASE_LA64V100)
|
||||
abi.base = ABI_BASE_LP64F;
|
||||
break;
|
||||
|
||||
case ISA_EXT_NONE:
|
||||
- if (isa->base == ISA_BASE_LA64V100)
|
||||
+ if (isa->base >= ISA_BASE_LA64V100)
|
||||
abi.base = ABI_BASE_LP64S;
|
||||
break;
|
||||
|
||||
@@ -582,7 +582,7 @@ isa_base_compat_p (const struct loongarch_isa *set1,
|
||||
switch (set2->base)
|
||||
{
|
||||
case ISA_BASE_LA64V100:
|
||||
- return (set1->base == ISA_BASE_LA64V100);
|
||||
+ return (set1->base >= ISA_BASE_LA64V100);
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
diff --git a/gcc/config/loongarch/loongarch-opts.h b/gcc/config/loongarch/loongarch-opts.h
|
||||
index 6dd309aad96..0e1b3e528a1 100644
|
||||
--- a/gcc/config/loongarch/loongarch-opts.h
|
||||
+++ b/gcc/config/loongarch/loongarch-opts.h
|
||||
@@ -76,7 +76,8 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||
#define TARGET_DOUBLE_FLOAT (la_target.isa.fpu == ISA_EXT_FPU64)
|
||||
#define TARGET_DOUBLE_FLOAT_ABI (la_target.abi.base == ABI_BASE_LP64D)
|
||||
|
||||
-#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100)
|
||||
+#define TARGET_64BIT (la_target.isa.base == ISA_BASE_LA64V100 \
|
||||
+ || la_target.isa.base == ISA_BASE_LA64V110)
|
||||
#define TARGET_ABI_LP64 (la_target.abi.base == ABI_BASE_LP64D \
|
||||
|| la_target.abi.base == ABI_BASE_LP64F \
|
||||
|| la_target.abi.base == ABI_BASE_LP64S)
|
||||
@@ -88,6 +89,7 @@ loongarch_update_gcc_opt_status (struct loongarch_target *target,
|
||||
|
||||
/* TARGET_ macros for use in *.md template conditionals */
|
||||
#define TARGET_uARCH_LA464 (la_target.cpu_tune == CPU_LA464)
|
||||
+#define TARGET_uARCH_LA664 (la_target.cpu_tune == CPU_LA664)
|
||||
|
||||
/* Note: optimize_size may vary across functions,
|
||||
while -m[no]-memcpy imposes a global constraint. */
|
||||
diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h
|
||||
index 072558c28f1..fc4f41bfc1e 100644
|
||||
--- a/gcc/config/loongarch/loongarch-str.h
|
||||
+++ b/gcc/config/loongarch/loongarch-str.h
|
||||
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define STR_CPU_ABI_DEFAULT "abi-default"
|
||||
#define STR_CPU_LOONGARCH64 "loongarch64"
|
||||
#define STR_CPU_LA464 "la464"
|
||||
+#define STR_CPU_LA664 "la664"
|
||||
|
||||
#define STR_ISA_BASE_LA64V100 "la64"
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 22ca24a1878..4cd509f11c6 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -10177,6 +10177,7 @@ loongarch_cpu_sched_reassociation_width (struct loongarch_target *target,
|
||||
{
|
||||
case CPU_LOONGARCH64:
|
||||
case CPU_LA464:
|
||||
+ case CPU_LA664:
|
||||
/* Vector part. */
|
||||
if (LSX_SUPPORTED_MODE_P (mode) || LASX_SUPPORTED_MODE_P (mode))
|
||||
{
|
||||
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
|
||||
index a5988411fbb..7f129e53ba5 100644
|
||||
--- a/gcc/config/loongarch/loongarch.opt
|
||||
+++ b/gcc/config/loongarch/loongarch.opt
|
||||
@@ -114,6 +114,9 @@ Enum(cpu_type) String(loongarch64) Value(CPU_LOONGARCH64)
|
||||
EnumValue
|
||||
Enum(cpu_type) String(la464) Value(CPU_LA464)
|
||||
|
||||
+EnumValue
|
||||
+Enum(cpu_type) String(la664) Value(CPU_LA664)
|
||||
+
|
||||
march=
|
||||
Target RejectNegative Joined Enum(cpu_type) Var(la_opt_cpu_arch) Init(M_OPT_UNSET)
|
||||
-march=PROCESSOR Generate code for the given PROCESSOR ISA.
|
||||
--
|
||||
2.33.0
|
||||
|
||||
8376
LoongArch-Add-Loongson-ASX-base-instruction-support.patch
Normal file
8376
LoongArch-Add-Loongson-ASX-base-instruction-support.patch
Normal file
File diff suppressed because it is too large
Load Diff
7458
LoongArch-Add-Loongson-ASX-directive-builtin-functio.patch
Normal file
7458
LoongArch-Add-Loongson-ASX-directive-builtin-functio.patch
Normal file
File diff suppressed because it is too large
Load Diff
8433
LoongArch-Add-Loongson-SX-base-instruction-support.patch
Normal file
8433
LoongArch-Add-Loongson-SX-base-instruction-support.patch
Normal file
File diff suppressed because it is too large
Load Diff
7549
LoongArch-Add-Loongson-SX-directive-builtin-function.patch
Normal file
7549
LoongArch-Add-Loongson-SX-directive-builtin-function.patch
Normal file
File diff suppressed because it is too large
Load Diff
166
LoongArch-Add-built-in-functions-description-of-Loon.patch
Normal file
166
LoongArch-Add-built-in-functions-description-of-Loon.patch
Normal file
@ -0,0 +1,166 @@
|
||||
From 7cfe6e057045ac794afbe9097b1b211c0e1ea723 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 6 Apr 2023 16:02:07 +0800
|
||||
Subject: [PATCH 039/124] LoongArch: Add built-in functions description of
|
||||
LoongArch Base instruction set instructions.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* doc/extend.texi: Add section for LoongArch Base Built-in functions.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/doc/extend.texi | 129 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
1 file changed, 129 insertions(+)
|
||||
|
||||
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
|
||||
index 3c101ca89..1d1bac255 100644
|
||||
--- a/gcc/doc/extend.texi
|
||||
+++ b/gcc/doc/extend.texi
|
||||
@@ -14678,6 +14678,7 @@ instructions, but allow the compiler to schedule those calls.
|
||||
* Blackfin Built-in Functions::
|
||||
* BPF Built-in Functions::
|
||||
* FR-V Built-in Functions::
|
||||
+* LoongArch Base Built-in Functions::
|
||||
* MIPS DSP Built-in Functions::
|
||||
* MIPS Paired-Single Support::
|
||||
* MIPS Loongson Built-in Functions::
|
||||
@@ -16128,6 +16129,134 @@ Use the @code{nldub} instruction to load the contents of address @var{x}
|
||||
into the data cache. The instruction is issued in slot I1@.
|
||||
@end table
|
||||
|
||||
+@node LoongArch Base Built-in Functions
|
||||
+@subsection LoongArch Base Built-in Functions
|
||||
+
|
||||
+These built-in functions are available for LoongArch.
|
||||
+
|
||||
+Data Type Description:
|
||||
+@itemize
|
||||
+@item @code{imm0_31}, a compile-time constant in range 0 to 31;
|
||||
+@item @code{imm0_16383}, a compile-time constant in range 0 to 16383;
|
||||
+@item @code{imm0_32767}, a compile-time constant in range 0 to 32767;
|
||||
+@item @code{imm_n2048_2047}, a compile-time constant in range -2048 to 2047;
|
||||
+@end itemize
|
||||
+
|
||||
+The intrinsics provided are listed below:
|
||||
+@smallexample
|
||||
+ unsigned int __builtin_loongarch_movfcsr2gr (imm0_31)
|
||||
+ void __builtin_loongarch_movgr2fcsr (imm0_31, unsigned int)
|
||||
+ void __builtin_loongarch_cacop_d (imm0_31, unsigned long int, imm_n2048_2047)
|
||||
+ unsigned int __builtin_loongarch_cpucfg (unsigned int)
|
||||
+ void __builtin_loongarch_asrtle_d (long int, long int)
|
||||
+ void __builtin_loongarch_asrtgt_d (long int, long int)
|
||||
+ long int __builtin_loongarch_lddir_d (long int, imm0_31)
|
||||
+ void __builtin_loongarch_ldpte_d (long int, imm0_31)
|
||||
+
|
||||
+ int __builtin_loongarch_crc_w_b_w (char, int)
|
||||
+ int __builtin_loongarch_crc_w_h_w (short, int)
|
||||
+ int __builtin_loongarch_crc_w_w_w (int, int)
|
||||
+ int __builtin_loongarch_crc_w_d_w (long int, int)
|
||||
+ int __builtin_loongarch_crcc_w_b_w (char, int)
|
||||
+ int __builtin_loongarch_crcc_w_h_w (short, int)
|
||||
+ int __builtin_loongarch_crcc_w_w_w (int, int)
|
||||
+ int __builtin_loongarch_crcc_w_d_w (long int, int)
|
||||
+
|
||||
+ unsigned int __builtin_loongarch_csrrd_w (imm0_16383)
|
||||
+ unsigned int __builtin_loongarch_csrwr_w (unsigned int, imm0_16383)
|
||||
+ unsigned int __builtin_loongarch_csrxchg_w (unsigned int, unsigned int, imm0_16383)
|
||||
+ unsigned long int __builtin_loongarch_csrrd_d (imm0_16383)
|
||||
+ unsigned long int __builtin_loongarch_csrwr_d (unsigned long int, imm0_16383)
|
||||
+ unsigned long int __builtin_loongarch_csrxchg_d (unsigned long int, unsigned long int, imm0_16383)
|
||||
+
|
||||
+ unsigned char __builtin_loongarch_iocsrrd_b (unsigned int)
|
||||
+ unsigned short __builtin_loongarch_iocsrrd_h (unsigned int)
|
||||
+ unsigned int __builtin_loongarch_iocsrrd_w (unsigned int)
|
||||
+ unsigned long int __builtin_loongarch_iocsrrd_d (unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_b (unsigned char, unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_h (unsigned short, unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_w (unsigned int, unsigned int)
|
||||
+ void __builtin_loongarch_iocsrwr_d (unsigned long int, unsigned int)
|
||||
+
|
||||
+ void __builtin_loongarch_dbar (imm0_32767)
|
||||
+ void __builtin_loongarch_ibar (imm0_32767)
|
||||
+
|
||||
+ void __builtin_loongarch_syscall (imm0_32767)
|
||||
+ void __builtin_loongarch_break (imm0_32767)
|
||||
+@end smallexample
|
||||
+
|
||||
+@emph{Note:}Since the control register is divided into 32-bit and 64-bit,
|
||||
+but the access instruction is not distinguished. So GCC renames the control
|
||||
+instructions when implementing intrinsics.
|
||||
+
|
||||
+Take the csrrd instruction as an example, built-in functions are implemented as follows:
|
||||
+@smallexample
|
||||
+ __builtin_loongarch_csrrd_w // When reading the 32-bit control register use.
|
||||
+ __builtin_loongarch_csrrd_d // When reading the 64-bit control register use.
|
||||
+@end smallexample
|
||||
+
|
||||
+For the convenience of use, the built-in functions are encapsulated,
|
||||
+the encapsulated functions and @code{__drdtime_t, __rdtime_t} are
|
||||
+defined in the @code{larchintrin.h}. So if you call the following
|
||||
+function you need to include @code{larchintrin.h}.
|
||||
+
|
||||
+@smallexample
|
||||
+ typedef struct drdtime@{
|
||||
+ unsigned long dvalue;
|
||||
+ unsigned long dtimeid;
|
||||
+ @} __drdtime_t;
|
||||
+
|
||||
+ typedef struct rdtime@{
|
||||
+ unsigned int value;
|
||||
+ unsigned int timeid;
|
||||
+ @} __rdtime_t;
|
||||
+@end smallexample
|
||||
+
|
||||
+@smallexample
|
||||
+ __drdtime_t __rdtime_d (void)
|
||||
+ __rdtime_t __rdtimel_w (void)
|
||||
+ __rdtime_t __rdtimeh_w (void)
|
||||
+ unsigned int __movfcsr2gr (imm0_31)
|
||||
+ void __movgr2fcsr (imm0_31, unsigned int)
|
||||
+ void __cacop_d (imm0_31, unsigned long, imm_n2048_2047)
|
||||
+ unsigned int __cpucfg (unsigned int)
|
||||
+ void __asrtle_d (long int, long int)
|
||||
+ void __asrtgt_d (long int, long int)
|
||||
+ long int __lddir_d (long int, imm0_31)
|
||||
+ void __ldpte_d (long int, imm0_31)
|
||||
+
|
||||
+ int __crc_w_b_w (char, int)
|
||||
+ int __crc_w_h_w (short, int)
|
||||
+ int __crc_w_w_w (int, int)
|
||||
+ int __crc_w_d_w (long int, int)
|
||||
+ int __crcc_w_b_w (char, int)
|
||||
+ int __crcc_w_h_w (short, int)
|
||||
+ int __crcc_w_w_w (int, int)
|
||||
+ int __crcc_w_d_w (long int, int)
|
||||
+
|
||||
+ unsigned int __csrrd_w (imm0_16383)
|
||||
+ unsigned int __csrwr_w (unsigned int, imm0_16383)
|
||||
+ unsigned int __csrxchg_w (unsigned int, unsigned int, imm0_16383)
|
||||
+ unsigned long __csrrd_d (imm0_16383)
|
||||
+ unsigned long __csrwr_d (unsigned long, imm0_16383)
|
||||
+ unsigned long __csrxchg_d (unsigned long, unsigned long, imm0_16383)
|
||||
+
|
||||
+ unsigned char __iocsrrd_b (unsigned int)
|
||||
+ unsigned short __iocsrrd_h (unsigned int)
|
||||
+ unsigned int __iocsrrd_w (unsigned int)
|
||||
+ unsigned long __iocsrrd_d (unsigned int)
|
||||
+ void __iocsrwr_b (unsigned char, unsigned int)
|
||||
+ void __iocsrwr_h (unsigned short, unsigned int)
|
||||
+ void __iocsrwr_w (unsigned int, unsigned int)
|
||||
+ void __iocsrwr_d (unsigned long, unsigned int)
|
||||
+
|
||||
+ void __dbar (imm0_32767)
|
||||
+ void __ibar (imm0_32767)
|
||||
+
|
||||
+ void __syscall (imm0_32767)
|
||||
+ void __break (imm0_32767)
|
||||
+@end smallexample
|
||||
+
|
||||
@node MIPS DSP Built-in Functions
|
||||
@subsection MIPS DSP Built-in Functions
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
107
LoongArch-Add-fcopysign-instructions.patch
Normal file
107
LoongArch-Add-fcopysign-instructions.patch
Normal file
@ -0,0 +1,107 @@
|
||||
From 41a4945886631a1b2898ae957389d5db18a07141 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Fri, 4 Nov 2022 15:12:22 +0800
|
||||
Subject: [PATCH 025/124] LoongArch: Add fcopysign instructions
|
||||
|
||||
Add fcopysign.{s,d} with the names copysign{sf,df}3 so GCC will expand
|
||||
__builtin_copysign{f,} to a single instruction.
|
||||
|
||||
Link: https://sourceware.org/pipermail/libc-alpha/2022-November/143177.html
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FCOPYSIGN): New unspec.
|
||||
(type): Add fcopysign.
|
||||
(copysign<mode>3): New instruction template.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/fcopysign.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 22 ++++++++++++++++++-
|
||||
.../gcc.target/loongarch/fcopysign.c | 16 ++++++++++++++
|
||||
2 files changed, 37 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/fcopysign.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 214b14bdd..bda34d0f3 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -37,6 +37,7 @@
|
||||
UNSPEC_FCLASS
|
||||
UNSPEC_FMAX
|
||||
UNSPEC_FMIN
|
||||
+ UNSPEC_FCOPYSIGN
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -214,6 +215,7 @@
|
||||
;; fabs floating point absolute value
|
||||
;; fneg floating point negation
|
||||
;; fcmp floating point compare
|
||||
+;; fcopysign floating point copysign
|
||||
;; fcvt floating point convert
|
||||
;; fsqrt floating point square root
|
||||
;; frsqrt floating point reciprocal square root
|
||||
@@ -226,7 +228,7 @@
|
||||
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcvt,fsqrt,
|
||||
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
|
||||
frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
(cond [(eq_attr "jirl" "!unset") (const_string "call")
|
||||
(eq_attr "got" "load") (const_string "load")
|
||||
@@ -976,6 +978,24 @@
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
;;
|
||||
+;; ....................
|
||||
+;;
|
||||
+;; FLOATING POINT COPYSIGN
|
||||
+;;
|
||||
+;; ....................
|
||||
+
|
||||
+(define_insn "copysign<mode>3"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
|
||||
+ (match_operand:ANYF 2 "register_operand" "f")]
|
||||
+ UNSPEC_FCOPYSIGN))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+ "fcopysign.<fmt>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "fcopysign")
|
||||
+ (set_attr "mode" "<UNITMODE>")])
|
||||
+
|
||||
+
|
||||
+;;
|
||||
;; ...................
|
||||
;;
|
||||
;; Count leading zeroes.
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/fcopysign.c b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
|
||||
new file mode 100644
|
||||
index 000000000..058ba2cf5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/fcopysign.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mdouble-float" } */
|
||||
+/* { dg-final { scan-assembler "fcopysign\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "fcopysign\\.d" } } */
|
||||
+
|
||||
+double
|
||||
+my_copysign (double a, double b)
|
||||
+{
|
||||
+ return __builtin_copysign (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_copysignf (float a, float b)
|
||||
+{
|
||||
+ return __builtin_copysignf (a, b);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
123
LoongArch-Add-flogb.-s-d-instructions-and-expand-log.patch
Normal file
123
LoongArch-Add-flogb.-s-d-instructions-and-expand-log.patch
Normal file
@ -0,0 +1,123 @@
|
||||
From 2ae587a86bba31b91a127e353c31c9f861ff5326 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 8 Nov 2022 13:42:20 +0800
|
||||
Subject: [PATCH 030/124] LoongArch: Add flogb.{s,d} instructions and expand
|
||||
logb{sf,df}2
|
||||
|
||||
On LoongArch, flogb instructions extract the exponent of a non-negative
|
||||
floating point value, but produce NaN for negative values. So we need
|
||||
to add a fabs instruction when we expand logb.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FLOGB): New unspec.
|
||||
(type): Add flogb.
|
||||
(logb_non_negative<mode>2): New instruction template.
|
||||
(logb<mode>2): New define_expand.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/flogb.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 35 ++++++++++++++++++++--
|
||||
gcc/testsuite/gcc.target/loongarch/flogb.c | 18 +++++++++++
|
||||
2 files changed, 51 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/flogb.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index c141c9add..682ab9617 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -42,6 +42,7 @@
|
||||
UNSPEC_FTINTRM
|
||||
UNSPEC_FTINTRP
|
||||
UNSPEC_FSCALEB
|
||||
+ UNSPEC_FLOGB
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -217,6 +218,7 @@
|
||||
;; fdiv floating point divide
|
||||
;; frdiv floating point reciprocal divide
|
||||
;; fabs floating point absolute value
|
||||
+;; flogb floating point exponent extract
|
||||
;; fneg floating point negation
|
||||
;; fcmp floating point compare
|
||||
;; fcopysign floating point copysign
|
||||
@@ -233,8 +235,8 @@
|
||||
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
|
||||
- fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,flogb,fneg,fcmp,fcopysign,fcvt,
|
||||
+ fscaleb,fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
(cond [(eq_attr "jirl" "!unset") (const_string "call")
|
||||
(eq_attr "got" "load") (const_string "load")
|
||||
|
||||
@@ -1039,6 +1041,35 @@
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
;;
|
||||
+;; ....................
|
||||
+;;
|
||||
+;; FLOATING POINT EXPONENT EXTRACT
|
||||
+;;
|
||||
+;; ....................
|
||||
+
|
||||
+(define_insn "logb_non_negative<mode>2"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")]
|
||||
+ UNSPEC_FLOGB))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+ "flogb.<fmt>\t%0,%1"
|
||||
+ [(set_attr "type" "flogb")
|
||||
+ (set_attr "mode" "<UNITMODE>")])
|
||||
+
|
||||
+(define_expand "logb<mode>2"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand")
|
||||
+ (unspec:ANYF [(abs:ANYF (match_operand:ANYF 1 "register_operand"))]
|
||||
+ UNSPEC_FLOGB))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+{
|
||||
+ rtx tmp = gen_reg_rtx (<MODE>mode);
|
||||
+
|
||||
+ emit_insn (gen_abs<mode>2 (tmp, operands[1]));
|
||||
+ emit_insn (gen_logb_non_negative<mode>2 (operands[0], tmp));
|
||||
+ DONE;
|
||||
+})
|
||||
+
|
||||
+;;
|
||||
;; ...................
|
||||
;;
|
||||
;; Count leading zeroes.
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/flogb.c b/gcc/testsuite/gcc.target/loongarch/flogb.c
|
||||
new file mode 100644
|
||||
index 000000000..1daefe54e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/flogb.c
|
||||
@@ -0,0 +1,18 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mdouble-float -fno-math-errno" } */
|
||||
+/* { dg-final { scan-assembler "fabs\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "fabs\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "flogb\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "flogb\\.d" } } */
|
||||
+
|
||||
+double
|
||||
+my_logb (double a)
|
||||
+{
|
||||
+ return __builtin_logb (a);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_logbf (float a)
|
||||
+{
|
||||
+ return __builtin_logbf (a);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
155
LoongArch-Add-fscaleb.-s-d-instructions-as-ldexp-sf-.patch
Normal file
155
LoongArch-Add-fscaleb.-s-d-instructions-as-ldexp-sf-.patch
Normal file
@ -0,0 +1,155 @@
|
||||
From e3d69a3b7a4e00e8bba88b8b4abaa1c17bc083d5 Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Tue, 8 Nov 2022 12:14:35 +0800
|
||||
Subject: [PATCH 029/124] LoongArch: Add fscaleb.{s,d} instructions as
|
||||
ldexp{sf,df}3
|
||||
|
||||
This allows optimizing __builtin_ldexp{,f} and __builtin_scalbn{,f} with
|
||||
-fno-math-errno.
|
||||
|
||||
IMODE is added because we can't hard code SI for operand 2: fscaleb.d
|
||||
instruction always takes the high half of both source registers into
|
||||
account. See my_ldexp_long in the test case.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FSCALEB): New unspec.
|
||||
(type): Add fscaleb.
|
||||
(IMODE): New mode attr.
|
||||
(ldexp<mode>3): New instruction template.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/fscaleb.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 26 ++++++++++-
|
||||
gcc/testsuite/gcc.target/loongarch/fscaleb.c | 48 ++++++++++++++++++++
|
||||
2 files changed, 72 insertions(+), 2 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/fscaleb.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index eb127c346..c141c9add 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -41,6 +41,7 @@
|
||||
UNSPEC_FTINT
|
||||
UNSPEC_FTINTRM
|
||||
UNSPEC_FTINTRP
|
||||
+ UNSPEC_FSCALEB
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -220,6 +221,7 @@
|
||||
;; fcmp floating point compare
|
||||
;; fcopysign floating point copysign
|
||||
;; fcvt floating point convert
|
||||
+;; fscaleb floating point scale
|
||||
;; fsqrt floating point square root
|
||||
;; frsqrt floating point reciprocal square root
|
||||
;; multi multiword sequence (or user asm statements)
|
||||
@@ -231,8 +233,8 @@
|
||||
"unknown,branch,jump,call,load,fpload,fpidxload,store,fpstore,fpidxstore,
|
||||
prefetch,prefetchx,condmove,mgtf,mftg,const,arith,logical,
|
||||
shift,slt,signext,clz,trap,imul,idiv,move,
|
||||
- fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fsqrt,
|
||||
- frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
+ fmove,fadd,fmul,fmadd,fdiv,frdiv,fabs,fneg,fcmp,fcopysign,fcvt,fscaleb,
|
||||
+ fsqrt,frsqrt,accext,accmod,multi,atomic,syncloop,nop,ghost"
|
||||
(cond [(eq_attr "jirl" "!unset") (const_string "call")
|
||||
(eq_attr "got" "load") (const_string "load")
|
||||
|
||||
@@ -418,6 +420,10 @@
|
||||
;; the controlling mode.
|
||||
(define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
|
||||
|
||||
+;; This attribute gives the integer mode that has the same size of a
|
||||
+;; floating-point mode.
|
||||
+(define_mode_attr IMODE [(SF "SI") (DF "DI")])
|
||||
+
|
||||
;; This code iterator allows signed and unsigned widening multiplications
|
||||
;; to use the same template.
|
||||
(define_code_iterator any_extend [sign_extend zero_extend])
|
||||
@@ -1014,7 +1020,23 @@
|
||||
"fcopysign.<fmt>\t%0,%1,%2"
|
||||
[(set_attr "type" "fcopysign")
|
||||
(set_attr "mode" "<UNITMODE>")])
|
||||
+
|
||||
+;;
|
||||
+;; ....................
|
||||
+;;
|
||||
+;; FLOATING POINT SCALE
|
||||
+;;
|
||||
+;; ....................
|
||||
|
||||
+(define_insn "ldexp<mode>3"
|
||||
+ [(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
+ (unspec:ANYF [(match_operand:ANYF 1 "register_operand" "f")
|
||||
+ (match_operand:<IMODE> 2 "register_operand" "f")]
|
||||
+ UNSPEC_FSCALEB))]
|
||||
+ "TARGET_HARD_FLOAT"
|
||||
+ "fscaleb.<fmt>\t%0,%1,%2"
|
||||
+ [(set_attr "type" "fscaleb")
|
||||
+ (set_attr "mode" "<UNITMODE>")])
|
||||
|
||||
;;
|
||||
;; ...................
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/fscaleb.c b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
|
||||
new file mode 100644
|
||||
index 000000000..f18470fbb
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/fscaleb.c
|
||||
@@ -0,0 +1,48 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -mabi=lp64d -mdouble-float -fno-math-errno" } */
|
||||
+/* { dg-final { scan-assembler-times "fscaleb\\.s" 3 } } */
|
||||
+/* { dg-final { scan-assembler-times "fscaleb\\.d" 4 } } */
|
||||
+/* { dg-final { scan-assembler-times "slli\\.w" 1 } } */
|
||||
+
|
||||
+double
|
||||
+my_scalbln (double a, long b)
|
||||
+{
|
||||
+ return __builtin_scalbln (a, b);
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+my_scalbn (double a, int b)
|
||||
+{
|
||||
+ return __builtin_scalbn (a, b);
|
||||
+}
|
||||
+
|
||||
+double
|
||||
+my_ldexp (double a, int b)
|
||||
+{
|
||||
+ return __builtin_ldexp (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_scalblnf (float a, long b)
|
||||
+{
|
||||
+ return __builtin_scalblnf (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_scalbnf (float a, int b)
|
||||
+{
|
||||
+ return __builtin_scalbnf (a, b);
|
||||
+}
|
||||
+
|
||||
+float
|
||||
+my_ldexpf (float a, int b)
|
||||
+{
|
||||
+ return __builtin_ldexpf (a, b);
|
||||
+}
|
||||
+
|
||||
+/* b must be sign-extended */
|
||||
+double
|
||||
+my_ldexp_long (double a, long b)
|
||||
+{
|
||||
+ return __builtin_ldexp (a, b);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
220
LoongArch-Add-ftint-rm-rp-.-w-l-.-s-d-instructions.patch
Normal file
220
LoongArch-Add-ftint-rm-rp-.-w-l-.-s-d-instructions.patch
Normal file
@ -0,0 +1,220 @@
|
||||
From 76d599c6d8f9cf78b51cd76a7ca8fbe11e2cda2b Mon Sep 17 00:00:00 2001
|
||||
From: Xi Ruoyao <xry111@xry111.site>
|
||||
Date: Sun, 6 Nov 2022 23:16:49 +0800
|
||||
Subject: [PATCH 028/124] LoongArch: Add ftint{,rm,rp}.{w,l}.{s,d} instructions
|
||||
|
||||
This allows optimizing the following builtins if -fno-math-errno is given:
|
||||
|
||||
- __builtin_lrint{,f}
|
||||
- __builtin_lfloor{,f}
|
||||
- __builtin_lceil{,f}
|
||||
|
||||
Inspired by
|
||||
https://gcc.gnu.org/pipermail/gcc-patches/2022-November/605287.html.
|
||||
|
||||
ANYFI is added so the compiler won't try ftint.l.s if -mfpu=32. If we
|
||||
simply used GPR here an ICE would be triggered with __builtin_lrintf
|
||||
and -mfpu=32.
|
||||
|
||||
ftint{rm,rp} instructions may raise inexact exception, so they can't be
|
||||
used if -ftrapping-math -fno-fp-int-builtin-inexact.
|
||||
|
||||
Note that the .w.{s,d} variants are not tested because we don't support
|
||||
ILP32 for now.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch.md (UNSPEC_FTINT): New unspec.
|
||||
(UNSPEC_FTINTRM): Likewise.
|
||||
(UNSPEC_FTINTRP): Likewise.
|
||||
(LRINT): New define_int_iterator.
|
||||
(lrint_pattern): New define_int_attr.
|
||||
(lrint_submenmonic): Likewise.
|
||||
(lrint_allow_inexact): Likewise.
|
||||
(ANYFI): New define_mode_iterator.
|
||||
(lrint<ANYF><ANYFI>): New instruction template.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/ftint.c: New test.
|
||||
* gcc.target/loongarch/ftint-no-inexact.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch.md | 34 ++++++++++++++
|
||||
.../gcc.target/loongarch/ftint-no-inexact.c | 44 +++++++++++++++++++
|
||||
gcc/testsuite/gcc.target/loongarch/ftint.c | 44 +++++++++++++++++++
|
||||
3 files changed, 122 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/ftint.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index a14ab14ac..eb127c346 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -38,6 +38,9 @@
|
||||
UNSPEC_FMAX
|
||||
UNSPEC_FMIN
|
||||
UNSPEC_FCOPYSIGN
|
||||
+ UNSPEC_FTINT
|
||||
+ UNSPEC_FTINTRM
|
||||
+ UNSPEC_FTINTRP
|
||||
|
||||
;; Override return address for exception handling.
|
||||
UNSPEC_EH_RETURN
|
||||
@@ -374,6 +377,11 @@
|
||||
(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
|
||||
(DF "TARGET_DOUBLE_FLOAT")])
|
||||
|
||||
+;; Iterator for fixed-point modes which can be hold by a hardware
|
||||
+;; floating-point register.
|
||||
+(define_mode_iterator ANYFI [(SI "TARGET_HARD_FLOAT")
|
||||
+ (DI "TARGET_DOUBLE_FLOAT")])
|
||||
+
|
||||
;; A mode for which moves involving FPRs may need to be split.
|
||||
(define_mode_iterator SPLITF
|
||||
[(DF "!TARGET_64BIT && TARGET_DOUBLE_FLOAT")
|
||||
@@ -515,6 +523,19 @@
|
||||
(define_code_attr sel [(eq "masknez") (ne "maskeqz")])
|
||||
(define_code_attr selinv [(eq "maskeqz") (ne "masknez")])
|
||||
|
||||
+;; Iterator and attributes for floating-point to fixed-point conversion
|
||||
+;; instructions.
|
||||
+(define_int_iterator LRINT [UNSPEC_FTINT UNSPEC_FTINTRM UNSPEC_FTINTRP])
|
||||
+(define_int_attr lrint_pattern [(UNSPEC_FTINT "lrint")
|
||||
+ (UNSPEC_FTINTRM "lfloor")
|
||||
+ (UNSPEC_FTINTRP "lceil")])
|
||||
+(define_int_attr lrint_submenmonic [(UNSPEC_FTINT "")
|
||||
+ (UNSPEC_FTINTRM "rm")
|
||||
+ (UNSPEC_FTINTRP "rp")])
|
||||
+(define_int_attr lrint_allow_inexact [(UNSPEC_FTINT "1")
|
||||
+ (UNSPEC_FTINTRM "0")
|
||||
+ (UNSPEC_FTINTRP "0")])
|
||||
+
|
||||
;;
|
||||
;; ....................
|
||||
;;
|
||||
@@ -2022,6 +2043,19 @@
|
||||
[(set_attr "type" "fcvt")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
+;; Convert floating-point numbers to integers
|
||||
+(define_insn "<lrint_pattern><ANYF:mode><ANYFI:mode>2"
|
||||
+ [(set (match_operand:ANYFI 0 "register_operand" "=f")
|
||||
+ (unspec:ANYFI [(match_operand:ANYF 1 "register_operand" "f")]
|
||||
+ LRINT))]
|
||||
+ "TARGET_HARD_FLOAT &&
|
||||
+ (<lrint_allow_inexact>
|
||||
+ || flag_fp_int_builtin_inexact
|
||||
+ || !flag_trapping_math)"
|
||||
+ "ftint<lrint_submenmonic>.<ANYFI:ifmt>.<ANYF:fmt> %0,%1"
|
||||
+ [(set_attr "type" "fcvt")
|
||||
+ (set_attr "mode" "<ANYF:MODE>")])
|
||||
+
|
||||
;; Load the low word of operand 0 with operand 1.
|
||||
(define_insn "load_low<mode>"
|
||||
[(set (match_operand:SPLITF 0 "register_operand" "=f,f")
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
|
||||
new file mode 100644
|
||||
index 000000000..88b83a9c0
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/ftint-no-inexact.c
|
||||
@@ -0,0 +1,44 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -fno-fp-int-builtin-inexact" } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrm\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler-not "ftintrp\\.l\\.d" } } */
|
||||
+
|
||||
+long
|
||||
+my_lrint (double a)
|
||||
+{
|
||||
+ return __builtin_lrint (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lrintf (float a)
|
||||
+{
|
||||
+ return __builtin_lrintf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloor (double a)
|
||||
+{
|
||||
+ return __builtin_lfloor (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloorf (float a)
|
||||
+{
|
||||
+ return __builtin_lfloorf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceil (double a)
|
||||
+{
|
||||
+ return __builtin_lceil (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceilf (float a)
|
||||
+{
|
||||
+ return __builtin_lceilf (a);
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/ftint.c b/gcc/testsuite/gcc.target/loongarch/ftint.c
|
||||
new file mode 100644
|
||||
index 000000000..7a326a454
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/ftint.c
|
||||
@@ -0,0 +1,44 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -mdouble-float -fno-math-errno -ffp-int-builtin-inexact" } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftint\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrm\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrm\\.l\\.d" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrp\\.l\\.s" } } */
|
||||
+/* { dg-final { scan-assembler "ftintrp\\.l\\.d" } } */
|
||||
+
|
||||
+long
|
||||
+my_lrint (double a)
|
||||
+{
|
||||
+ return __builtin_lrint (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lrintf (float a)
|
||||
+{
|
||||
+ return __builtin_lrintf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloor (double a)
|
||||
+{
|
||||
+ return __builtin_lfloor (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lfloorf (float a)
|
||||
+{
|
||||
+ return __builtin_lfloorf (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceil (double a)
|
||||
+{
|
||||
+ return __builtin_lceil (a);
|
||||
+}
|
||||
+
|
||||
+long
|
||||
+my_lceilf (float a)
|
||||
+{
|
||||
+ return __builtin_lceilf (a);
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1051
LoongArch-Add-new-code-model-medium.patch
Normal file
1051
LoongArch-Add-new-code-model-medium.patch
Normal file
File diff suppressed because it is too large
Load Diff
158
LoongArch-Add-prefetch-instructions.patch
Normal file
158
LoongArch-Add-prefetch-instructions.patch
Normal file
@ -0,0 +1,158 @@
|
||||
From 52a41006c2e8141a42de93ffcc2c040e034244b2 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Wed, 16 Nov 2022 09:25:14 +0800
|
||||
Subject: [PATCH 031/124] LoongArch: Add prefetch instructions.
|
||||
|
||||
Enable sw prefetching at -O3 and higher.
|
||||
|
||||
Co-Authored-By: xujiahao <xujiahao@loongson.cn>
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/constraints.md (ZD): New constraint.
|
||||
* config/loongarch/loongarch-def.c: Initialize the number of parallel prefetches.
|
||||
* config/loongarch/loongarch-tune.h (struct loongarch_cache):
|
||||
Define number of parallel prefetch.
|
||||
* config/loongarch/loongarch.cc (loongarch_option_override_internal):
|
||||
Set up parameters to be used in prefetching algorithm.
|
||||
* config/loongarch/loongarch.md (prefetch): New template.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/constraints.md | 10 ++++++++++
|
||||
gcc/config/loongarch/loongarch-def.c | 2 ++
|
||||
gcc/config/loongarch/loongarch-tune.h | 1 +
|
||||
gcc/config/loongarch/loongarch.cc | 28 +++++++++++++++++++++++++++
|
||||
gcc/config/loongarch/loongarch.md | 14 ++++++++++++++
|
||||
5 files changed, 55 insertions(+)
|
||||
|
||||
diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
|
||||
index 43cb7b5f0..46f7f63ae 100644
|
||||
--- a/gcc/config/loongarch/constraints.md
|
||||
+++ b/gcc/config/loongarch/constraints.md
|
||||
@@ -86,6 +86,10 @@
|
||||
;; "ZB"
|
||||
;; "An address that is held in a general-purpose register.
|
||||
;; The offset is zero"
|
||||
+;; "ZD"
|
||||
+;; "An address operand whose address is formed by a base register
|
||||
+;; and offset that is suitable for use in instructions with the same
|
||||
+;; addressing mode as @code{preld}."
|
||||
;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
|
||||
;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
|
||||
|
||||
@@ -190,3 +194,9 @@
|
||||
The offset is zero"
|
||||
(and (match_code "mem")
|
||||
(match_test "REG_P (XEXP (op, 0))")))
|
||||
+
|
||||
+(define_address_constraint "ZD"
|
||||
+ "An address operand whose address is formed by a base register
|
||||
+ and offset that is suitable for use in instructions with the same
|
||||
+ addressing mode as @code{preld}."
|
||||
+ (match_test "loongarch_12bit_offset_address_p (op, mode)"))
|
||||
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
|
||||
index cbf995d81..80ab10a52 100644
|
||||
--- a/gcc/config/loongarch/loongarch-def.c
|
||||
+++ b/gcc/config/loongarch/loongarch-def.c
|
||||
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
|
||||
.l1d_line_size = 64,
|
||||
.l1d_size = 64,
|
||||
.l2d_size = 256,
|
||||
+ .simultaneous_prefetches = 4,
|
||||
},
|
||||
[CPU_LA464] = {
|
||||
.l1d_line_size = 64,
|
||||
.l1d_size = 64,
|
||||
.l2d_size = 256,
|
||||
+ .simultaneous_prefetches = 4,
|
||||
},
|
||||
};
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
|
||||
index 6f3530f5c..8e3eb2947 100644
|
||||
--- a/gcc/config/loongarch/loongarch-tune.h
|
||||
+++ b/gcc/config/loongarch/loongarch-tune.h
|
||||
@@ -45,6 +45,7 @@ struct loongarch_cache {
|
||||
int l1d_line_size; /* bytes */
|
||||
int l1d_size; /* KiB */
|
||||
int l2d_size; /* kiB */
|
||||
+ int simultaneous_prefetches; /* number of parallel prefetch */
|
||||
};
|
||||
|
||||
#endif /* LOONGARCH_TUNE_H */
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index d552b162a..622c9435b 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "context.h"
|
||||
#include "builtins.h"
|
||||
#include "rtl-iter.h"
|
||||
+#include "opts.h"
|
||||
|
||||
/* This file should be included last. */
|
||||
#include "target-def.h"
|
||||
@@ -6099,6 +6100,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
|
||||
if (loongarch_branch_cost == 0)
|
||||
loongarch_branch_cost = loongarch_cost->branch_cost;
|
||||
|
||||
+ /* Set up parameters to be used in prefetching algorithm. */
|
||||
+ int simultaneous_prefetches
|
||||
+ = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_simultaneous_prefetches,
|
||||
+ simultaneous_prefetches);
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_l1_cache_line_size,
|
||||
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_l1_cache_size,
|
||||
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
|
||||
+
|
||||
+ SET_OPTION_IF_UNSET (opts, &global_options_set,
|
||||
+ param_l2_cache_size,
|
||||
+ loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
|
||||
+
|
||||
+
|
||||
+ /* Enable sw prefetching at -O3 and higher. */
|
||||
+ if (opts->x_flag_prefetch_loop_arrays < 0
|
||||
+ && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
|
||||
+ && !opts->x_optimize_size)
|
||||
+ opts->x_flag_prefetch_loop_arrays = 1;
|
||||
+
|
||||
if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
|
||||
error ("%qs cannot be used for compiling a shared library",
|
||||
"-mdirect-extern-access");
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 682ab9617..2fda53819 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -3282,6 +3282,20 @@
|
||||
;; ....................
|
||||
;;
|
||||
|
||||
+(define_insn "prefetch"
|
||||
+ [(prefetch (match_operand 0 "address_operand" "ZD")
|
||||
+ (match_operand 1 "const_int_operand" "n")
|
||||
+ (match_operand 2 "const_int_operand" "n"))]
|
||||
+ ""
|
||||
+{
|
||||
+ switch (INTVAL (operands[1]))
|
||||
+ {
|
||||
+ case 0: return "preld\t0,%a0";
|
||||
+ case 1: return "preld\t8,%a0";
|
||||
+ default: gcc_unreachable ();
|
||||
+ }
|
||||
+})
|
||||
+
|
||||
(define_insn "nop"
|
||||
[(const_int 0)]
|
||||
""
|
||||
--
|
||||
2.33.0
|
||||
|
||||
794
LoongArch-Add-support-code-model-extreme.patch
Normal file
794
LoongArch-Add-support-code-model-extreme.patch
Normal file
@ -0,0 +1,794 @@
|
||||
From b1c92fb9dab678e4c9c23fa77185011494d145b9 Mon Sep 17 00:00:00 2001
|
||||
From: Lulu Cheng <chenglulu@loongson.cn>
|
||||
Date: Thu, 18 Aug 2022 17:26:13 +0800
|
||||
Subject: [PATCH 011/124] LoongArch: Add support code model extreme.
|
||||
|
||||
Use five instructions to calculate a signed 64-bit offset relative to the pc.
|
||||
|
||||
gcc/ChangeLog:
|
||||
|
||||
* config/loongarch/loongarch-opts.cc: Allow cmodel to be extreme.
|
||||
* config/loongarch/loongarch.cc (loongarch_call_tls_get_addr):
|
||||
Add extreme support for TLS GD and LD types.
|
||||
(loongarch_legitimize_tls_address): Add extreme support for TLS LE
|
||||
and IE.
|
||||
(loongarch_split_symbol): When compiling with -mcmodel=extreme,
|
||||
the symbol address will be obtained through five instructions.
|
||||
(loongarch_print_operand_reloc): Add support.
|
||||
(loongarch_print_operand): Add support.
|
||||
(loongarch_print_operand_address): Add support.
|
||||
(loongarch_option_override_internal): Set '-mcmodel=extreme' option
|
||||
incompatible with '-mno-explicit-relocs'.
|
||||
* config/loongarch/loongarch.md (@lui_l_hi20<mode>):
|
||||
Loads bits 12-31 of data into registers.
|
||||
(lui_h_lo20): Load bits 32-51 of the data and spell bits 0-31 of
|
||||
the source register.
|
||||
(lui_h_hi12): Load bits 52-63 of the data and spell bits 0-51 of
|
||||
the source register.
|
||||
* config/loongarch/predicates.md: Symbols need to be decomposed
|
||||
when defining the macro TARGET_CMODEL_EXTREME
|
||||
* doc/invoke.texi: Modify the description information of cmodel in the document.
|
||||
Document -W[no-]extreme-plt.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/func-call-1.c: Add option '-mcmodel=normal'.
|
||||
* gcc.target/loongarch/func-call-2.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-3.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-4.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-5.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-6.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-7.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-8.c: Likewise.
|
||||
* gcc.target/loongarch/relocs-symbol-noaddend.c: Likewise.
|
||||
* gcc.target/loongarch/func-call-extreme-1.c: New test.
|
||||
* gcc.target/loongarch/func-call-extreme-2.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/config/loongarch/loongarch-opts.cc | 3 +-
|
||||
gcc/config/loongarch/loongarch.cc | 222 +++++++++++++++---
|
||||
gcc/config/loongarch/loongarch.md | 34 ++-
|
||||
gcc/config/loongarch/predicates.md | 9 +-
|
||||
gcc/doc/invoke.texi | 50 +---
|
||||
.../gcc.target/loongarch/func-call-1.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-2.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-3.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-4.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-5.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-6.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-7.c | 2 +-
|
||||
.../gcc.target/loongarch/func-call-8.c | 2 +-
|
||||
.../loongarch/func-call-extreme-1.c | 32 +++
|
||||
.../loongarch/func-call-extreme-2.c | 32 +++
|
||||
.../loongarch/relocs-symbol-noaddend.c | 2 +-
|
||||
16 files changed, 318 insertions(+), 82 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
|
||||
|
||||
diff --git a/gcc/config/loongarch/loongarch-opts.cc b/gcc/config/loongarch/loongarch-opts.cc
|
||||
index 3f70943de..2ae89f234 100644
|
||||
--- a/gcc/config/loongarch/loongarch-opts.cc
|
||||
+++ b/gcc/config/loongarch/loongarch-opts.cc
|
||||
@@ -376,14 +376,13 @@ fallback:
|
||||
|
||||
/* 5. Target code model */
|
||||
t.cmodel = constrained.cmodel ? opt_cmodel : CMODEL_NORMAL;
|
||||
- if (t.cmodel != CMODEL_NORMAL)
|
||||
+ if (t.cmodel != CMODEL_NORMAL && t.cmodel != CMODEL_EXTREME)
|
||||
{
|
||||
warning (0, "%qs is not supported, now cmodel is set to %qs",
|
||||
loongarch_cmodel_strings[t.cmodel], "normal");
|
||||
t.cmodel = CMODEL_NORMAL;
|
||||
}
|
||||
|
||||
-
|
||||
/* Cleanup and return. */
|
||||
obstack_free (&msg_obstack, NULL);
|
||||
*target = t;
|
||||
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||||
index 76bf55ea4..1a33f668f 100644
|
||||
--- a/gcc/config/loongarch/loongarch.cc
|
||||
+++ b/gcc/config/loongarch/loongarch.cc
|
||||
@@ -2436,7 +2436,19 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
|
||||
/* Split tls symbol to high and low. */
|
||||
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc));
|
||||
high = loongarch_force_temporary (tmp, high);
|
||||
- emit_insn (gen_tls_low (Pmode, a0, high, loc));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ rtx tmp1 = gen_reg_rtx (Pmode);
|
||||
+ emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc));
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc));
|
||||
+ emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1));
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_tls_low (Pmode, a0, high, loc));
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -2449,14 +2461,44 @@ loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0)
|
||||
}
|
||||
|
||||
if (flag_plt)
|
||||
- insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol,
|
||||
+ insn = emit_call_insn (gen_call_value_internal (v0,
|
||||
+ loongarch_tls_symbol,
|
||||
const0_rtx));
|
||||
else
|
||||
{
|
||||
rtx dest = gen_reg_rtx (Pmode);
|
||||
- rtx high = gen_reg_rtx (Pmode);
|
||||
- loongarch_emit_move (high, gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
- emit_insn (gen_ld_from_got (Pmode, dest, high, loongarch_tls_symbol));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ rtx tmp1 = gen_reg_rtx (Pmode);
|
||||
+ rtx high = gen_reg_rtx (Pmode);
|
||||
+
|
||||
+ loongarch_emit_move (high,
|
||||
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
+ loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode,
|
||||
+ gen_rtx_REG (Pmode, 0),
|
||||
+ loongarch_tls_symbol));
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol));
|
||||
+ loongarch_emit_move (dest,
|
||||
+ gen_rtx_MEM (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode, high, tmp1)));
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (TARGET_EXPLICIT_RELOCS)
|
||||
+ {
|
||||
+ rtx high = gen_reg_rtx (Pmode);
|
||||
+ loongarch_emit_move (high,
|
||||
+ gen_rtx_HIGH (Pmode, loongarch_tls_symbol));
|
||||
+ emit_insn (gen_ld_from_got (Pmode, dest, high,
|
||||
+ loongarch_tls_symbol));
|
||||
+ }
|
||||
+ else
|
||||
+ loongarch_emit_move (dest, loongarch_tls_symbol);
|
||||
+ }
|
||||
insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx));
|
||||
}
|
||||
|
||||
@@ -2508,7 +2550,23 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||
tmp3 = gen_reg_rtx (Pmode);
|
||||
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
|
||||
high = loongarch_force_temporary (tmp3, high);
|
||||
- emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ rtx tmp3 = gen_reg_rtx (Pmode);
|
||||
+ emit_insn (gen_tls_low (Pmode, tmp3,
|
||||
+ gen_rtx_REG (Pmode, 0), tmp2));
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2));
|
||||
+ emit_move_insn (tmp1,
|
||||
+ gen_rtx_MEM (Pmode,
|
||||
+ gen_rtx_PLUS (Pmode,
|
||||
+ high, tmp3)));
|
||||
+ }
|
||||
+ else
|
||||
+ emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2));
|
||||
}
|
||||
else
|
||||
emit_insn (loongarch_got_load_tls_ie (tmp1, loc));
|
||||
@@ -2530,11 +2588,18 @@ loongarch_legitimize_tls_address (rtx loc)
|
||||
rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2));
|
||||
high = loongarch_force_temporary (tmp3, high);
|
||||
emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2));
|
||||
+
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2));
|
||||
+ emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2));
|
||||
+ }
|
||||
}
|
||||
else
|
||||
emit_insn (loongarch_got_load_tls_le (tmp1, loc));
|
||||
emit_insn (gen_add3_insn (dest, tmp1, tp));
|
||||
-
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -2603,7 +2668,6 @@ bool
|
||||
loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||
{
|
||||
enum loongarch_symbol_type symbol_type;
|
||||
- rtx high;
|
||||
|
||||
/* If build with '-mno-explicit-relocs', don't split symbol. */
|
||||
if (!TARGET_EXPLICIT_RELOCS)
|
||||
@@ -2615,6 +2679,8 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||
|| !loongarch_split_symbol_type (symbol_type))
|
||||
return false;
|
||||
|
||||
+ rtx high, temp1 = NULL;
|
||||
+
|
||||
if (temp == NULL)
|
||||
temp = gen_reg_rtx (Pmode);
|
||||
|
||||
@@ -2622,20 +2688,42 @@ loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out)
|
||||
high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
|
||||
high = loongarch_force_temporary (temp, high);
|
||||
|
||||
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
|
||||
+ {
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
+ temp1 = gen_reg_rtx (Pmode);
|
||||
+ emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0),
|
||||
+ addr));
|
||||
+ emit_insn (gen_lui_h_lo20 (temp1, temp1, addr));
|
||||
+ emit_insn (gen_lui_h_hi12 (temp1, temp1, addr));
|
||||
+ }
|
||||
+
|
||||
if (low_out)
|
||||
switch (symbol_type)
|
||||
{
|
||||
case SYMBOL_PCREL:
|
||||
- *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
- break;
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
|
||||
+ *low_out = gen_rtx_PLUS (Pmode, high, temp1);
|
||||
+ else
|
||||
+ *low_out = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
case SYMBOL_GOT_DISP:
|
||||
/* SYMBOL_GOT_DISP symbols are loaded from the GOT. */
|
||||
{
|
||||
- rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
- rtx mem = gen_rtx_MEM (Pmode, low);
|
||||
- *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
|
||||
- UNSPEC_LOAD_FROM_GOT);
|
||||
+ if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ())
|
||||
+ *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1));
|
||||
+ else
|
||||
+ {
|
||||
+ rtx low = gen_rtx_LO_SUM (Pmode, high, addr);
|
||||
+ rtx mem = gen_rtx_MEM (Pmode, low);
|
||||
+ *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem),
|
||||
+ UNSPEC_LOAD_FROM_GOT);
|
||||
+ }
|
||||
+
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -4584,34 +4672,86 @@ loongarch_memmodel_needs_release_fence (enum memmodel model)
|
||||
in context CONTEXT. HI_RELOC indicates a high-part reloc. */
|
||||
|
||||
static void
|
||||
-loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
|
||||
+loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part,
|
||||
+ bool hi_reloc)
|
||||
{
|
||||
const char *reloc;
|
||||
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ gcc_assert (TARGET_EXPLICIT_RELOCS);
|
||||
+
|
||||
switch (loongarch_classify_symbolic_expression (op))
|
||||
{
|
||||
case SYMBOL_PCREL:
|
||||
- reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%pc64_hi12" : "%pc64_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%pc_hi20" : "%pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_GOT_DISP:
|
||||
- reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%got_pc_hi20" : "%got_pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLS_IE:
|
||||
- reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%ie64_pc_hi12" : "%ie64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%ie_pc_hi20" : "%ie_pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLS_LE:
|
||||
- reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%le64_hi12" : "%le64_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%le_hi20" : "%le_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLSGD:
|
||||
- reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%gd_pc_hi20" : "%got_pc_lo12";
|
||||
break;
|
||||
|
||||
case SYMBOL_TLSLDM:
|
||||
- reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
|
||||
+ if (hi64_part)
|
||||
+ {
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ reloc = hi_reloc ? "%got64_pc_hi12" : "%got64_pc_lo20";
|
||||
+ else
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
+ else
|
||||
+ reloc = hi_reloc ? "%ld_pc_hi20" : "%got_pc_lo12";
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -4637,6 +4777,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi_reloc)
|
||||
'L' Print the low-part relocation associated with OP.
|
||||
'm' Print one less than CONST_INT OP in decimal.
|
||||
'N' Print the inverse of the integer branch condition for comparison OP.
|
||||
+ 'r' Print address 12-31bit relocation associated with OP.
|
||||
+ 'R' Print address 32-51bit relocation associated with OP.
|
||||
'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...),
|
||||
'z' for (eq:?I ...), 'n' for (ne:?I ...).
|
||||
't' Like 'T', but with the EQ/NE cases reversed
|
||||
@@ -4694,7 +4836,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
case 'h':
|
||||
if (code == HIGH)
|
||||
op = XEXP (op, 0);
|
||||
- loongarch_print_operand_reloc (file, op, true /* hi_reloc */);
|
||||
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
|
||||
+ true /* hi_reloc */);
|
||||
+ break;
|
||||
+
|
||||
+ case 'H':
|
||||
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
|
||||
+ true /* hi_reloc */);
|
||||
break;
|
||||
|
||||
case 'i':
|
||||
@@ -4703,7 +4851,8 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
break;
|
||||
|
||||
case 'L':
|
||||
- loongarch_print_operand_reloc (file, op, false /* lo_reloc */);
|
||||
+ loongarch_print_operand_reloc (file, op, false /* hi64_part*/,
|
||||
+ false /* lo_reloc */);
|
||||
break;
|
||||
|
||||
case 'm':
|
||||
@@ -4718,6 +4867,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter)
|
||||
letter);
|
||||
break;
|
||||
|
||||
+ case 'r':
|
||||
+ loongarch_print_operand_reloc (file, op, false /* hi64_part */,
|
||||
+ true /* hi_reloc */);
|
||||
+ break;
|
||||
+
|
||||
+ case 'R':
|
||||
+ loongarch_print_operand_reloc (file, op, true /* hi64_part */,
|
||||
+ false /* lo_reloc */);
|
||||
+ break;
|
||||
+
|
||||
case 't':
|
||||
case 'T':
|
||||
{
|
||||
@@ -4848,7 +5007,8 @@ loongarch_print_operand_address (FILE *file, machine_mode /* mode */, rtx x)
|
||||
|
||||
case ADDRESS_LO_SUM:
|
||||
fprintf (file, "%s,", reg_names[REGNO (addr.reg)]);
|
||||
- loongarch_print_operand_reloc (file, addr.offset, false /* hi_reloc */);
|
||||
+ loongarch_print_operand_reloc (file, addr.offset, false /* hi64_part */,
|
||||
+ false /* hi_reloc */);
|
||||
return;
|
||||
|
||||
case ADDRESS_CONST_INT:
|
||||
@@ -5821,13 +5981,21 @@ loongarch_option_override_internal (struct gcc_options *opts)
|
||||
|
||||
switch (la_target.cmodel)
|
||||
{
|
||||
- case CMODEL_TINY_STATIC:
|
||||
case CMODEL_EXTREME:
|
||||
+ if (!TARGET_EXPLICIT_RELOCS)
|
||||
+ error ("code model %qs needs %s",
|
||||
+ "extreme", "-mexplicit-relocs");
|
||||
+
|
||||
if (opts->x_flag_plt)
|
||||
- error ("code model %qs and %qs not support %s mode",
|
||||
- "tiny-static", "extreme", "plt");
|
||||
+ {
|
||||
+ if (global_options_set.x_flag_plt)
|
||||
+ error ("code model %qs is not compatible with %s",
|
||||
+ "extreme", "-fplt");
|
||||
+ opts->x_flag_plt = 0;
|
||||
+ }
|
||||
break;
|
||||
|
||||
+ case CMODEL_TINY_STATIC:
|
||||
case CMODEL_NORMAL:
|
||||
case CMODEL_TINY:
|
||||
case CMODEL_LARGE:
|
||||
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||||
index 8e8868de9..8fc10444c 100644
|
||||
--- a/gcc/config/loongarch/loongarch.md
|
||||
+++ b/gcc/config/loongarch/loongarch.md
|
||||
@@ -60,6 +60,9 @@
|
||||
|
||||
UNSPEC_LOAD_FROM_GOT
|
||||
UNSPEC_ORI_L_LO12
|
||||
+ UNSPEC_LUI_L_HI20
|
||||
+ UNSPEC_LUI_H_LO20
|
||||
+ UNSPEC_LUI_H_HI12
|
||||
UNSPEC_TLS_LOW
|
||||
])
|
||||
|
||||
@@ -1934,16 +1937,45 @@
|
||||
[(set_attr "type" "move")]
|
||||
)
|
||||
|
||||
+(define_insn "@lui_l_hi20<mode>"
|
||||
+ [(set (match_operand:P 0 "register_operand" "=r")
|
||||
+ (unspec:P [(match_operand:P 1 "symbolic_operand")]
|
||||
+ UNSPEC_LUI_L_HI20))]
|
||||
+ ""
|
||||
+ "lu12i.w\t%0,%r1"
|
||||
+ [(set_attr "type" "move")]
|
||||
+)
|
||||
+
|
||||
(define_insn "@ori_l_lo12<mode>"
|
||||
[(set (match_operand:P 0 "register_operand" "=r")
|
||||
(unspec:P [(match_operand:P 1 "register_operand" "r")
|
||||
- (match_operand:P 2 "symbolic_operand")]
|
||||
+ (match_operand:P 2 "symbolic_operand")]
|
||||
UNSPEC_ORI_L_LO12))]
|
||||
""
|
||||
"ori\t%0,%1,%L2"
|
||||
[(set_attr "type" "move")]
|
||||
)
|
||||
|
||||
+(define_insn "lui_h_lo20"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (unspec:DI [(match_operand:DI 1 "register_operand" "0")
|
||||
+ (match_operand:DI 2 "symbolic_operand")]
|
||||
+ UNSPEC_LUI_H_LO20))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "lu32i.d\t%0,%R2"
|
||||
+ [(set_attr "type" "move")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "lui_h_hi12"
|
||||
+ [(set (match_operand:DI 0 "register_operand" "=r")
|
||||
+ (unspec:DI [(match_operand:DI 1 "register_operand" "r")
|
||||
+ (match_operand:DI 2 "symbolic_operand")]
|
||||
+ UNSPEC_LUI_H_HI12))]
|
||||
+ "TARGET_64BIT"
|
||||
+ "lu52i.d\t%0,%1,%H2"
|
||||
+ [(set_attr "type" "move")]
|
||||
+)
|
||||
+
|
||||
;; Convert floating-point numbers to integers
|
||||
(define_insn "frint_<fmt>"
|
||||
[(set (match_operand:ANYF 0 "register_operand" "=f")
|
||||
diff --git a/gcc/config/loongarch/predicates.md b/gcc/config/loongarch/predicates.md
|
||||
index cd3528c7c..e38c6fbdd 100644
|
||||
--- a/gcc/config/loongarch/predicates.md
|
||||
+++ b/gcc/config/loongarch/predicates.md
|
||||
@@ -111,7 +111,7 @@
|
||||
(match_code "const,symbol_ref,label_ref")
|
||||
{
|
||||
/* Split symbol to high and low if return false.
|
||||
- If defined TARGET_CMODEL_LARGE, all symbol would be splited,
|
||||
+ If defined TARGET_CMODEL_EXTREME, all symbol would be splited,
|
||||
else if offset is not zero, the symbol would be splited. */
|
||||
|
||||
enum loongarch_symbol_type symbol_type;
|
||||
@@ -126,10 +126,13 @@
|
||||
switch (symbol_type)
|
||||
{
|
||||
case SYMBOL_PCREL:
|
||||
- return 1;
|
||||
+ if (TARGET_CMODEL_EXTREME)
|
||||
+ return false;
|
||||
+ else
|
||||
+ return 1;
|
||||
|
||||
case SYMBOL_GOT_DISP:
|
||||
- if (TARGET_CMODEL_LARGE || !flag_plt)
|
||||
+ if (TARGET_CMODEL_EXTREME || !flag_plt)
|
||||
return false;
|
||||
else
|
||||
return 1;
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index 1de2b2bd4..c4f83e62a 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -1006,6 +1006,7 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mcond-move-float -mno-cond-move-float @gol
|
||||
-memcpy -mno-memcpy -mstrict-align -mno-strict-align @gol
|
||||
-mmax-inline-memcpy-size=@var{n} @gol
|
||||
+-mexplicit-relocs -mno-explicit-relocs @gol
|
||||
-mcmodel=@var{code-model}}
|
||||
|
||||
@emph{M32R/D Options}
|
||||
@@ -24617,50 +24618,19 @@ less than or equal to @var{n} bytes. The default value of @var{n} is 1024.
|
||||
@item -mcmodel=@var{code-model}
|
||||
Set the code model to one of:
|
||||
@table @samp
|
||||
-@item tiny-static
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol and global strong symbol: The data section must be within +/-2MiB addressing space.
|
||||
-The text section must be within +/-128MiB addressing space.
|
||||
-@item
|
||||
-global weak symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
-
|
||||
-@item tiny
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data section must be within +/-2MiB addressing space.
|
||||
-The text section must be within +/-128MiB
|
||||
-addressing space.
|
||||
-@item
|
||||
-global symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
+@item tiny-static (Not implemented yet)
|
||||
+@item tiny (Not implemented yet)
|
||||
|
||||
@item normal
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data section must be within +/-2GiB addressing space.
|
||||
-The text section must be within +/-128MiB addressing space.
|
||||
-@item
|
||||
-global symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
+The text segment must be within 128MB addressing space. The data segment must
|
||||
+be within 2GB addressing space.
|
||||
|
||||
-@item large
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data section must be within +/-2GiB addressing space.
|
||||
-The text section must be within +/-128GiB addressing space.
|
||||
-@item
|
||||
-global symbol: The got table must be within +/-2GiB addressing space.
|
||||
-@end itemize
|
||||
+@item large (Not implemented yet)
|
||||
|
||||
-@item extreme(Not implemented yet)
|
||||
-@itemize @bullet
|
||||
-@item
|
||||
-local symbol: The data and text section must be within +/-8EiB addressing space.
|
||||
-@item
|
||||
-global symbol: The data got table must be within +/-8EiB addressing space.
|
||||
-@end itemize
|
||||
+@item extreme
|
||||
+This mode does not limit the size of the code segment and data segment.
|
||||
+The @option{-mcmodel=extreme} option is incompatible with @option{-fplt} and
|
||||
+@option{-mno-explicit-relocs}.
|
||||
@end table
|
||||
The default code model is @code{normal}.
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
|
||||
index 01b8ea23f..76bf11b0c 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-1.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-1.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
|
||||
index 4565baaec..4b468fef8 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-2.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-2.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-3.c b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
|
||||
index 4f669a029..dd3a4882d 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-3.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*la\.global\t.*f\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-4.c b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
|
||||
index 943adb640..f8158ec34 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-4.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-4.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mno-explicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*la\.global\t.*g\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-5.c b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
|
||||
index 2c2a1c8a1..37994af43 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-5.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-5.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fplt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\t%plt\\(f\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-6.c b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
|
||||
index 4b0e4266e..8e366e376 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-6.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-6.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fplt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*bl\t%plt\\(g\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-7.c b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
|
||||
index 51792711f..4177c3d96 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-7.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-7.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*pcalau12i\t.*%got_pc_hi20\\(f\\)\n\tld\.d\t.*%got_pc_lo12\\(f\\)\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-8.c b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
|
||||
index 330140d88..4254eaa16 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/func-call-8.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-8.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs" } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "test:.*pcalau12i\t.*%got_pc_hi20\\(g\\)\n\tld\.d\t.*%got_pc_lo12\\(g\\)\n\tjirl" } } */
|
||||
/* { dg-final { scan-assembler "test1:.*bl\tf\n" } } */
|
||||
/* { dg-final { scan-assembler "test2:.*bl\tl\n" } } */
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
|
||||
new file mode 100644
|
||||
index 000000000..db1e0f853
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-1.c
|
||||
@@ -0,0 +1,32 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fno-pic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
|
||||
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||
+
|
||||
+extern void g (void);
|
||||
+void
|
||||
+f (void)
|
||||
+{}
|
||||
+
|
||||
+static void
|
||||
+l (void)
|
||||
+{}
|
||||
+
|
||||
+void
|
||||
+test (void)
|
||||
+{
|
||||
+ g ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test1 (void)
|
||||
+{
|
||||
+ f ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test2 (void)
|
||||
+{
|
||||
+ l ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
|
||||
new file mode 100644
|
||||
index 000000000..21bf81ae8
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/func-call-extreme-2.c
|
||||
@@ -0,0 +1,32 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-mabi=lp64d -O0 -fpic -fno-plt -mexplicit-relocs -mcmodel=extreme" } */
|
||||
+/* { dg-final { scan-assembler "test:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test1:.*pcalau12i.*%got_pc_hi20.*\n\taddi\.d.*%got_pc_lo12.*\n\tlu32i\.d.*%got64_pc_lo20.*\n\tlu52i\.d.*%got64_pc_hi12.*\n\tldx\.d" } } */
|
||||
+/* { dg-final { scan-assembler "test2:.*pcalau12i.*%pc_hi20.*\n\taddi\.d.*%pc_lo12.*\n\tlu32i\.d.*%pc64_lo20.*\n\tlu52i\.d.*pc64_hi12.*\n\tadd\.d" } } */
|
||||
+
|
||||
+extern void g (void);
|
||||
+void
|
||||
+f (void)
|
||||
+{}
|
||||
+
|
||||
+static void
|
||||
+l (void)
|
||||
+{}
|
||||
+
|
||||
+void
|
||||
+test (void)
|
||||
+{
|
||||
+ g ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test1 (void)
|
||||
+{
|
||||
+ f ();
|
||||
+}
|
||||
+
|
||||
+void
|
||||
+test2 (void)
|
||||
+{
|
||||
+ l ();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
|
||||
index bfcc9bc33..3ec8bd229 100644
|
||||
--- a/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/relocs-symbol-noaddend.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2" } */
|
||||
+/* { dg-options "-mabi=lp64d -mexplicit-relocs -fno-pic -O2 -mcmodel=normal" } */
|
||||
/* { dg-final { scan-assembler "pcalau12i.*%pc_hi20\\(\.LANCHOR0\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "addi\.d.*%pc_lo12\\(\.LANCHOR0\\)\n" } } */
|
||||
/* { dg-final { scan-assembler "ldptr.d\t\\\$r4,.*,0\n" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
4485
LoongArch-Add-tests-for-ASX-builtin-functions.patch
Normal file
4485
LoongArch-Add-tests-for-ASX-builtin-functions.patch
Normal file
File diff suppressed because it is too large
Load Diff
5363
LoongArch-Add-tests-for-ASX-vector-comparison-and-se.patch
Normal file
5363
LoongArch-Add-tests-for-ASX-vector-comparison-and-se.patch
Normal file
File diff suppressed because it is too large
Load Diff
7291
LoongArch-Add-tests-for-ASX-vector-floating-point-co.patch
Normal file
7291
LoongArch-Add-tests-for-ASX-vector-floating-point-co.patch
Normal file
File diff suppressed because it is too large
Load Diff
5614
LoongArch-Add-tests-for-ASX-vector-floating-point-op.patch
Normal file
5614
LoongArch-Add-tests-for-ASX-vector-floating-point-op.patch
Normal file
File diff suppressed because it is too large
Load Diff
4566
LoongArch-Add-tests-for-ASX-vector-subtraction-instr.patch
Normal file
4566
LoongArch-Add-tests-for-ASX-vector-subtraction-instr.patch
Normal file
File diff suppressed because it is too large
Load Diff
5595
LoongArch-Add-tests-for-ASX-vector-xvabsd-xvavg-xvav.patch
Normal file
5595
LoongArch-Add-tests-for-ASX-vector-xvabsd-xvavg-xvav.patch
Normal file
File diff suppressed because it is too large
Load Diff
6368
LoongArch-Add-tests-for-ASX-vector-xvadd-xvadda-xvad.patch
Normal file
6368
LoongArch-Add-tests-for-ASX-vector-xvadd-xvadda-xvad.patch
Normal file
File diff suppressed because it is too large
Load Diff
1854
LoongArch-Add-tests-for-ASX-vector-xvand-xvandi-xvan.patch
Normal file
1854
LoongArch-Add-tests-for-ASX-vector-xvand-xvandi-xvan.patch
Normal file
File diff suppressed because it is too large
Load Diff
5057
LoongArch-Add-tests-for-ASX-vector-xvbitclr-xvbitclr.patch
Normal file
5057
LoongArch-Add-tests-for-ASX-vector-xvbitclr-xvbitclr.patch
Normal file
File diff suppressed because it is too large
Load Diff
4600
LoongArch-Add-tests-for-ASX-vector-xvext2xv-xvexth-x.patch
Normal file
4600
LoongArch-Add-tests-for-ASX-vector-xvext2xv-xvexth-x.patch
Normal file
File diff suppressed because it is too large
Load Diff
4737
LoongArch-Add-tests-for-ASX-vector-xvextl-xvsra-xvsr.patch
Normal file
4737
LoongArch-Add-tests-for-ASX-vector-xvextl-xvsra-xvsr.patch
Normal file
File diff suppressed because it is too large
Load Diff
4510
LoongArch-Add-tests-for-ASX-vector-xvfcmp-caf-ceq-cl.patch
Normal file
4510
LoongArch-Add-tests-for-ASX-vector-xvfcmp-caf-ceq-cl.patch
Normal file
File diff suppressed because it is too large
Load Diff
4824
LoongArch-Add-tests-for-ASX-vector-xvfcmp-saf-seq-sl.patch
Normal file
4824
LoongArch-Add-tests-for-ASX-vector-xvfcmp-saf-seq-sl.patch
Normal file
File diff suppressed because it is too large
Load Diff
4991
LoongArch-Add-tests-for-ASX-vector-xvfnmadd-xvfrstp-.patch
Normal file
4991
LoongArch-Add-tests-for-ASX-vector-xvfnmadd-xvfrstp-.patch
Normal file
File diff suppressed because it is too large
Load Diff
6930
LoongArch-Add-tests-for-ASX-vector-xvhadd-xvhaddw-xv.patch
Normal file
6930
LoongArch-Add-tests-for-ASX-vector-xvhadd-xvhaddw-xv.patch
Normal file
File diff suppressed because it is too large
Load Diff
2735
LoongArch-Add-tests-for-ASX-vector-xvldi-xvmskgez-xv.patch
Normal file
2735
LoongArch-Add-tests-for-ASX-vector-xvldi-xvmskgez-xv.patch
Normal file
File diff suppressed because it is too large
Load Diff
4124
LoongArch-Add-tests-for-ASX-vector-xvmax-xvmaxi-xvmi.patch
Normal file
4124
LoongArch-Add-tests-for-ASX-vector-xvmax-xvmaxi-xvmi.patch
Normal file
File diff suppressed because it is too large
Load Diff
5766
LoongArch-Add-tests-for-ASX-vector-xvmul-xvmod-xvdiv.patch
Normal file
5766
LoongArch-Add-tests-for-ASX-vector-xvmul-xvmod-xvdiv.patch
Normal file
File diff suppressed because it is too large
Load Diff
5364
LoongArch-Add-tests-for-ASX-vector-xvpackev-xvpackod.patch
Normal file
5364
LoongArch-Add-tests-for-ASX-vector-xvpackev-xvpackod.patch
Normal file
File diff suppressed because it is too large
Load Diff
5611
LoongArch-Add-tests-for-ASX-vector-xvsll-xvsrl-instr.patch
Normal file
5611
LoongArch-Add-tests-for-ASX-vector-xvsll-xvsrl-instr.patch
Normal file
File diff suppressed because it is too large
Load Diff
4258
LoongArch-Add-tests-for-ASX-vector-xvssran-xvssrani-.patch
Normal file
4258
LoongArch-Add-tests-for-ASX-vector-xvssran-xvssrani-.patch
Normal file
File diff suppressed because it is too large
Load Diff
4123
LoongArch-Add-tests-for-ASX-vector-xvssrln-xvssrlni-.patch
Normal file
4123
LoongArch-Add-tests-for-ASX-vector-xvssrln-xvssrlni-.patch
Normal file
File diff suppressed because it is too large
Load Diff
65
LoongArch-Add-tests-for-ASX-xvldrepl-xvstelm-instruc.patch
Normal file
65
LoongArch-Add-tests-for-ASX-xvldrepl-xvstelm-instruc.patch
Normal file
@ -0,0 +1,65 @@
|
||||
From 2ef90d604d7bae207d5b2067b4ce38d04d4835be Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Tue, 12 Sep 2023 16:00:48 +0800
|
||||
Subject: [PATCH 110/124] LoongArch: Add tests for ASX xvldrepl/xvstelm
|
||||
instruction generation.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c: New test.
|
||||
* gcc.target/loongarch/vector/lasx/lasx-xvstelm.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/vector/lasx/lasx-xvldrepl.c | 16 ++++++++++++++++
|
||||
.../loongarch/vector/lasx/lasx-xvstelm.c | 14 ++++++++++++++
|
||||
2 files changed, 30 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
|
||||
new file mode 100644
|
||||
index 000000000..105567951
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvldrepl.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O3 -mlasx" } */
|
||||
+/* { dg-final { scan-assembler-times "xvldrepl.w" 2} } */
|
||||
+
|
||||
+#define N 258
|
||||
+
|
||||
+float a[N], b[N], c[N];
|
||||
+
|
||||
+void
|
||||
+test ()
|
||||
+{
|
||||
+ for (int i = 0; i < 256; i++)
|
||||
+ {
|
||||
+ a[i] = c[0] * b[i] + c[1];
|
||||
+ }
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||
new file mode 100644
|
||||
index 000000000..1a7b0e86f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-xvstelm.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O3 -mlasx" } */
|
||||
+/* { dg-final { scan-assembler-times "xvstelm.w" 8} } */
|
||||
+
|
||||
+#define LEN 256
|
||||
+
|
||||
+float a[LEN], b[LEN], c[LEN];
|
||||
+
|
||||
+void
|
||||
+test ()
|
||||
+{
|
||||
+ for (int i = 0; i < LEN; i += 2)
|
||||
+ a[i] = b[i] + c[i];
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
4354
LoongArch-Add-tests-for-Loongson-SX-builtin-function.patch
Normal file
4354
LoongArch-Add-tests-for-Loongson-SX-builtin-function.patch
Normal file
File diff suppressed because it is too large
Load Diff
7181
LoongArch-Add-tests-for-SX-vector-addition-instructi.patch
Normal file
7181
LoongArch-Add-tests-for-SX-vector-addition-instructi.patch
Normal file
File diff suppressed because it is too large
Load Diff
715
LoongArch-Add-tests-for-SX-vector-addition-vsadd-ins.patch
Normal file
715
LoongArch-Add-tests-for-SX-vector-addition-vsadd-ins.patch
Normal file
@ -0,0 +1,715 @@
|
||||
From 243656b5b87a3125c2a885d11f022a79cca98b39 Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 10:07:24 +0800
|
||||
Subject: [PATCH 082/124] LoongArch: Add tests for SX vector addition vsadd
|
||||
instructions.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c: New test.
|
||||
* gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/vector/lsx/lsx-vsadd-1.c | 335 +++++++++++++++++
|
||||
.../loongarch/vector/lsx/lsx-vsadd-2.c | 345 ++++++++++++++++++
|
||||
2 files changed, 680 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
|
||||
new file mode 100644
|
||||
index 000000000..1bc27c983
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-1.c
|
||||
@@ -0,0 +1,335 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
|
||||
+#include "../simd_correctness_check.h"
|
||||
+#include <lsxintrin.h>
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
|
||||
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
|
||||
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
|
||||
+
|
||||
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
|
||||
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
|
||||
+ long int long_int_out, long_int_result;
|
||||
+ unsigned int unsigned_int_out, unsigned_int_result;
|
||||
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfefefefefefefefe;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffff3c992b2e;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffff730f;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff3c992b2e;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffff730f;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007fff00007fff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000002bfd9461;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00007fff00007fff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000000002bfd9461;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00d3012acc56f9bb;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001021;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00d3012acc56f9bb;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001021;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000001000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000001000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000001000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000001000;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x80808080806b000b;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080806b000b;
|
||||
+ __m128i_out = __lsx_vsadd_b (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffff01ff01;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x3c600000ff800000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffe;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x3c5fffffff7fffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfffefffeff00feff;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00ff00ff00ff00ff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00ff00ff00ff00ff;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x3ff0000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x40f3fa0000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x3ff0000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x40f3fa0000000000;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000008a0000008a;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000008900000009;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x63637687636316bb;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x6363636363636363;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x6363771163631745;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x636363ec6363636c;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000004;
|
||||
+ __m128i_out = __lsx_vsadd_h (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000080000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000080000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000080000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000080000000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffefefe6a;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000c2bac2c2;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000001fffffffe;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00000000fefefe68;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000000c2bac2c2;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x027c027c000027c0;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x001ffff0003ffff0;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x000fffefffefffef;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x001ffff0003ffff0;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x028c026bfff027af;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0007000000040000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0003000000010000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0007000000040000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0003000000010000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x3f8000003f800000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x3f8000003f800000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x3fffff0000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x3fffff0000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x7f7fff003f800000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x7f7fff003f800000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000820202020;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00fe01fc0005fff4;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000003a24;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x003dbe88077c78c1;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000820205a44;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x013bc084078278b5;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000140001;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000140001;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x67eb85afb2ebb000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xc8847ef6ed3f2000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000100000001;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x67eb85b0b2ebb001;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xc8847ef6ed3f2000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffff00000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffff000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000014eb54ab;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x14eb6a002a406a00;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffff14eb54ab;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x14ea6a002a406a00;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xce9035c49ffff570;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000004;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xce9035c49ffff574;
|
||||
+ __m128i_out = __lsx_vsadd_w (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000010;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000010;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x000000000000000d;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000400;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000000000000040d;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001300000013;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000001300000013;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000001300000013;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000001300000013;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000100000100;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100000100;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000100000100;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000001000000ff;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000300000001;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000100010001;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffffffffffffffa;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffffffffffa;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00000002fffffffb;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000000010000fffb;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vadd_d (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
|
||||
new file mode 100644
|
||||
index 000000000..67d189991
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/lsx/lsx-vsadd-2.c
|
||||
@@ -0,0 +1,345 @@
|
||||
+/* { dg-do run } */
|
||||
+/* { dg-options "-mlsx -w -fno-strict-aliasing" } */
|
||||
+#include "../simd_correctness_check.h"
|
||||
+#include <lsxintrin.h>
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ __m128i __m128i_op0, __m128i_op1, __m128i_op2, __m128i_out, __m128i_result;
|
||||
+ __m128 __m128_op0, __m128_op1, __m128_op2, __m128_out, __m128_result;
|
||||
+ __m128d __m128d_op0, __m128d_op1, __m128d_op2, __m128d_out, __m128d_result;
|
||||
+
|
||||
+ int int_op0, int_op1, int_op2, int_out, int_result, i = 1, fail;
|
||||
+ long int long_op0, long_op1, long_op2, lont_out, lont_result;
|
||||
+ long int long_int_out, long_int_result;
|
||||
+ unsigned int unsigned_int_out, unsigned_int_result;
|
||||
+ unsigned long int unsigned_long_int_out, unsigned_long_int_result;
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x10f917d72d3d01e4;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x203e16d116de012b;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x10f917d72d3d01e4;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x203e16d116de012b;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffebd06fffe820c;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7ffe7fff3506;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xfffebd06fffe820c;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7ffe7fff3506;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffff0cffffff18;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffeff6a0c;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x4f804f804f804f80;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x4f804f804f804f80;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffff60ca7104649;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xfffff790a15db63d;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xfffff60ca710464a;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfffff790a15db63e;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xfffffffffffffffe;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0xffffffffffffff46;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00fe000100cf005f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fff7fff7fff7fff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x5f675e96e29a5a60;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fff7fff7fff7fff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x5fff5e97e2ff5abf;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfefffefffefffeff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000001000100010;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0001000100010058;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0001001100110068;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x7fffffff7fffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x7fffffff7fffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x7fff010181010102;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x7fffffff81010102;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xfeffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfeffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffb81a6f70;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000d48eaa1a2;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffb81ae0bf;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00012c9748eaffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0177fff0fffffff0;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000011ff8bc;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffffffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_bu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000200;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000200;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000200;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000200;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000001;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000d0000000d;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x8006000000040000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x8002000000000007;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x8006000000040000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x8002000d00000014;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000014;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000014;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000000000000000;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ int_out = __lsx_vpickve2gr_h (__m128i_op0, 0x1);
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000600007fff;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000008ffffa209;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000600007fff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x00000008ffffa209;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x636363633f3e47c1;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x41f8e080f1ef4eaa;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00000807bf0a1f80;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000800ecedee68;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x63636b6afe486741;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x41f8e880ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000ebd20000714f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00012c8a0000a58a;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000ebd20000714f;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00012c8a0000a58a;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000ffff0000e29e;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x000259140000ffff;
|
||||
+ __m128i_out = __lsx_vsadd_hu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfffffffeffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfffffffeffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0c03e17edd781b11;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x342caf9be55700b5;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00040003ff83ff84;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00040003ff4dffca;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0c07e181ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x3430af9effffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00000000ffa8ff9f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000ffffffabff99;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x000100000002007d;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0001000000020001;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00010000ffab001c;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0001ffffffadff9a;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0800080008000800;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0800080008000800;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0800080008000800;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0800080008000800;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000001;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x76f424887fffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xc110000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xc00d060000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xc110000000000001;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xffffffff7fffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x000000000000002f;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000029;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xfbfbfb17fbfb38ea;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xfbfb47fbfbfb0404;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xfbfbfb17fbfb3919;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xfbfb47fbfbfb042d;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x8080808080808081;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000000ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xffffffffffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x80808080ffffffff;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00123fff00120012;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0012001200120012;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x000000000005003a;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00123fff00120012;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x001200120017004c;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0xbfd10d0d7b6b6b73;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xc5c534920000c4ed;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0xbfd10d0d7b6b6b73;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xc5c534920000c4ed;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x000aa822a79308f6;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000000084d12ce;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x000aa822a79308f6;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x03aa558e1d37b5a1;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x00155044ffffffff;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x03aa558e2584c86f;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x021b7d24c9678a35;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x030298a6a1030a49;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x021b7d24c9678a35;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x030298a6a1030a49;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x00007a8000000480;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x00000485000004cc;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x00007a8000000480;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0x00000485000004cc;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x0000f50000000900;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0x0000090a00000998;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ *((unsigned long *)&__m128i_op0[1]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op0[0]) = 0x0000000000000000;
|
||||
+ *((unsigned long *)&__m128i_op1[1]) = 0x004eff6200d2ff76;
|
||||
+ *((unsigned long *)&__m128i_op1[0]) = 0xff70002800be00a0;
|
||||
+ *((unsigned long *)&__m128i_result[1]) = 0x004eff6200d2ff76;
|
||||
+ *((unsigned long *)&__m128i_result[0]) = 0xff70002800be00a0;
|
||||
+ __m128i_out = __lsx_vsadd_wu (__m128i_op0, __m128i_op1);
|
||||
+ ASSERTEQ_64 (__LINE__, __m128i_result, __m128i_out);
|
||||
+
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2928
LoongArch-Add-tests-for-SX-vector-floating-point-ari.patch
Normal file
2928
LoongArch-Add-tests-for-SX-vector-floating-point-ari.patch
Normal file
File diff suppressed because it is too large
Load Diff
4316
LoongArch-Add-tests-for-SX-vector-floating-point-ins.patch
Normal file
4316
LoongArch-Add-tests-for-SX-vector-floating-point-ins.patch
Normal file
File diff suppressed because it is too large
Load Diff
5411
LoongArch-Add-tests-for-SX-vector-handling-and-shuff.patch
Normal file
5411
LoongArch-Add-tests-for-SX-vector-handling-and-shuff.patch
Normal file
File diff suppressed because it is too large
Load Diff
4150
LoongArch-Add-tests-for-SX-vector-subtraction-instru.patch
Normal file
4150
LoongArch-Add-tests-for-SX-vector-subtraction-instru.patch
Normal file
File diff suppressed because it is too large
Load Diff
1710
LoongArch-Add-tests-for-SX-vector-vabsd-vmskgez-vmsk.patch
Normal file
1710
LoongArch-Add-tests-for-SX-vector-vabsd-vmskgez-vmsk.patch
Normal file
File diff suppressed because it is too large
Load Diff
1209
LoongArch-Add-tests-for-SX-vector-vand-vandi-vandn-v.patch
Normal file
1209
LoongArch-Add-tests-for-SX-vector-vand-vandi-vandn-v.patch
Normal file
File diff suppressed because it is too large
Load Diff
1375
LoongArch-Add-tests-for-SX-vector-vavg-vavgr-instruc.patch
Normal file
1375
LoongArch-Add-tests-for-SX-vector-vavg-vavgr-instruc.patch
Normal file
File diff suppressed because it is too large
Load Diff
3324
LoongArch-Add-tests-for-SX-vector-vbitclr-vbitclri-v.patch
Normal file
3324
LoongArch-Add-tests-for-SX-vector-vbitclr-vbitclri-v.patch
Normal file
File diff suppressed because it is too large
Load Diff
1114
LoongArch-Add-tests-for-SX-vector-vdiv-vmod-instruct.patch
Normal file
1114
LoongArch-Add-tests-for-SX-vector-vdiv-vmod-instruct.patch
Normal file
File diff suppressed because it is too large
Load Diff
1664
LoongArch-Add-tests-for-SX-vector-vexth-vextl-vldi-v.patch
Normal file
1664
LoongArch-Add-tests-for-SX-vector-vexth-vextl-vldi-v.patch
Normal file
File diff suppressed because it is too large
Load Diff
5295
LoongArch-Add-tests-for-SX-vector-vfcmp-instructions.patch
Normal file
5295
LoongArch-Add-tests-for-SX-vector-vfcmp-instructions.patch
Normal file
File diff suppressed because it is too large
Load Diff
1412
LoongArch-Add-tests-for-SX-vector-vfmadd-vfnmadd-vld.patch
Normal file
1412
LoongArch-Add-tests-for-SX-vector-vfmadd-vfnmadd-vld.patch
Normal file
File diff suppressed because it is too large
Load Diff
3926
LoongArch-Add-tests-for-SX-vector-vfrstp-vfrstpi-vse.patch
Normal file
3926
LoongArch-Add-tests-for-SX-vector-vfrstp-vfrstpi-vse.patch
Normal file
File diff suppressed because it is too large
Load Diff
2578
LoongArch-Add-tests-for-SX-vector-vmax-vmaxi-vmin-vm.patch
Normal file
2578
LoongArch-Add-tests-for-SX-vector-vmax-vmaxi-vmin-vm.patch
Normal file
File diff suppressed because it is too large
Load Diff
3173
LoongArch-Add-tests-for-SX-vector-vrotr-vrotri-vsra-.patch
Normal file
3173
LoongArch-Add-tests-for-SX-vector-vrotr-vrotri-vsra-.patch
Normal file
File diff suppressed because it is too large
Load Diff
4023
LoongArch-Add-tests-for-SX-vector-vsll-vslli-vsrl-vs.patch
Normal file
4023
LoongArch-Add-tests-for-SX-vector-vsll-vslli-vsrl-vs.patch
Normal file
File diff suppressed because it is too large
Load Diff
4954
LoongArch-Add-tests-for-SX-vector-vssran-vssrani-vss.patch
Normal file
4954
LoongArch-Add-tests-for-SX-vector-vssran-vssrani-vss.patch
Normal file
File diff suppressed because it is too large
Load Diff
2990
LoongArch-Add-tests-for-the-SX-vector-multiplication.patch
Normal file
2990
LoongArch-Add-tests-for-the-SX-vector-multiplication.patch
Normal file
File diff suppressed because it is too large
Load Diff
37
LoongArch-Add-tests-of-mstrict-align-option.patch
Normal file
37
LoongArch-Add-tests-of-mstrict-align-option.patch
Normal file
@ -0,0 +1,37 @@
|
||||
From f07b91862055533d779fbf76c12cb7c0ae75b53d Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 09:35:24 +0800
|
||||
Subject: [PATCH 076/124] LoongArch: Add tests of -mstrict-align option.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/strict-align.c: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
gcc/testsuite/gcc.target/loongarch/strict-align.c | 12 ++++++++++++
|
||||
1 file changed, 12 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/strict-align.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/strict-align.c b/gcc/testsuite/gcc.target/loongarch/strict-align.c
|
||||
new file mode 100644
|
||||
index 000000000..040d84958
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/strict-align.c
|
||||
@@ -0,0 +1,12 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-Ofast -mstrict-align -mlasx" } */
|
||||
+/* { dg-final { scan-assembler-not "vfadd.s" } } */
|
||||
+
|
||||
+void
|
||||
+foo (float *restrict x, float *restrict y)
|
||||
+{
|
||||
+ x[0] = x[0] + y[0];
|
||||
+ x[1] = x[1] + y[1];
|
||||
+ x[2] = x[2] + y[2];
|
||||
+ x[3] = x[3] + y[3];
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
131
LoongArch-Add-testsuite-framework-for-Loongson-SX-AS.patch
Normal file
131
LoongArch-Add-testsuite-framework-for-Loongson-SX-AS.patch
Normal file
@ -0,0 +1,131 @@
|
||||
From aebd03c944312be767f03d129eeebc0c4cdf5b4a Mon Sep 17 00:00:00 2001
|
||||
From: Xiaolong Chen <chenxiaolong@loongson.cn>
|
||||
Date: Mon, 11 Sep 2023 09:36:35 +0800
|
||||
Subject: [PATCH 077/124] LoongArch: Add testsuite framework for Loongson
|
||||
SX/ASX.
|
||||
|
||||
gcc/testsuite/ChangeLog:
|
||||
|
||||
* gcc.target/loongarch/vector/loongarch-vector.exp: New test.
|
||||
* gcc.target/loongarch/vector/simd_correctness_check.h: New test.
|
||||
|
||||
Signed-off-by: Peng Fan <fanpeng@loongson.cn>
|
||||
Signed-off-by: ticat_fp <fanpeng@loongson.cn>
|
||||
---
|
||||
.../loongarch/vector/loongarch-vector.exp | 42 +++++++++++++++
|
||||
.../loongarch/vector/simd_correctness_check.h | 54 +++++++++++++++++++
|
||||
2 files changed, 96 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
|
||||
create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
|
||||
new file mode 100644
|
||||
index 000000000..2c37aa91d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/loongarch-vector.exp
|
||||
@@ -0,0 +1,42 @@
|
||||
+#Copyright(C) 2023 Free Software Foundation, Inc.
|
||||
+
|
||||
+#This program is free software; you can redistribute it and / or modify
|
||||
+#it under the terms of the GNU General Public License as published by
|
||||
+#the Free Software Foundation; either version 3 of the License, or
|
||||
+#(at your option) any later version.
|
||||
+#
|
||||
+#This program is distributed in the hope that it will be useful,
|
||||
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the
|
||||
+#GNU General Public License for more details.
|
||||
+#
|
||||
+#You should have received a copy of the GNU General Public License
|
||||
+#along with GCC; see the file COPYING3.If not see
|
||||
+# <http: //www.gnu.org/licenses/>.
|
||||
+
|
||||
+#GCC testsuite that uses the `dg.exp' driver.
|
||||
+
|
||||
+#Exit immediately if this isn't a LoongArch target.
|
||||
+if ![istarget loongarch*-*-*] then {
|
||||
+ return
|
||||
+}
|
||||
+
|
||||
+#Load support procs.
|
||||
+load_lib gcc-dg.exp
|
||||
+
|
||||
+#If a testcase doesn't have special options, use these.
|
||||
+global DEFAULT_CFLAGS
|
||||
+if ![info exists DEFAULT_CFLAGS] then {
|
||||
+ set DEFAULT_CFLAGS " "
|
||||
+}
|
||||
+
|
||||
+#Initialize `dg'.
|
||||
+dg-init
|
||||
+
|
||||
+#Main loop.
|
||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lsx/*.\[cS\]]] \
|
||||
+ " -mlsx" $DEFAULT_CFLAGS
|
||||
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/lasx/*.\[cS\]]] \
|
||||
+ " -mlasx" $DEFAULT_CFLAGS
|
||||
+# All done.
|
||||
+dg-finish
|
||||
diff --git a/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||
new file mode 100644
|
||||
index 000000000..eb7fbd59c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/loongarch/vector/simd_correctness_check.h
|
||||
@@ -0,0 +1,54 @@
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+#include <string.h>
|
||||
+
|
||||
+#define ASSERTEQ_64(line, ref, res) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ int fail = 0; \
|
||||
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
|
||||
+ { \
|
||||
+ long *temp_ref = &ref[i], *temp_res = &res[i]; \
|
||||
+ if (abs (*temp_ref - *temp_res) > 0) \
|
||||
+ { \
|
||||
+ printf (" error: %s at line %ld , expected " #ref \
|
||||
+ "[%ld]:0x%lx, got: 0x%lx\n", \
|
||||
+ __FILE__, line, i, *temp_ref, *temp_res); \
|
||||
+ fail = 1; \
|
||||
+ } \
|
||||
+ } \
|
||||
+ if (fail == 1) \
|
||||
+ abort (); \
|
||||
+ } \
|
||||
+ while (0)
|
||||
+
|
||||
+#define ASSERTEQ_32(line, ref, res) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ int fail = 0; \
|
||||
+ for (size_t i = 0; i < sizeof (res) / sizeof (res[0]); ++i) \
|
||||
+ { \
|
||||
+ int *temp_ref = &ref[i], *temp_res = &res[i]; \
|
||||
+ if (abs (*temp_ref - *temp_res) > 0) \
|
||||
+ { \
|
||||
+ printf (" error: %s at line %ld , expected " #ref \
|
||||
+ "[%ld]:0x%x, got: 0x%x\n", \
|
||||
+ __FILE__, line, i, *temp_ref, *temp_res); \
|
||||
+ fail = 1; \
|
||||
+ } \
|
||||
+ } \
|
||||
+ if (fail == 1) \
|
||||
+ abort (); \
|
||||
+ } \
|
||||
+ while (0)
|
||||
+
|
||||
+#define ASSERTEQ_int(line, ref, res) \
|
||||
+ do \
|
||||
+ { \
|
||||
+ if (ref != res) \
|
||||
+ { \
|
||||
+ printf (" error: %s at line %ld , expected %d, got %d\n", __FILE__, \
|
||||
+ line, ref, res); \
|
||||
+ } \
|
||||
+ } \
|
||||
+ while (0)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user