672 lines
20 KiB
Diff
672 lines
20 KiB
Diff
|
|
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
|
||
|
|
index fcfa54697..f42aeb8e8 100644
|
||
|
|
--- a/gcc/Makefile.in
|
||
|
|
+++ b/gcc/Makefile.in
|
||
|
|
@@ -1449,6 +1449,7 @@ OBJS = \
|
||
|
|
inchash.o \
|
||
|
|
incpath.o \
|
||
|
|
init-regs.o \
|
||
|
|
+ ipa-hardware-detection.o \
|
||
|
|
internal-fn.o \
|
||
|
|
ipa-struct-reorg/ipa-struct-reorg.o \
|
||
|
|
ipa-cp.o \
|
||
|
|
diff --git a/gcc/common.opt b/gcc/common.opt
|
||
|
|
index fd98382fa..99e626641 100644
|
||
|
|
--- a/gcc/common.opt
|
||
|
|
+++ b/gcc/common.opt
|
||
|
|
@@ -185,6 +185,9 @@ const char *main_input_basename
|
||
|
|
Variable
|
||
|
|
int main_input_baselength
|
||
|
|
|
||
|
|
+Variable
|
||
|
|
+bool optimize_maximum
|
||
|
|
+
|
||
|
|
; The base name used for auxiliary output files.
|
||
|
|
; dump_base_name minus dump_base_ext.
|
||
|
|
|
||
|
|
@@ -469,6 +472,10 @@ Ofast
|
||
|
|
Common Optimization
|
||
|
|
Optimize for speed disregarding exact standards compliance.
|
||
|
|
|
||
|
|
+Om
|
||
|
|
+Common Optimization
|
||
|
|
+Optimize for maximum performance: -O3 plus additional aggressive optimizations.
|
||
|
|
+
|
||
|
|
Og
|
||
|
|
Common Optimization
|
||
|
|
Optimize for debugging experience rather than speed or size.
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||
|
|
index 309ecc3d9..ad853af9a 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.cc
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.cc
|
||
|
|
@@ -18637,6 +18637,134 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
|
||
|
|
return stmt_cost;
|
||
|
|
}
|
||
|
|
|
||
|
|
+/* Check whether in C language or LTO with only C language. */
|
||
|
|
+extern bool lang_c_p (void);
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+override_C_optimize_options (struct gcc_options *opts)
|
||
|
|
+{
|
||
|
|
+ opts->x_flag_ipa_reorder_fields = 1;
|
||
|
|
+ opts->x_flag_ipa_struct_reorg = 6;
|
||
|
|
+ opts->x_struct_layout_optimize_level = 6;
|
||
|
|
+ opts->x_flag_gnu89_inline = 1;
|
||
|
|
+ opts->x_flag_ccmp2 = 1;
|
||
|
|
+ opts->x_flag_array_widen_compare = 1;
|
||
|
|
+ opts->x_flag_convert_minmax = 1;
|
||
|
|
+ opts->x_flag_tree_slp_transpose_vectorize = 1;
|
||
|
|
+ opts->x_param_max_inline_insns_auto = 64;
|
||
|
|
+ opts->x_param_inline_unit_growth = 96;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Check whether in CPP language or LTO with only CPP language. */
|
||
|
|
+static bool
|
||
|
|
+lang_cpp_p (void)
|
||
|
|
+{
|
||
|
|
+ const char *language_string = lang_hooks.name;
|
||
|
|
+ if (!language_string)
|
||
|
|
+ {
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ if (lang_GNU_CXX ())
|
||
|
|
+ {
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check
|
||
|
|
+ {
|
||
|
|
+ unsigned i = 0;
|
||
|
|
+ tree t = NULL_TREE;
|
||
|
|
+ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
|
||
|
|
+ {
|
||
|
|
+ language_string = TRANSLATION_UNIT_LANGUAGE (t);
|
||
|
|
+ if (language_string == NULL
|
||
|
|
+ || strncmp (lang_hooks.name, "GNU C++", 7))
|
||
|
|
+ {
|
||
|
|
+ return false;
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return true;
|
||
|
|
+ }
|
||
|
|
+ return false;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+override_CPP_optimize_options (struct gcc_options *opts)
|
||
|
|
+{
|
||
|
|
+ opts->x_flag_finite_loops = 1;
|
||
|
|
+ opts->x_flag_omit_frame_pointer = 1;
|
||
|
|
+ opts->x_flag_sized_deallocation = 0;
|
||
|
|
+ opts->x_flag_loop_elim = 1;
|
||
|
|
+ opts->x_flag_convert_minmax = 1;
|
||
|
|
+ opts->x_param_early_inlining_insns = 256;
|
||
|
|
+ opts->x_param_max_inline_insns_auto = 128;
|
||
|
|
+ opts->x_param_inline_unit_growth = 256;
|
||
|
|
+ opts->x_flag_cmlt_arith = 1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+override_optimize_options_1 (struct gcc_options *opts)
|
||
|
|
+{
|
||
|
|
+ opts->x_flag_split_ldp_stp = 1;
|
||
|
|
+ opts->x_flag_if_conversion_gimple = 1;
|
||
|
|
+ opts->x_flag_ifcvt_allow_complicated_cmps = 1;
|
||
|
|
+ opts->x_param_ifcvt_allow_register_renaming = 2;
|
||
|
|
+ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48;
|
||
|
|
+ opts->x_param_max_rtl_if_conversion_predictable_cost = 48;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+override_Fortran_optimize_options (struct gcc_options *opts)
|
||
|
|
+{
|
||
|
|
+ opts->x_flag_unroll_loops = 1;
|
||
|
|
+ opts->x_flag_unconstrained_commons = 1;
|
||
|
|
+ opts->x_param_ipa_cp_eval_threshold = 1;
|
||
|
|
+ opts->x_param_ipa_cp_unit_growth = 80;
|
||
|
|
+ opts->x_param_ipa_cp_max_recursive_depth = 8;
|
||
|
|
+ opts->x_param_large_unit_insns = 30000;
|
||
|
|
+ opts->x_flag_ira_loop_pressure = 1;
|
||
|
|
+ opts->x_flag_inline_functions_called_once = 0;
|
||
|
|
+ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY;
|
||
|
|
+ opts->x_flag_delayed_branch = 1;
|
||
|
|
+ opts->x_flag_gcse_las = 1;
|
||
|
|
+ opts->x_flag_gcse_sm = 1;
|
||
|
|
+ opts->x_flag_ipa_pta = 1;
|
||
|
|
+ opts->x_flag_reorder_blocks_and_partition = 1;
|
||
|
|
+ opts->x_flag_reorder_blocks = 1;
|
||
|
|
+ opts->x_flag_crypto_accel_aes = 1;
|
||
|
|
+ opts->x_param_flexible_seg_len = 1;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+/* Reset the optimize option.
|
||
|
|
+ After checking the model result, this function can
|
||
|
|
+ reset the more appropriate options. */
|
||
|
|
+static void
|
||
|
|
+reset_machine_option (struct gcc_options *opts)
|
||
|
|
+{
|
||
|
|
+ if (!(opts->x_optimize_maximum)
|
||
|
|
+ || strstr (opts->x_aarch64_tune_string, "hip09") == NULL)
|
||
|
|
+ {
|
||
|
|
+ return;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||
|
|
+ if (ai_infer_level)
|
||
|
|
+ {
|
||
|
|
+ override_optimize_options_1 (opts);
|
||
|
|
+ if (lang_c_p ())
|
||
|
|
+ {
|
||
|
|
+ override_C_optimize_options (opts);
|
||
|
|
+ }
|
||
|
|
+ else if (lang_cpp_p ())
|
||
|
|
+ {
|
||
|
|
+ override_CPP_optimize_options (opts);
|
||
|
|
+ }
|
||
|
|
+ else if (lang_GNU_Fortran ())
|
||
|
|
+ {
|
||
|
|
+ override_Fortran_optimize_options (opts);
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+
|
||
|
|
/* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND
|
||
|
|
and which when vectorized would operate on vector type VECTYPE. Add the
|
||
|
|
cost of any embedded operations. */
|
||
|
|
@@ -20089,6 +20217,7 @@ aarch64_override_options_internal (struct gcc_options *opts)
|
||
|
|
&& opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level)
|
||
|
|
opts->x_flag_prefetch_loop_arrays = 1;
|
||
|
|
|
||
|
|
+ reset_machine_option (opts);
|
||
|
|
aarch64_override_options_after_change_1 (opts);
|
||
|
|
}
|
||
|
|
|
||
|
|
diff --git a/gcc/ipa-hardware-detection.cc b/gcc/ipa-hardware-detection.cc
|
||
|
|
new file mode 100644
|
||
|
|
index 000000000..8085a8c65
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/gcc/ipa-hardware-detection.cc
|
||
|
|
@@ -0,0 +1,243 @@
|
||
|
|
+/* Hardware Detection.
|
||
|
|
+ Copyright (C) 2024-2024 Free Software Foundation, Inc.
|
||
|
|
+This file is part of GCC.
|
||
|
|
+GCC is free software; you can redistribute it and/or modify it
|
||
|
|
+under the terms of the GNU General Public License as published by the
|
||
|
|
+Free Software Foundation; either version 3, or (at your option) any
|
||
|
|
+later version.
|
||
|
|
+GCC is distributed in the hope that it will be useful, but WITHOUT
|
||
|
|
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||
|
|
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||
|
|
+for more details.
|
||
|
|
+You should have received a copy of the GNU General Public License
|
||
|
|
+along with GCC; see the file COPYING3. If not see
|
||
|
|
+<http://www.gnu.org/licenses/>. */
|
||
|
|
+
|
||
|
|
+#include "config.h"
|
||
|
|
+#include "system.h"
|
||
|
|
+#include "coretypes.h"
|
||
|
|
+#include "backend.h"
|
||
|
|
+#include "target.h"
|
||
|
|
+#include "tree.h"
|
||
|
|
+#include "gimple.h"
|
||
|
|
+#include "tree-pass.h"
|
||
|
|
+#include "gimple-ssa.h"
|
||
|
|
+#include "tree-pretty-print.h"
|
||
|
|
+#include "fold-const.h"
|
||
|
|
+#include "gimplify.h"
|
||
|
|
+#include "gimple-iterator.h"
|
||
|
|
+#include "tree-ssa-loop-manip.h"
|
||
|
|
+#include "tree-ssa-loop.h"
|
||
|
|
+#include "ssa.h"
|
||
|
|
+#include "tree-into-ssa.h"
|
||
|
|
+#include "cfganal.h"
|
||
|
|
+#include "cfgloop.h"
|
||
|
|
+#include "gimple-pretty-print.h"
|
||
|
|
+#include "tree-cfg.h"
|
||
|
|
+#include "cgraph.h"
|
||
|
|
+#include "print-tree.h"
|
||
|
|
+#include "cfghooks.h"
|
||
|
|
+#include "gimple-fold.h"
|
||
|
|
+#include "gimplify-me.h"
|
||
|
|
+
|
||
|
|
+namespace {
|
||
|
|
+
|
||
|
|
+/* Build a binary operation and gimplify it. Emit code before GSI.
|
||
|
|
+ Return the gimple_val holding the result. */
|
||
|
|
+
|
||
|
|
+static tree
|
||
|
|
+gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code,
|
||
|
|
+ tree type, tree a, tree b)
|
||
|
|
+{
|
||
|
|
+ tree ret;
|
||
|
|
+
|
||
|
|
+ ret = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), code, type, a, b);
|
||
|
|
+ return force_gimple_operand_gsi (gsi, ret, true, NULL, true,
|
||
|
|
+ GSI_SAME_STMT);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static basic_block
|
||
|
|
+create_abort_bb (basic_block last_bb)
|
||
|
|
+{
|
||
|
|
+ basic_block bb = create_empty_bb (last_bb);
|
||
|
|
+ if (last_bb->loop_father != NULL)
|
||
|
|
+ {
|
||
|
|
+ add_bb_to_loop (bb, last_bb->loop_father);
|
||
|
|
+ loops_state_set (LOOPS_NEED_FIXUP);
|
||
|
|
+ }
|
||
|
|
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
||
|
|
+ tree fn = builtin_decl_implicit (BUILT_IN_ABORT);
|
||
|
|
+ gimple *g = gimple_build_call (fn, 0);
|
||
|
|
+ gsi_insert_after (&gsi, g, GSI_NEW_STMT);
|
||
|
|
+ return bb;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static basic_block
|
||
|
|
+create_part_bb (basic_block last_bb, tree part_base)
|
||
|
|
+{
|
||
|
|
+ basic_block bb = create_empty_bb (last_bb);
|
||
|
|
+ if (last_bb->loop_father != NULL)
|
||
|
|
+ {
|
||
|
|
+ add_bb_to_loop (bb, last_bb->loop_father);
|
||
|
|
+ loops_state_set (LOOPS_NEED_FIXUP);
|
||
|
|
+ }
|
||
|
|
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
||
|
|
+ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
|
||
|
|
+ /* This number is used to efficiently identify the supported part range. */
|
||
|
|
+ tree part_cond = gimplify_build2 (
|
||
|
|
+ &gsi, PLUS_EXPR, unsigned_type_node, part_base,
|
||
|
|
+ build_int_cst (unsigned_type_node, 4294963967));
|
||
|
|
+ gcond *cond = gimple_build_cond (LE_EXPR, part_cond,
|
||
|
|
+ build_int_cst (unsigned_type_node, 2),
|
||
|
|
+ NULL_TREE, NULL_TREE);
|
||
|
|
+ gimple_set_location (cond, input_location);
|
||
|
|
+ gsi_insert_before (&gsi, cond, GSI_SAME_STMT);
|
||
|
|
+ gsi_remove (&gsi, true);
|
||
|
|
+ return bb;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static void
|
||
|
|
+create_detection_bb ()
|
||
|
|
+{
|
||
|
|
+ edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||
|
|
+ basic_block ret_bb = old_e->dest;
|
||
|
|
+
|
||
|
|
+ basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
||
|
|
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL)
|
||
|
|
+ {
|
||
|
|
+ add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father);
|
||
|
|
+ loops_state_set (LOOPS_NEED_FIXUP);
|
||
|
|
+ }
|
||
|
|
+ tree cpuid_decl = build_decl (input_location, VAR_DECL,
|
||
|
|
+ get_identifier ("cpuid"), unsigned_type_node);
|
||
|
|
+ add_local_decl (cfun, cpuid_decl);
|
||
|
|
+
|
||
|
|
+ gimple_stmt_iterator gsi = gsi_last_bb (detection_bb);
|
||
|
|
+ vec<tree, va_gc> *outputs = NULL;
|
||
|
|
+ tree purpose = build_string (strlen ("=r"), "=r");
|
||
|
|
+ tree output = build_tree_list (
|
||
|
|
+ build_tree_list (NULL_TREE, purpose), cpuid_decl);
|
||
|
|
+ vec_safe_push (outputs, output);
|
||
|
|
+ gasm *asm_stmt = gimple_build_asm_vec (
|
||
|
|
+ "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL);
|
||
|
|
+ gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT);
|
||
|
|
+ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT);
|
||
|
|
+
|
||
|
|
+ tree implementer = gimplify_build2 (
|
||
|
|
+ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
|
||
|
|
+ build_int_cst (unsigned_type_node, 24));
|
||
|
|
+ tree part_base = gimplify_build2 (
|
||
|
|
+ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
|
||
|
|
+ build_int_cst (unsigned_type_node, 4));
|
||
|
|
+ tree part = gimplify_build2 (
|
||
|
|
+ &gsi, BIT_AND_EXPR, unsigned_type_node, part_base,
|
||
|
|
+ build_int_cst (unsigned_type_node, 4095));
|
||
|
|
+ gcond *implementer_cond = gimple_build_cond (
|
||
|
|
+ EQ_EXPR, implementer,
|
||
|
|
+ build_int_cst (unsigned_type_node, 72),
|
||
|
|
+ NULL_TREE, NULL_TREE);
|
||
|
|
+ gimple_set_location (implementer_cond, input_location);
|
||
|
|
+ gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT);
|
||
|
|
+ gsi_remove (&gsi, true);
|
||
|
|
+
|
||
|
|
+ basic_block part_bb = create_part_bb (detection_bb, part);
|
||
|
|
+ basic_block abort_bb = create_abort_bb (part_bb);
|
||
|
|
+
|
||
|
|
+ remove_edge_raw (old_e);
|
||
|
|
+ make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun),
|
||
|
|
+ detection_bb, EDGE_FALLTHRU);
|
||
|
|
+ edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE);
|
||
|
|
+ etrue->probability = profile_probability::likely ();
|
||
|
|
+ edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE);
|
||
|
|
+ efalse->probability = profile_probability::unlikely ();
|
||
|
|
+ edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE);
|
||
|
|
+ part_true->probability = profile_probability::likely ();
|
||
|
|
+ edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE);
|
||
|
|
+ part_false->probability = profile_probability::unlikely ();
|
||
|
|
+ make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU);
|
||
|
|
+ if (dom_info_available_p (CDI_DOMINATORS))
|
||
|
|
+ {
|
||
|
|
+ set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb);
|
||
|
|
+ set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb);
|
||
|
|
+ set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb);
|
||
|
|
+ }
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+const pass_data pass_data_ipa_hardware_detection =
|
||
|
|
+{
|
||
|
|
+ SIMPLE_IPA_PASS,
|
||
|
|
+ "hardware_detection",
|
||
|
|
+ OPTGROUP_NONE,
|
||
|
|
+ TV_IPA_HARDWARE_DETECTION,
|
||
|
|
+ (PROP_cfg | PROP_ssa),
|
||
|
|
+ 0,
|
||
|
|
+ 0,
|
||
|
|
+ 0,
|
||
|
|
+ (TODO_update_ssa | TODO_verify_all)
|
||
|
|
+};
|
||
|
|
+
|
||
|
|
+class pass_ipa_hardware_detection : public simple_ipa_opt_pass
|
||
|
|
+{
|
||
|
|
+public:
|
||
|
|
+ pass_ipa_hardware_detection (gcc::context *ctxt)
|
||
|
|
+ : simple_ipa_opt_pass (pass_data_ipa_hardware_detection, ctxt)
|
||
|
|
+ {}
|
||
|
|
+
|
||
|
|
+ virtual bool gate (function *);
|
||
|
|
+ virtual unsigned int execute (function *);
|
||
|
|
+}; // class pass_ipa_hardware_detection
|
||
|
|
+
|
||
|
|
+bool
|
||
|
|
+pass_ipa_hardware_detection::gate (function *)
|
||
|
|
+{
|
||
|
|
+ const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||
|
|
+ return (ai_infer_level
|
||
|
|
+ && optimize_maximum > 0
|
||
|
|
+ /* Only enable in lto or whole_program. */
|
||
|
|
+ && (in_lto_p || flag_whole_program));
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+unsigned int
|
||
|
|
+pass_ipa_hardware_detection::execute (function *)
|
||
|
|
+{
|
||
|
|
+ unsigned int ret = 0;
|
||
|
|
+ cgraph_node *cnode;
|
||
|
|
+ FOR_EACH_FUNCTION (cnode)
|
||
|
|
+ {
|
||
|
|
+ if (!cnode->real_symbol_p ())
|
||
|
|
+ {
|
||
|
|
+ continue;
|
||
|
|
+ }
|
||
|
|
+ if (cnode->definition)
|
||
|
|
+ {
|
||
|
|
+ if (!cnode->has_gimple_body_p () || cnode->inlined_to)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ cnode->get_body ();
|
||
|
|
+ function *fn = DECL_STRUCT_FUNCTION (cnode->decl);
|
||
|
|
+ if (!fn)
|
||
|
|
+ continue;
|
||
|
|
+
|
||
|
|
+ if (DECL_NAME (cnode->decl)
|
||
|
|
+ && MAIN_NAME_P (DECL_NAME (cnode->decl)))
|
||
|
|
+ {
|
||
|
|
+ push_cfun (fn);
|
||
|
|
+ calculate_dominance_info (CDI_DOMINATORS);
|
||
|
|
+
|
||
|
|
+ create_detection_bb ();
|
||
|
|
+
|
||
|
|
+ cgraph_edge::rebuild_edges ();
|
||
|
|
+ free_dominance_info (CDI_DOMINATORS);
|
||
|
|
+ pop_cfun ();
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ }
|
||
|
|
+ return ret;
|
||
|
|
+}
|
||
|
|
+} // anon namespace
|
||
|
|
+
|
||
|
|
+simple_ipa_opt_pass *
|
||
|
|
+make_pass_ipa_hardware_detection (gcc::context *ctxt)
|
||
|
|
+{
|
||
|
|
+ return new pass_ipa_hardware_detection (ctxt);
|
||
|
|
+}
|
||
|
|
diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc
|
||
|
|
index 489a6e02a..12c3f7299 100644
|
||
|
|
--- a/gcc/opts-common.cc
|
||
|
|
+++ b/gcc/opts-common.cc
|
||
|
|
@@ -992,6 +992,158 @@ opts_concat (const char *first, ...)
|
||
|
|
return newstr;
|
||
|
|
}
|
||
|
|
|
||
|
|
/* Signature of the model entry point looked up in libONNXRunner.so.  */
typedef int64_t (*run_ai_model_func)(int, const char **,
                                     const char *, int, int64_t *);

/* Helpers to convert the void * returned by dlsym into a function pointer
   through a union instead of a direct cast.  */
#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; }
#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q)
#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq)

/* Load libonnxruntime.so and libONNXRunner.so, call the latter's
   "runONNXModelOptimizer" with the command line (ARGC, ARGV), the CPU name
   MCPU_OPTION and ARGC_HW hardware parameters in ARGV_HW, and return the
   model's prediction.

   Returns -1 when either library cannot be loaded or the entry point is
   missing.  When the prediction is 1, AI_INFER_LEVEL=1 is placed in the
   environment.  Both libraries are closed again on every path.  */
static int64_t
ai_infer_optimization (int argc, const char **argv,
                       const char *mcpu_option,
                       int argc_hw, int64_t *argv_hw)
{
  /* Load dependent AI-framework libraries.  */
  void *onnxruntime_lib_handle
    = dlopen ("libonnxruntime.so", RTLD_LAZY | RTLD_GLOBAL);
  if (!onnxruntime_lib_handle)
    return -1;

  void *ai4c_lib_handle = dlopen ("libONNXRunner.so", RTLD_LAZY | RTLD_GLOBAL);
  if (!ai4c_lib_handle)
    {
      /* Do not leak the runtime library that was already opened.  */
      dlclose (onnxruntime_lib_handle);
      return -1;
    }

  /* Run AI4Compiler model.  */
  int64_t model_pred = -1;
  PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union;
  PTR_UNION_AS_VOID_PTR (run_ai_model_func_union)
    = dlsym (ai4c_lib_handle, "runONNXModelOptimizer");
  run_ai_model_func run_ai_model
    = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union);
  if (run_ai_model)
    model_pred = (*run_ai_model) (argc, argv, mcpu_option, argc_hw, argv_hw);

  dlclose (ai4c_lib_handle);
  dlclose (onnxruntime_lib_handle);

  if (model_pred == 1)
    {
      /* putenv keeps the pointer it is given and takes a non-const char *,
         so hand it writable storage with static lifetime.  */
      static char ai_infer_level_env[] = "AI_INFER_LEVEL=1";
      putenv (ai_infer_level_env);
    }
  return model_pred;
}
|
||
|
|
+
|
||
|
|
+static int
|
||
|
|
+handle_lto_option (unsigned int lang_mask,
|
||
|
|
+ unsigned int num_decoded_options,
|
||
|
|
+ unsigned int argc,
|
||
|
|
+ const char **argv,
|
||
|
|
+ struct cl_decoded_option *&opt_array)
|
||
|
|
+{
|
||
|
|
+ int ret = 0;
|
||
|
|
+ char *lan = "";
|
||
|
|
+ char *compiler = xstrdup (argv[0]);
|
||
|
|
+ lan = strrchr (compiler, '/');
|
||
|
|
+ if (lan != NULL)
|
||
|
|
+ lan ++;
|
||
|
|
+ else
|
||
|
|
+ lan = compiler;
|
||
|
|
+ if (strstr (lan, "gcc") != NULL)
|
||
|
|
+ {
|
||
|
|
+ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 2);
|
||
|
|
+ const char* lto_flag = "-flto=8";
|
||
|
|
+ decode_cmdline_option (<o_flag, lang_mask,
|
||
|
|
+ &opt_array[num_decoded_options]);
|
||
|
|
+ ret++;
|
||
|
|
+ const char* ltopartition_flag = "-flto-partition=one";
|
||
|
|
+ decode_cmdline_option (<opartition_flag, lang_mask,
|
||
|
|
+ &opt_array[num_decoded_options + 1]);
|
||
|
|
+ ret++;
|
||
|
|
+ }
|
||
|
|
+ else if (strstr (lan, "g++") != NULL
|
||
|
|
+ || strstr (lan, "gfortran") != NULL)
|
||
|
|
+ {
|
||
|
|
+ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 1);
|
||
|
|
+ const char* lto_flag = "-flto=8";
|
||
|
|
+ decode_cmdline_option (<o_flag, lang_mask,
|
||
|
|
+ &opt_array[num_decoded_options]);
|
||
|
|
+ ret++;
|
||
|
|
+ }
|
||
|
|
+ if (compiler)
|
||
|
|
+ free (compiler);
|
||
|
|
+ return ret;
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
+static int
|
||
|
|
+handle_machine_option (unsigned int lang_mask,
|
||
|
|
+ unsigned int num_decoded_options,
|
||
|
|
+ unsigned int argc,
|
||
|
|
+ const char **argv,
|
||
|
|
+ struct cl_decoded_option *&opt_array)
|
||
|
|
+{
|
||
|
|
+ int ret = 0;
|
||
|
|
+ bool flag_Om = false;
|
||
|
|
+ bool flag_hip09 = false;
|
||
|
|
+ for (unsigned i = 1; i < argc; i ++)
|
||
|
|
+ {
|
||
|
|
+ if (strcmp (argv[i], "-Om") == 0)
|
||
|
|
+ flag_Om = true;
|
||
|
|
+ if (strstr (argv[i], "mcpu=hip09") != NULL)
|
||
|
|
+ flag_hip09 = true;
|
||
|
|
+ }
|
||
|
|
+ if (!flag_hip09 || !flag_Om)
|
||
|
|
+ {
|
||
|
|
+ return ret;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ const char *ai_infer_level = getenv ("AI_INFER_LEVEL");
|
||
|
|
+ if (ai_infer_level)
|
||
|
|
+ {
|
||
|
|
+ return ret;
|
||
|
|
+ }
|
||
|
|
+ int argc_hw = 6;
|
||
|
|
+ int64_t argv_hw[argc_hw] = {
|
||
|
|
+ global_options.x_param_simultaneous_prefetches,
|
||
|
|
+ global_options.x_param_l1_cache_size,
|
||
|
|
+ global_options.x_param_l1_cache_line_size,
|
||
|
|
+ global_options.x_param_l2_cache_size,
|
||
|
|
+ global_options.x_param_prefetch_latency,
|
||
|
|
+ global_options.x_param_ipa_prefetch_distance_factor};
|
||
|
|
+ int64_t output_pred = ai_infer_optimization (
|
||
|
|
+ argc, argv, "hip09", argc_hw, argv_hw);
|
||
|
|
+ if (output_pred != 1)
|
||
|
|
+ {
|
||
|
|
+ return ret;
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ return handle_lto_option (lang_mask, num_decoded_options,
|
||
|
|
+ argc, argv, opt_array);
|
||
|
|
+}
|
||
|
|
+
|
||
|
|
/* Decode command-line options (ARGC and ARGV being the arguments of
|
||
|
|
main) into an array, setting *DECODED_OPTIONS to a pointer to that
|
||
|
|
array and *DECODED_OPTIONS_COUNT to the number of entries in the
|
||
|
|
@@ -1090,6 +1242,9 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
|
||
|
|
num_decoded_options++;
|
||
|
|
}
|
||
|
|
|
||
|
|
+ num_decoded_options += handle_machine_option (lang_mask, num_decoded_options,
|
||
|
|
+ argc, argv, opt_array);
|
||
|
|
+
|
||
|
|
*decoded_options = opt_array;
|
||
|
|
*decoded_options_count = num_decoded_options;
|
||
|
|
prune_options (decoded_options, decoded_options_count, lang_mask);
|
||
|
|
diff --git a/gcc/opts.cc b/gcc/opts.cc
|
||
|
|
index e34e5ee8e..d97f6079f 100644
|
||
|
|
--- a/gcc/opts.cc
|
||
|
|
+++ b/gcc/opts.cc
|
||
|
|
@@ -780,6 +780,14 @@ default_options_optimization (struct gcc_options *opts,
|
||
|
|
opts->x_optimize_debug = 1;
|
||
|
|
break;
|
||
|
|
|
||
|
|
+ case OPT_Om:
|
||
|
|
+ /* -Om adds flags to -O3. */
|
||
|
|
+ opts->x_optimize_size = 0;
|
||
|
|
+ opts->x_optimize = 3;
|
||
|
|
+ opts->x_optimize_maximum = true;
|
||
|
|
+ opts->x_optimize_debug = 0;
|
||
|
|
+ break;
|
||
|
|
+
|
||
|
|
case OPT_fopenacc:
|
||
|
|
if (opt->value)
|
||
|
|
openacc_mode = true;
|
||
|
|
@@ -2733,6 +2741,8 @@ common_handle_option (struct gcc_options *opts,
|
||
|
|
&= ~(SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT);
|
||
|
|
break;
|
||
|
|
|
||
|
|
+ case OPT_Om:
|
||
|
|
+ break;
|
||
|
|
case OPT_O:
|
||
|
|
case OPT_Os:
|
||
|
|
case OPT_Ofast:
|
||
|
|
diff --git a/gcc/passes.def b/gcc/passes.def
|
||
|
|
index 8797f166f..690d344c0 100644
|
||
|
|
--- a/gcc/passes.def
|
||
|
|
+++ b/gcc/passes.def
|
||
|
|
@@ -179,6 +179,7 @@ along with GCC; see the file COPYING3. If not see
|
||
|
|
passes are executed after partitioning and thus see just parts of the
|
||
|
|
compiled unit. */
|
||
|
|
INSERT_PASSES_AFTER (all_late_ipa_passes)
|
||
|
|
+ NEXT_PASS (pass_ipa_hardware_detection);
|
||
|
|
NEXT_PASS (pass_ipa_pta);
|
||
|
|
/* FIXME: this should be a normal IP pass. */
|
||
|
|
NEXT_PASS (pass_ipa_struct_reorg);
|
||
|
|
diff --git a/gcc/timevar.def b/gcc/timevar.def
|
||
|
|
index 8e7510eb3..bd8c9a4f7 100644
|
||
|
|
--- a/gcc/timevar.def
|
||
|
|
+++ b/gcc/timevar.def
|
||
|
|
@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp")
|
||
|
|
DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics")
|
||
|
|
DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
|
||
|
|
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
|
||
|
|
+DEFTIMEVAR (TV_IPA_HARDWARE_DETECTION, "ipa detection")
|
||
|
|
DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch")
|
||
|
|
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
|
||
|
|
DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations")
|
||
|
|
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||
|
|
index 1c983ef71..ee873f0b2 100644
|
||
|
|
--- a/gcc/tree-pass.h
|
||
|
|
+++ b/gcc/tree-pass.h
|
||
|
|
@@ -528,6 +528,8 @@ extern ipa_opt_pass_d *make_pass_ipa_icp (gcc::context *ctxt);
|
||
|
|
extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt);
|
||
|
|
extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
|
||
|
|
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
|
||
|
|
+extern simple_ipa_opt_pass *make_pass_ipa_hardware_detection (gcc::context *
|
||
|
|
+ ctxt);
|
||
|
|
extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt);
|
||
|
|
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
|
||
|
|
extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
|