166 lines
5.5 KiB
Diff
166 lines
5.5 KiB
Diff
From c5abe64e64aba601e67f3367a27caf616062b8f4 Mon Sep 17 00:00:00 2001
|
|
From: Xi Ruoyao <xry111@xry111.site>
|
|
Date: Sat, 9 Dec 2023 17:41:32 +0800
|
|
Subject: [PATCH 071/188] LoongArch: Fix instruction costs [PR112936]
|
|
|
|
Replace the instruction costs in loongarch_rtx_cost_data constructor
|
|
based on micro-benchmark results on LA464 and LA664.
|
|
|
|
This allows optimizations such as reducing "x * 17" to alsl, and
|
|
"x * 68" to alsl and slli.
|
|
|
|
gcc/ChangeLog:
|
|
|
|
PR target/112936
|
|
* config/loongarch/loongarch-def.cc
|
|
(loongarch_rtx_cost_data::loongarch_rtx_cost_data): Update
|
|
instruction costs per micro-benchmark results.
|
|
(loongarch_rtx_cost_optimize_size): Set all instruction costs
|
|
to (COSTS_N_INSNS (1) + 1).
|
|
* config/loongarch/loongarch.cc (loongarch_rtx_costs): Remove
|
|
special case for multiplication when optimizing for size.
|
|
Adjust division cost when TARGET_64BIT && !TARGET_DIV32.
|
|
Account the extra cost when TARGET_CHECK_ZERO_DIV and
|
|
optimizing for speed.
|
|
|
|
gcc/testsuite/ChangeLog:
|
|
|
|
PR target/112936
|
|
* gcc.target/loongarch/mul-const-reduction.c: New test.
|
|
---
|
|
gcc/config/loongarch/loongarch-def.cc | 39 ++++++++++---------
|
|
gcc/config/loongarch/loongarch.cc | 22 +++++------
|
|
.../loongarch/mul-const-reduction.c | 11 ++++++
|
|
3 files changed, 43 insertions(+), 29 deletions(-)
|
|
create mode 100644 gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
|
|
|
|
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc
|
|
index 6217b1926..4a8885e83 100644
|
|
--- a/gcc/config/loongarch/loongarch-def.cc
|
|
+++ b/gcc/config/loongarch/loongarch-def.cc
|
|
@@ -92,15 +92,15 @@ array_tune<loongarch_align> loongarch_cpu_align =
|
|
|
|
/* Default RTX cost initializer. */
|
|
loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
|
- : fp_add (COSTS_N_INSNS (1)),
|
|
- fp_mult_sf (COSTS_N_INSNS (2)),
|
|
- fp_mult_df (COSTS_N_INSNS (4)),
|
|
- fp_div_sf (COSTS_N_INSNS (6)),
|
|
+ : fp_add (COSTS_N_INSNS (5)),
|
|
+ fp_mult_sf (COSTS_N_INSNS (5)),
|
|
+ fp_mult_df (COSTS_N_INSNS (5)),
|
|
+ fp_div_sf (COSTS_N_INSNS (8)),
|
|
fp_div_df (COSTS_N_INSNS (8)),
|
|
- int_mult_si (COSTS_N_INSNS (1)),
|
|
- int_mult_di (COSTS_N_INSNS (1)),
|
|
- int_div_si (COSTS_N_INSNS (4)),
|
|
- int_div_di (COSTS_N_INSNS (6)),
|
|
+ int_mult_si (COSTS_N_INSNS (4)),
|
|
+ int_mult_di (COSTS_N_INSNS (4)),
|
|
+ int_div_si (COSTS_N_INSNS (5)),
|
|
+ int_div_di (COSTS_N_INSNS (5)),
|
|
branch_cost (6),
|
|
memory_latency (4) {}
|
|
|
|
@@ -111,18 +111,21 @@ loongarch_rtx_cost_data::loongarch_rtx_cost_data ()
|
|
array_tune<loongarch_rtx_cost_data> loongarch_cpu_rtx_cost_data =
|
|
array_tune<loongarch_rtx_cost_data> ();
|
|
|
|
-/* RTX costs to use when optimizing for size. */
|
|
+/* RTX costs to use when optimizing for size.
|
|
+ We use a value slightly larger than COSTS_N_INSNS (1) for all of them
|
|
+ because they are slower than simple instructions. */
|
|
+#define COST_COMPLEX_INSN (COSTS_N_INSNS (1) + 1)
|
|
const loongarch_rtx_cost_data loongarch_rtx_cost_optimize_size =
|
|
loongarch_rtx_cost_data ()
|
|
- .fp_add_ (4)
|
|
- .fp_mult_sf_ (4)
|
|
- .fp_mult_df_ (4)
|
|
- .fp_div_sf_ (4)
|
|
- .fp_div_df_ (4)
|
|
- .int_mult_si_ (4)
|
|
- .int_mult_di_ (4)
|
|
- .int_div_si_ (4)
|
|
- .int_div_di_ (4);
|
|
+ .fp_add_ (COST_COMPLEX_INSN)
|
|
+ .fp_mult_sf_ (COST_COMPLEX_INSN)
|
|
+ .fp_mult_df_ (COST_COMPLEX_INSN)
|
|
+ .fp_div_sf_ (COST_COMPLEX_INSN)
|
|
+ .fp_div_df_ (COST_COMPLEX_INSN)
|
|
+ .int_mult_si_ (COST_COMPLEX_INSN)
|
|
+ .int_mult_di_ (COST_COMPLEX_INSN)
|
|
+ .int_div_si_ (COST_COMPLEX_INSN)
|
|
+ .int_div_di_ (COST_COMPLEX_INSN);
|
|
|
|
array_tune<int> loongarch_cpu_issue_rate = array_tune<int> ()
|
|
.set (CPU_NATIVE, 4)
|
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
|
index 4362149ef..afbb55390 100644
|
|
--- a/gcc/config/loongarch/loongarch.cc
|
|
+++ b/gcc/config/loongarch/loongarch.cc
|
|
@@ -3797,8 +3797,6 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
|
*total = (speed
|
|
? loongarch_cost->int_mult_si * 3 + 6
|
|
: COSTS_N_INSNS (7));
|
|
- else if (!speed)
|
|
- *total = COSTS_N_INSNS (1) + 1;
|
|
else if (mode == DImode)
|
|
*total = loongarch_cost->int_mult_di;
|
|
else
|
|
@@ -3833,14 +3831,18 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
|
|
|
case UDIV:
|
|
case UMOD:
|
|
- if (!speed)
|
|
- {
|
|
- *total = COSTS_N_INSNS (loongarch_idiv_insns (mode));
|
|
- }
|
|
- else if (mode == DImode)
|
|
+ if (mode == DImode)
|
|
*total = loongarch_cost->int_div_di;
|
|
else
|
|
- *total = loongarch_cost->int_div_si;
|
|
+ {
|
|
+ *total = loongarch_cost->int_div_si;
|
|
+ if (TARGET_64BIT && !TARGET_DIV32)
|
|
+ *total += COSTS_N_INSNS (2);
|
|
+ }
|
|
+
|
|
+ if (TARGET_CHECK_ZERO_DIV)
|
|
+ *total += COSTS_N_INSNS (2);
|
|
+
|
|
return false;
|
|
|
|
case SIGN_EXTEND:
|
|
@@ -3872,9 +3874,7 @@ loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code,
|
|
&& (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
|
|
== ZERO_EXTEND))))
|
|
{
|
|
- if (!speed)
|
|
- *total = COSTS_N_INSNS (1) + 1;
|
|
- else if (mode == DImode)
|
|
+ if (mode == DImode)
|
|
*total = loongarch_cost->int_mult_di;
|
|
else
|
|
*total = loongarch_cost->int_mult_si;
|
|
diff --git a/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
|
|
new file mode 100644
|
|
index 000000000..02d9a4876
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gcc.target/loongarch/mul-const-reduction.c
|
|
@@ -0,0 +1,11 @@
|
|
+/* { dg-do compile } */
|
|
+/* { dg-options "-O2 -mtune=la464" } */
|
|
+/* { dg-final { scan-assembler "alsl\.w" } } */
|
|
+/* { dg-final { scan-assembler "slli\.w" } } */
|
|
+/* { dg-final { scan-assembler-not "mul\.w" } } */
|
|
+
|
|
+int
|
|
+test (int a)
|
|
+{
|
|
+ return a * 68;
|
|
+}
|
|
--
|
|
2.43.0
|
|
|