414 lines
13 KiB
Diff
414 lines
13 KiB
Diff
|
|
From 95089699271d235efc29ae48b78f8c7f1b6386c4 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Lulu Cheng <chenglulu@loongson.cn>
|
||
|
|
Date: Fri, 12 Jul 2024 09:57:40 +0800
|
||
|
|
Subject: [PATCH 176/188] LoongArch: Organize the code related to split move
|
||
|
|
and merge the same functions.
|
||
|
|
|
||
|
|
gcc/ChangeLog:
|
||
|
|
|
||
|
|
* config/loongarch/loongarch-protos.h
|
||
|
|
(loongarch_split_128bit_move): Delete.
|
||
|
|
(loongarch_split_128bit_move_p): Delete.
|
||
|
|
(loongarch_split_256bit_move): Delete.
|
||
|
|
(loongarch_split_256bit_move_p): Delete.
|
||
|
|
(loongarch_split_vector_move): Add a function declaration.
|
||
|
|
* config/loongarch/loongarch.cc
|
||
|
|
(loongarch_vector_costs::finish_cost): Adjust the code
|
||
|
|
formatting.
|
||
|
|
(loongarch_split_vector_move_p): Merge
|
||
|
|
loongarch_split_128bit_move_p and loongarch_split_256bit_move_p.
|
||
|
|
(loongarch_split_move_p): Merge code.
|
||
|
|
(loongarch_split_move): Likewise.
|
||
|
|
(loongarch_split_128bit_move_p): Delete.
|
||
|
|
(loongarch_split_256bit_move_p): Delete.
|
||
|
|
(loongarch_split_128bit_move): Delete.
|
||
|
|
(loongarch_split_vector_move): Merge loongarch_split_128bit_move
|
||
|
|
and loongarch_split_256bit_move.
|
||
|
|
(loongarch_split_256bit_move): Delete.
|
||
|
|
(loongarch_global_init): Remove the extra semicolon at the
|
||
|
|
end of the function.
|
||
|
|
* config/loongarch/loongarch.md (*movdf_softfloat): Added a new
|
||
|
|
condition TARGET_64BIT.
|
||
|
|
---
|
||
|
|
gcc/config/loongarch/loongarch-protos.h | 5 +-
|
||
|
|
gcc/config/loongarch/loongarch.cc | 221 ++++++------------------
|
||
|
|
gcc/config/loongarch/loongarch.md | 1 +
|
||
|
|
3 files changed, 58 insertions(+), 169 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/gcc/config/loongarch/loongarch-protos.h b/gcc/config/loongarch/loongarch-protos.h
|
||
|
|
index 0c31a74b7..abf1a0893 100644
|
||
|
|
--- a/gcc/config/loongarch/loongarch-protos.h
|
||
|
|
+++ b/gcc/config/loongarch/loongarch-protos.h
|
||
|
|
@@ -85,10 +85,7 @@ extern bool loongarch_split_move_p (rtx, rtx);
|
||
|
|
extern void loongarch_split_move (rtx, rtx);
|
||
|
|
extern bool loongarch_addu16i_imm12_operand_p (HOST_WIDE_INT, machine_mode);
|
||
|
|
extern void loongarch_split_plus_constant (rtx *, machine_mode);
|
||
|
|
-extern void loongarch_split_128bit_move (rtx, rtx);
|
||
|
|
-extern bool loongarch_split_128bit_move_p (rtx, rtx);
|
||
|
|
-extern void loongarch_split_256bit_move (rtx, rtx);
|
||
|
|
-extern bool loongarch_split_256bit_move_p (rtx, rtx);
|
||
|
|
+extern void loongarch_split_vector_move (rtx, rtx);
|
||
|
|
extern const char *loongarch_output_move (rtx, rtx);
|
||
|
|
#ifdef RTX_CODE
|
||
|
|
extern void loongarch_expand_scc (rtx *);
|
||
|
|
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
|
||
|
|
index 260dd7b5f..53bd8d7ec 100644
|
||
|
|
--- a/gcc/config/loongarch/loongarch.cc
|
||
|
|
+++ b/gcc/config/loongarch/loongarch.cc
|
||
|
|
@@ -4351,10 +4351,10 @@ void
|
||
|
|
loongarch_vector_costs::finish_cost (const vector_costs *scalar_costs)
|
||
|
|
{
|
||
|
|
loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (m_vinfo);
|
||
|
|
+
|
||
|
|
if (loop_vinfo)
|
||
|
|
- {
|
||
|
|
- m_suggested_unroll_factor = determine_suggested_unroll_factor (loop_vinfo);
|
||
|
|
- }
|
||
|
|
+ m_suggested_unroll_factor
|
||
|
|
+ = determine_suggested_unroll_factor (loop_vinfo);
|
||
|
|
|
||
|
|
vector_costs::finish_cost (scalar_costs);
|
||
|
|
}
|
||
|
|
@@ -4420,6 +4420,7 @@ loongarch_subword (rtx op, bool high_p)
|
||
|
|
return simplify_gen_subreg (word_mode, op, mode, byte);
|
||
|
|
}
|
||
|
|
|
||
|
|
+static bool loongarch_split_vector_move_p (rtx dest, rtx src);
|
||
|
|
/* Return true if a move from SRC to DEST should be split into two.
|
||
|
|
SPLIT_TYPE describes the split condition. */
|
||
|
|
|
||
|
|
@@ -4441,13 +4442,11 @@ loongarch_split_move_p (rtx dest, rtx src)
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
- /* Check if LSX moves need splitting. */
|
||
|
|
- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
|
||
|
|
- return loongarch_split_128bit_move_p (dest, src);
|
||
|
|
|
||
|
|
- /* Check if LASX moves need splitting. */
|
||
|
|
- if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
|
||
|
|
- return loongarch_split_256bit_move_p (dest, src);
|
||
|
|
+ /* Check if vector moves need splitting. */
|
||
|
|
+ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
|
||
|
|
+ || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
|
||
|
|
+ return loongarch_split_vector_move_p (dest, src);
|
||
|
|
|
||
|
|
/* Otherwise split all multiword moves. */
|
||
|
|
return size > UNITS_PER_WORD;
|
||
|
|
@@ -4460,10 +4459,9 @@ void
|
||
|
|
loongarch_split_move (rtx dest, rtx src)
|
||
|
|
{
|
||
|
|
gcc_checking_assert (loongarch_split_move_p (dest, src));
|
||
|
|
- if (LSX_SUPPORTED_MODE_P (GET_MODE (dest)))
|
||
|
|
- loongarch_split_128bit_move (dest, src);
|
||
|
|
- else if (LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
|
||
|
|
- loongarch_split_256bit_move (dest, src);
|
||
|
|
+ if (LSX_SUPPORTED_MODE_P (GET_MODE (dest))
|
||
|
|
+ || LASX_SUPPORTED_MODE_P (GET_MODE (dest)))
|
||
|
|
+ loongarch_split_vector_move (dest, src);
|
||
|
|
else
|
||
|
|
gcc_unreachable ();
|
||
|
|
}
|
||
|
|
@@ -4585,224 +4583,117 @@ loongarch_output_move_index_float (rtx x, machine_mode mode, bool ldr)
|
||
|
|
|
||
|
|
return insn[ldr][index-2];
|
||
|
|
}
|
||
|
|
-/* Return true if a 128-bit move from SRC to DEST should be split. */
|
||
|
|
-
|
||
|
|
-bool
|
||
|
|
-loongarch_split_128bit_move_p (rtx dest, rtx src)
|
||
|
|
-{
|
||
|
|
- /* LSX-to-LSX moves can be done in a single instruction. */
|
||
|
|
- if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
|
||
|
|
- return false;
|
||
|
|
-
|
||
|
|
- /* Check for LSX loads and stores. */
|
||
|
|
- if (FP_REG_RTX_P (dest) && MEM_P (src))
|
||
|
|
- return false;
|
||
|
|
- if (FP_REG_RTX_P (src) && MEM_P (dest))
|
||
|
|
- return false;
|
||
|
|
-
|
||
|
|
- /* Check for LSX set to an immediate const vector with valid replicated
|
||
|
|
- element. */
|
||
|
|
- if (FP_REG_RTX_P (dest)
|
||
|
|
- && loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
|
||
|
|
- return false;
|
||
|
|
-
|
||
|
|
- /* Check for LSX load zero immediate. */
|
||
|
|
- if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
|
||
|
|
- return false;
|
||
|
|
-
|
||
|
|
- return true;
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
-/* Return true if a 256-bit move from SRC to DEST should be split. */
|
||
|
|
+/* Return true if a vector move from SRC to DEST should be split. */
|
||
|
|
|
||
|
|
-bool
|
||
|
|
-loongarch_split_256bit_move_p (rtx dest, rtx src)
|
||
|
|
+static bool
|
||
|
|
+loongarch_split_vector_move_p (rtx dest, rtx src)
|
||
|
|
{
|
||
|
|
- /* LSX-to-LSX moves can be done in a single instruction. */
|
||
|
|
+ /* Vector moves can be done in a single instruction. */
|
||
|
|
if (FP_REG_RTX_P (src) && FP_REG_RTX_P (dest))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- /* Check for LSX loads and stores. */
|
||
|
|
+ /* Check for vector loads and stores. */
|
||
|
|
if (FP_REG_RTX_P (dest) && MEM_P (src))
|
||
|
|
return false;
|
||
|
|
if (FP_REG_RTX_P (src) && MEM_P (dest))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- /* Check for LSX set to an immediate const vector with valid replicated
|
||
|
|
+ /* Check for vector set to an immediate const vector with valid replicated
|
||
|
|
element. */
|
||
|
|
if (FP_REG_RTX_P (dest)
|
||
|
|
&& loongarch_const_vector_same_int_p (src, GET_MODE (src), -512, 511))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- /* Check for LSX load zero immediate. */
|
||
|
|
+ /* Check for vector load zero immediate. */
|
||
|
|
if (FP_REG_RTX_P (dest) && src == CONST0_RTX (GET_MODE (src)))
|
||
|
|
return false;
|
||
|
|
|
||
|
|
return true;
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* Split a 128-bit move from SRC to DEST. */
|
||
|
|
+/* Split a vector move from SRC to DEST. */
|
||
|
|
|
||
|
|
void
|
||
|
|
-loongarch_split_128bit_move (rtx dest, rtx src)
|
||
|
|
+loongarch_split_vector_move (rtx dest, rtx src)
|
||
|
|
{
|
||
|
|
int byte, index;
|
||
|
|
- rtx low_dest, low_src, d, s;
|
||
|
|
+ rtx s, d;
|
||
|
|
+ machine_mode mode = GET_MODE (dest);
|
||
|
|
+ bool lsx_p = LSX_SUPPORTED_MODE_P (mode);
|
||
|
|
|
||
|
|
if (FP_REG_RTX_P (dest))
|
||
|
|
{
|
||
|
|
gcc_assert (!MEM_P (src));
|
||
|
|
|
||
|
|
- rtx new_dest = dest;
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- {
|
||
|
|
- if (GET_MODE (dest) != V4SImode)
|
||
|
|
- new_dest = simplify_gen_subreg (V4SImode, dest, GET_MODE (dest), 0);
|
||
|
|
- }
|
||
|
|
- else
|
||
|
|
- {
|
||
|
|
- if (GET_MODE (dest) != V2DImode)
|
||
|
|
- new_dest = simplify_gen_subreg (V2DImode, dest, GET_MODE (dest), 0);
|
||
|
|
- }
|
||
|
|
-
|
||
|
|
- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
|
||
|
|
- byte += UNITS_PER_WORD, index++)
|
||
|
|
- {
|
||
|
|
- s = loongarch_subword_at_byte (src, byte);
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- emit_insn (gen_lsx_vinsgr2vr_w (new_dest, s, new_dest,
|
||
|
|
- GEN_INT (1 << index)));
|
||
|
|
- else
|
||
|
|
- emit_insn (gen_lsx_vinsgr2vr_d (new_dest, s, new_dest,
|
||
|
|
- GEN_INT (1 << index)));
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- else if (FP_REG_RTX_P (src))
|
||
|
|
- {
|
||
|
|
- gcc_assert (!MEM_P (dest));
|
||
|
|
-
|
||
|
|
- rtx new_src = src;
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- {
|
||
|
|
- if (GET_MODE (src) != V4SImode)
|
||
|
|
- new_src = simplify_gen_subreg (V4SImode, src, GET_MODE (src), 0);
|
||
|
|
- }
|
||
|
|
- else
|
||
|
|
- {
|
||
|
|
- if (GET_MODE (src) != V2DImode)
|
||
|
|
- new_src = simplify_gen_subreg (V2DImode, src, GET_MODE (src), 0);
|
||
|
|
- }
|
||
|
|
+ rtx (*gen_vinsgr2vr_d) (rtx, rtx, rtx, rtx);
|
||
|
|
|
||
|
|
- for (byte = 0, index = 0; byte < GET_MODE_SIZE (TImode);
|
||
|
|
- byte += UNITS_PER_WORD, index++)
|
||
|
|
- {
|
||
|
|
- d = loongarch_subword_at_byte (dest, byte);
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
|
||
|
|
- else
|
||
|
|
- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
|
||
|
|
- }
|
||
|
|
- }
|
||
|
|
- else
|
||
|
|
- {
|
||
|
|
- low_dest = loongarch_subword_at_byte (dest, 0);
|
||
|
|
- low_src = loongarch_subword_at_byte (src, 0);
|
||
|
|
- gcc_assert (REG_P (low_dest) && REG_P (low_src));
|
||
|
|
- /* Make sure the source register is not written before reading. */
|
||
|
|
- if (REGNO (low_dest) <= REGNO (low_src))
|
||
|
|
+ if (lsx_p)
|
||
|
|
{
|
||
|
|
- for (byte = 0; byte < GET_MODE_SIZE (TImode);
|
||
|
|
- byte += UNITS_PER_WORD)
|
||
|
|
- {
|
||
|
|
- d = loongarch_subword_at_byte (dest, byte);
|
||
|
|
- s = loongarch_subword_at_byte (src, byte);
|
||
|
|
- loongarch_emit_move (d, s);
|
||
|
|
- }
|
||
|
|
+ mode = V2DImode;
|
||
|
|
+ gen_vinsgr2vr_d = gen_lsx_vinsgr2vr_d;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
|
||
|
|
- byte -= UNITS_PER_WORD)
|
||
|
|
- {
|
||
|
|
- d = loongarch_subword_at_byte (dest, byte);
|
||
|
|
- s = loongarch_subword_at_byte (src, byte);
|
||
|
|
- loongarch_emit_move (d, s);
|
||
|
|
- }
|
||
|
|
+ mode = V4DImode;
|
||
|
|
+ gen_vinsgr2vr_d = gen_lasx_xvinsgr2vr_d;
|
||
|
|
}
|
||
|
|
- }
|
||
|
|
-}
|
||
|
|
-
|
||
|
|
-/* Split a 256-bit move from SRC to DEST. */
|
||
|
|
-
|
||
|
|
-void
|
||
|
|
-loongarch_split_256bit_move (rtx dest, rtx src)
|
||
|
|
-{
|
||
|
|
- int byte, index;
|
||
|
|
- rtx low_dest, low_src, d, s;
|
||
|
|
-
|
||
|
|
- if (FP_REG_RTX_P (dest))
|
||
|
|
- {
|
||
|
|
- gcc_assert (!MEM_P (src));
|
||
|
|
|
||
|
|
rtx new_dest = dest;
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- {
|
||
|
|
- if (GET_MODE (dest) != V8SImode)
|
||
|
|
- new_dest = simplify_gen_subreg (V8SImode, dest, GET_MODE (dest), 0);
|
||
|
|
- }
|
||
|
|
- else
|
||
|
|
- {
|
||
|
|
- if (GET_MODE (dest) != V4DImode)
|
||
|
|
- new_dest = simplify_gen_subreg (V4DImode, dest, GET_MODE (dest), 0);
|
||
|
|
- }
|
||
|
|
+
|
||
|
|
+ if (GET_MODE (dest) != mode)
|
||
|
|
+ new_dest = simplify_gen_subreg (mode, dest, GET_MODE (dest), 0);
|
||
|
|
|
||
|
|
for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
|
||
|
|
byte += UNITS_PER_WORD, index++)
|
||
|
|
{
|
||
|
|
s = loongarch_subword_at_byte (src, byte);
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- emit_insn (gen_lasx_xvinsgr2vr_w (new_dest, s, new_dest,
|
||
|
|
- GEN_INT (1 << index)));
|
||
|
|
- else
|
||
|
|
- emit_insn (gen_lasx_xvinsgr2vr_d (new_dest, s, new_dest,
|
||
|
|
- GEN_INT (1 << index)));
|
||
|
|
+ emit_insn (gen_vinsgr2vr_d (new_dest, s, new_dest,
|
||
|
|
+ GEN_INT (1 << index)));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else if (FP_REG_RTX_P (src))
|
||
|
|
{
|
||
|
|
gcc_assert (!MEM_P (dest));
|
||
|
|
|
||
|
|
- rtx new_src = src;
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
+ rtx (*gen_vpickve2gr_d) (rtx, rtx, rtx);
|
||
|
|
+
|
||
|
|
+ if (lsx_p)
|
||
|
|
{
|
||
|
|
- if (GET_MODE (src) != V8SImode)
|
||
|
|
- new_src = simplify_gen_subreg (V8SImode, src, GET_MODE (src), 0);
|
||
|
|
+ mode = V2DImode;
|
||
|
|
+ gen_vpickve2gr_d = gen_lsx_vpickve2gr_d;
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
- if (GET_MODE (src) != V4DImode)
|
||
|
|
- new_src = simplify_gen_subreg (V4DImode, src, GET_MODE (src), 0);
|
||
|
|
+ mode = V4DImode;
|
||
|
|
+ gen_vpickve2gr_d = gen_lasx_xvpickve2gr_d;
|
||
|
|
}
|
||
|
|
|
||
|
|
+ rtx new_src = src;
|
||
|
|
+ if (GET_MODE (src) != mode)
|
||
|
|
+ new_src = simplify_gen_subreg (mode, src, GET_MODE (src), 0);
|
||
|
|
+
|
||
|
|
for (byte = 0, index = 0; byte < GET_MODE_SIZE (GET_MODE (src));
|
||
|
|
byte += UNITS_PER_WORD, index++)
|
||
|
|
{
|
||
|
|
d = loongarch_subword_at_byte (dest, byte);
|
||
|
|
- if (!TARGET_64BIT)
|
||
|
|
- emit_insn (gen_lsx_vpickve2gr_w (d, new_src, GEN_INT (index)));
|
||
|
|
- else
|
||
|
|
- emit_insn (gen_lsx_vpickve2gr_d (d, new_src, GEN_INT (index)));
|
||
|
|
+ emit_insn (gen_vpickve2gr_d (d, new_src, GEN_INT (index)));
|
||
|
|
}
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
+ /* This part of the code is designed to handle the following situations:
|
||
|
|
+ (set (reg:V2DI 4 $r4)
|
||
|
|
+ (reg:V2DI 6 $r6))
|
||
|
|
+ The trigger test case is lsx-mov-1.c. */
|
||
|
|
+ rtx low_dest, low_src;
|
||
|
|
+
|
||
|
|
low_dest = loongarch_subword_at_byte (dest, 0);
|
||
|
|
low_src = loongarch_subword_at_byte (src, 0);
|
||
|
|
gcc_assert (REG_P (low_dest) && REG_P (low_src));
|
||
|
|
/* Make sure the source register is not written before reading. */
|
||
|
|
if (REGNO (low_dest) <= REGNO (low_src))
|
||
|
|
{
|
||
|
|
- for (byte = 0; byte < GET_MODE_SIZE (TImode);
|
||
|
|
+ for (byte = 0; byte < GET_MODE_SIZE (GET_MODE (dest));
|
||
|
|
byte += UNITS_PER_WORD)
|
||
|
|
{
|
||
|
|
d = loongarch_subword_at_byte (dest, byte);
|
||
|
|
@@ -4812,8 +4703,8 @@ loongarch_split_256bit_move (rtx dest, rtx src)
|
||
|
|
}
|
||
|
|
else
|
||
|
|
{
|
||
|
|
- for (byte = GET_MODE_SIZE (TImode) - UNITS_PER_WORD; byte >= 0;
|
||
|
|
- byte -= UNITS_PER_WORD)
|
||
|
|
+ for (byte = GET_MODE_SIZE (GET_MODE (dest)) - UNITS_PER_WORD;
|
||
|
|
+ byte >= 0; byte -= UNITS_PER_WORD)
|
||
|
|
{
|
||
|
|
d = loongarch_subword_at_byte (dest, byte);
|
||
|
|
s = loongarch_subword_at_byte (src, byte);
|
||
|
|
@@ -7603,7 +7494,7 @@ loongarch_global_init (void)
|
||
|
|
|
||
|
|
/* Function to allocate machine-dependent function status. */
|
||
|
|
init_machine_status = &loongarch_init_machine_status;
|
||
|
|
-};
|
||
|
|
+}
|
||
|
|
|
||
|
|
static void
|
||
|
|
loongarch_reg_init (void)
|
||
|
|
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
|
||
|
|
index 16f9f37c8..8bcb43042 100644
|
||
|
|
--- a/gcc/config/loongarch/loongarch.md
|
||
|
|
+++ b/gcc/config/loongarch/loongarch.md
|
||
|
|
@@ -2406,6 +2406,7 @@
|
||
|
|
[(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m")
|
||
|
|
(match_operand:DF 1 "move_operand" "rG,m,rG"))]
|
||
|
|
"(TARGET_SOFT_FLOAT || TARGET_SINGLE_FLOAT)
|
||
|
|
+ && TARGET_64BIT
|
||
|
|
&& (register_operand (operands[0], DFmode)
|
||
|
|
|| reg_or_0_operand (operands[1], DFmode))"
|
||
|
|
{ return loongarch_output_move (operands[0], operands[1]); }
|
||
|
|
--
|
||
|
|
2.43.0
|
||
|
|
|