470 lines
18 KiB
Diff
470 lines
18 KiB
Diff
|
|
From 8394394bd26c7be6129b9a4e673d2a3530d9efde Mon Sep 17 00:00:00 2001
|
||
|
|
From: Christophe Lyon <christophe.lyon@arm.com>
|
||
|
|
Date: Fri, 11 Mar 2022 16:21:02 +0000
|
||
|
|
Subject: [PATCH 057/157] [Backport][SME] aarch64: Add backend support for DFP
|
||
|
|
|
||
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=0dc8e1e7026d9b8ec8b669c051786d426a52cd22
|
||
|
|
|
||
|
|
This patch updates the aarch64 backend as needed to support DFP modes
|
||
|
|
(SD, DD and TD).
|
||
|
|
|
||
|
|
Changes v1->v2:
|
||
|
|
|
||
|
|
* Drop support for DFP modes in
|
||
|
|
aarch64_gen_{load,store}[wb]_pair as these are only used in
|
||
|
|
prologue/epilogue where DFP modes are not used. Drop the
|
||
|
|
changes to the corresponding patterns in aarch64.md, and
|
||
|
|
useless GPF_PAIR iterator.
|
||
|
|
|
||
|
|
* In aarch64_reinterpret_float_as_int, handle DDmode the same way
|
||
|
|
as DFmode (needed in case the representation of the
|
||
|
|
floating-point value can be loaded using mov/movk).
|
||
|
|
|
||
|
|
* In aarch64_float_const_zero_rtx_p, reject constants with DFP
|
||
|
|
mode: when X is zero, the callers want to emit either '0' or
|
||
|
|
'zr' depending on the context, which is not the way 0.0 is
|
||
|
|
represented in DFP mode (in particular fmov d0, #0 is not right
|
||
|
|
for DFP).
|
||
|
|
|
||
|
|
* In aarch64_legitimate_constant_p, accept DFP constants.
|
||
|
|
|
||
|
|
2022-03-31 Christophe Lyon <christophe.lyon@arm.com>
|
||
|
|
|
||
|
|
gcc/
|
||
|
|
* config/aarch64/aarch64.cc
|
||
|
|
(aarch64_split_128bit_move): Handle DFP modes.
|
||
|
|
(aarch64_mode_valid_for_sched_fusion_p): Likewise.
|
||
|
|
(aarch64_classify_address): Likewise.
|
||
|
|
(aarch64_legitimize_address_displacement): Likewise.
|
||
|
|
(aarch64_reinterpret_float_as_int): Likewise.
|
||
|
|
(aarch64_float_const_zero_rtx_p): Likewise.
|
||
|
|
(aarch64_can_const_movi_rtx_p): Likewise.
|
||
|
|
(aarch64_anchor_offset): Likewise.
|
||
|
|
(aarch64_secondary_reload): Likewise.
|
||
|
|
(aarch64_rtx_costs): Likewise.
|
||
|
|
(aarch64_legitimate_constant_p): Likewise.
|
||
|
|
(aarch64_gimplify_va_arg_expr): Likewise.
|
||
|
|
(aapcs_vfp_sub_candidate): Likewise.
|
||
|
|
(aarch64_vfp_is_call_or_return_candidate): Likewise.
|
||
|
|
(aarch64_output_scalar_simd_mov_immediate): Likewise.
|
||
|
|
(aarch64_gen_adjusted_ldpstp): Likewise.
|
||
|
|
(aarch64_scalar_mode_supported_p): Accept DFP modes if enabled.
|
||
|
|
* config/aarch64/aarch64.md
|
||
|
|
(movsf_aarch64): Use SFD iterator and rename into
|
||
|
|
mov<mode>_aarch64.
|
||
|
|
(movdf_aarch64): Use DFD iterator and rename into
|
||
|
|
mov<mode>_aarch64.
|
||
|
|
(movtf_aarch64): Use TFD iterator and rename into
|
||
|
|
mov<mode>_aarch64.
|
||
|
|
(split pattern for move TF mode): Use TFD iterator.
|
||
|
|
* config/aarch64/iterators.md
|
||
|
|
(GPF_TF_F16_MOV): Add DFP modes.
|
||
|
|
(SFD, DFD, TFD): New iterators.
|
||
|
|
(GPF_TF): Add DFP modes.
|
||
|
|
(TX, DX, DX2): Likewise.
|
||
|
|
---
|
||
|
|
gcc/config/aarch64/aarch64.cc | 82 ++++++++++++++++++++++-----------
|
||
|
|
gcc/config/aarch64/aarch64.md | 34 +++++++-------
|
||
|
|
gcc/config/aarch64/iterators.md | 24 +++++++---
|
||
|
|
3 files changed, 89 insertions(+), 51 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
|
||
|
|
index 055b436b1..02210ed13 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.cc
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.cc
|
||
|
|
@@ -5068,7 +5068,7 @@ aarch64_split_128bit_move (rtx dst, rtx src)
|
||
|
|
|
||
|
|
machine_mode mode = GET_MODE (dst);
|
||
|
|
|
||
|
|
- gcc_assert (mode == TImode || mode == TFmode);
|
||
|
|
+ gcc_assert (mode == TImode || mode == TFmode || mode == TDmode);
|
||
|
|
gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
|
||
|
|
gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
|
||
|
|
|
||
|
|
@@ -10834,6 +10834,7 @@ aarch64_mode_valid_for_sched_fusion_p (machine_mode mode)
|
||
|
|
{
|
||
|
|
return mode == SImode || mode == DImode
|
||
|
|
|| mode == SFmode || mode == DFmode
|
||
|
|
+ || mode == SDmode || mode == DDmode
|
||
|
|
|| (aarch64_vector_mode_supported_p (mode)
|
||
|
|
&& (known_eq (GET_MODE_SIZE (mode), 8)
|
||
|
|
|| (known_eq (GET_MODE_SIZE (mode), 16)
|
||
|
|
@@ -10876,12 +10877,13 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||
|
|
vec_flags &= ~VEC_PARTIAL;
|
||
|
|
|
||
|
|
/* On BE, we use load/store pair for all large int mode load/stores.
|
||
|
|
- TI/TFmode may also use a load/store pair. */
|
||
|
|
+ TI/TF/TDmode may also use a load/store pair. */
|
||
|
|
bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
|
||
|
|
bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
|
||
|
|
|| type == ADDR_QUERY_LDP_STP_N
|
||
|
|
|| mode == TImode
|
||
|
|
|| mode == TFmode
|
||
|
|
+ || mode == TDmode
|
||
|
|
|| (BYTES_BIG_ENDIAN && advsimd_struct_p));
|
||
|
|
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
|
||
|
|
corresponds to the actual size of the memory being loaded/stored and the
|
||
|
|
@@ -10955,7 +10957,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||
|
|
info->offset = op1;
|
||
|
|
info->const_offset = offset;
|
||
|
|
|
||
|
|
- /* TImode and TFmode values are allowed in both pairs of X
|
||
|
|
+ /* TImode, TFmode and TDmode values are allowed in both pairs of X
|
||
|
|
registers and individual Q registers. The available
|
||
|
|
address modes are:
|
||
|
|
X,X: 7-bit signed scaled offset
|
||
|
|
@@ -10964,7 +10966,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||
|
|
When performing the check for pairs of X registers i.e. LDP/STP
|
||
|
|
pass down DImode since that is the natural size of the LDP/STP
|
||
|
|
instruction memory accesses. */
|
||
|
|
- if (mode == TImode || mode == TFmode)
|
||
|
|
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||
|
|
return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
|
||
|
|
&& (aarch64_offset_9bit_signed_unscaled_p (mode, offset)
|
||
|
|
|| offset_12bit_unsigned_scaled_p (mode, offset)));
|
||
|
|
@@ -11087,14 +11089,14 @@ aarch64_classify_address (struct aarch64_address_info *info,
|
||
|
|
info->offset = XEXP (XEXP (x, 1), 1);
|
||
|
|
info->const_offset = offset;
|
||
|
|
|
||
|
|
- /* TImode and TFmode values are allowed in both pairs of X
|
||
|
|
+ /* TImode, TFmode and TDmode values are allowed in both pairs of X
|
||
|
|
registers and individual Q registers. The available
|
||
|
|
address modes are:
|
||
|
|
X,X: 7-bit signed scaled offset
|
||
|
|
Q: 9-bit signed offset
|
||
|
|
We conservatively require an offset representable in either mode.
|
||
|
|
*/
|
||
|
|
- if (mode == TImode || mode == TFmode)
|
||
|
|
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||
|
|
return (aarch64_offset_7bit_signed_scaled_p (mode, offset)
|
||
|
|
&& aarch64_offset_9bit_signed_unscaled_p (mode, offset));
|
||
|
|
|
||
|
|
@@ -11256,9 +11258,9 @@ aarch64_legitimize_address_displacement (rtx *offset1, rtx *offset2,
|
||
|
|
offset. Use 4KB range for 1- and 2-byte accesses and a 16KB
|
||
|
|
range otherwise to increase opportunities for sharing the base
|
||
|
|
address of different sizes. Unaligned accesses use the signed
|
||
|
|
- 9-bit range, TImode/TFmode use the intersection of signed
|
||
|
|
+ 9-bit range, TImode/TFmode/TDmode use the intersection of signed
|
||
|
|
scaled 7-bit and signed 9-bit offset. */
|
||
|
|
- if (mode == TImode || mode == TFmode)
|
||
|
|
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||
|
|
second_offset = ((const_offset + 0x100) & 0x1f8) - 0x100;
|
||
|
|
else if ((const_offset & (size - 1)) != 0)
|
||
|
|
second_offset = ((const_offset + 0x100) & 0x1ff) - 0x100;
|
||
|
|
@@ -11339,7 +11341,7 @@ aarch64_reinterpret_float_as_int (rtx value, unsigned HOST_WIDE_INT *intval)
|
||
|
|
CONST_DOUBLE_REAL_VALUE (value),
|
||
|
|
REAL_MODE_FORMAT (mode));
|
||
|
|
|
||
|
|
- if (mode == DFmode)
|
||
|
|
+ if (mode == DFmode || mode == DDmode)
|
||
|
|
{
|
||
|
|
int order = BYTES_BIG_ENDIAN ? 1 : 0;
|
||
|
|
ival = zext_hwi (res[order], 32);
|
||
|
|
@@ -11380,11 +11382,15 @@ aarch64_float_const_rtx_p (rtx x)
|
||
|
|
return false;
|
||
|
|
}
|
||
|
|
|
||
|
|
-/* Return TRUE if rtx X is immediate constant 0.0 */
|
||
|
|
+/* Return TRUE if rtx X is immediate constant 0.0 (but not in Decimal
|
||
|
|
+ Floating Point). */
|
||
|
|
bool
|
||
|
|
aarch64_float_const_zero_rtx_p (rtx x)
|
||
|
|
{
|
||
|
|
- if (GET_MODE (x) == VOIDmode)
|
||
|
|
+ /* 0.0 in Decimal Floating Point cannot be represented by #0 or
|
||
|
|
+ zr as our callers expect, so no need to check the actual
|
||
|
|
+ value if X is of Decimal Floating Point type. */
|
||
|
|
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_DECIMAL_FLOAT)
|
||
|
|
return false;
|
||
|
|
|
||
|
|
if (REAL_VALUE_MINUS_ZERO (*CONST_DOUBLE_REAL_VALUE (x)))
|
||
|
|
@@ -11422,7 +11428,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
|
||
|
|
else
|
||
|
|
return false;
|
||
|
|
|
||
|
|
- /* use a 64 bit mode for everything except for DI/DF mode, where we use
|
||
|
|
+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
|
||
|
|
a 128 bit vector mode. */
|
||
|
|
int width = GET_MODE_BITSIZE (imode) == 64 ? 128 : 64;
|
||
|
|
|
||
|
|
@@ -12628,7 +12634,7 @@ aarch64_anchor_offset (HOST_WIDE_INT offset, HOST_WIDE_INT size,
|
||
|
|
if (IN_RANGE (offset, -256, 0))
|
||
|
|
return 0;
|
||
|
|
|
||
|
|
- if (mode == TImode || mode == TFmode)
|
||
|
|
+ if (mode == TImode || mode == TFmode || mode == TDmode)
|
||
|
|
return (offset + 0x100) & ~0x1ff;
|
||
|
|
|
||
|
|
/* Use 12-bit offset by access size. */
|
||
|
|
@@ -12737,7 +12743,9 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||
|
|
|
||
|
|
/* Without the TARGET_SIMD instructions we cannot move a Q register
|
||
|
|
to a Q register directly. We need a scratch. */
|
||
|
|
- if (REG_P (x) && (mode == TFmode || mode == TImode) && mode == GET_MODE (x)
|
||
|
|
+ if (REG_P (x)
|
||
|
|
+ && (mode == TFmode || mode == TImode || mode == TDmode)
|
||
|
|
+ && mode == GET_MODE (x)
|
||
|
|
&& FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
|
||
|
|
&& reg_class_subset_p (rclass, FP_REGS))
|
||
|
|
{
|
||
|
|
@@ -12745,14 +12753,16 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
|
||
|
|
return NO_REGS;
|
||
|
|
}
|
||
|
|
|
||
|
|
- /* A TFmode or TImode memory access should be handled via an FP_REGS
|
||
|
|
+ /* A TFmode, TImode or TDmode memory access should be handled via an FP_REGS
|
||
|
|
because AArch64 has richer addressing modes for LDR/STR instructions
|
||
|
|
than LDP/STP instructions. */
|
||
|
|
if (TARGET_FLOAT && rclass == GENERAL_REGS
|
||
|
|
&& known_eq (GET_MODE_SIZE (mode), 16) && MEM_P (x))
|
||
|
|
return FP_REGS;
|
||
|
|
|
||
|
|
- if (rclass == FP_REGS && (mode == TImode || mode == TFmode) && CONSTANT_P(x))
|
||
|
|
+ if (rclass == FP_REGS
|
||
|
|
+ && (mode == TImode || mode == TFmode || mode == TDmode)
|
||
|
|
+ && CONSTANT_P(x))
|
||
|
|
return GENERAL_REGS;
|
||
|
|
|
||
|
|
return NO_REGS;
|
||
|
|
@@ -13883,9 +13893,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||
|
|
*cost += extra_cost->ldst.storev;
|
||
|
|
else if (GET_MODE_CLASS (mode) == MODE_INT)
|
||
|
|
*cost += extra_cost->ldst.store;
|
||
|
|
- else if (mode == SFmode)
|
||
|
|
+ else if (mode == SFmode || mode == SDmode)
|
||
|
|
*cost += extra_cost->ldst.storef;
|
||
|
|
- else if (mode == DFmode)
|
||
|
|
+ else if (mode == DFmode || mode == DDmode)
|
||
|
|
*cost += extra_cost->ldst.stored;
|
||
|
|
|
||
|
|
*cost +=
|
||
|
|
@@ -14009,11 +14019,11 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||
|
|
/* mov[df,sf]_aarch64. */
|
||
|
|
if (aarch64_float_const_representable_p (x))
|
||
|
|
/* FMOV (scalar immediate). */
|
||
|
|
- *cost += extra_cost->fp[mode == DFmode].fpconst;
|
||
|
|
+ *cost += extra_cost->fp[mode == DFmode || mode == DDmode].fpconst;
|
||
|
|
else if (!aarch64_float_const_zero_rtx_p (x))
|
||
|
|
{
|
||
|
|
/* This will be a load from memory. */
|
||
|
|
- if (mode == DFmode)
|
||
|
|
+ if (mode == DFmode || mode == DDmode)
|
||
|
|
*cost += extra_cost->ldst.loadd;
|
||
|
|
else
|
||
|
|
*cost += extra_cost->ldst.loadf;
|
||
|
|
@@ -14039,9 +14049,9 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
|
||
|
|
*cost += extra_cost->ldst.loadv;
|
||
|
|
else if (GET_MODE_CLASS (mode) == MODE_INT)
|
||
|
|
*cost += extra_cost->ldst.load;
|
||
|
|
- else if (mode == SFmode)
|
||
|
|
+ else if (mode == SFmode || mode == SDmode)
|
||
|
|
*cost += extra_cost->ldst.loadf;
|
||
|
|
- else if (mode == DFmode)
|
||
|
|
+ else if (mode == DFmode || mode == DDmode)
|
||
|
|
*cost += extra_cost->ldst.loadd;
|
||
|
|
|
||
|
|
*cost +=
|
||
|
|
@@ -19623,7 +19633,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
|
||
|
|
{
|
||
|
|
/* Support CSE and rematerialization of common constants. */
|
||
|
|
if (CONST_INT_P (x)
|
||
|
|
- || (CONST_DOUBLE_P (x) && GET_MODE_CLASS (mode) == MODE_FLOAT))
|
||
|
|
+ || CONST_DOUBLE_P (x))
|
||
|
|
return true;
|
||
|
|
|
||
|
|
/* Only accept variable-length vector constants if they can be
|
||
|
|
@@ -20064,6 +20074,18 @@ aarch64_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
|
||
|
|
field_t = long_double_type_node;
|
||
|
|
field_ptr_t = long_double_ptr_type_node;
|
||
|
|
break;
|
||
|
|
+ case SDmode:
|
||
|
|
+ field_t = dfloat32_type_node;
|
||
|
|
+ field_ptr_t = build_pointer_type (dfloat32_type_node);
|
||
|
|
+ break;
|
||
|
|
+ case DDmode:
|
||
|
|
+ field_t = dfloat64_type_node;
|
||
|
|
+ field_ptr_t = build_pointer_type (dfloat64_type_node);
|
||
|
|
+ break;
|
||
|
|
+ case TDmode:
|
||
|
|
+ field_t = dfloat128_type_node;
|
||
|
|
+ field_ptr_t = build_pointer_type (dfloat128_type_node);
|
||
|
|
+ break;
|
||
|
|
case E_HFmode:
|
||
|
|
field_t = aarch64_fp16_type_node;
|
||
|
|
field_ptr_t = aarch64_fp16_ptr_type_node;
|
||
|
|
@@ -20315,7 +20337,8 @@ aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep,
|
||
|
|
case REAL_TYPE:
|
||
|
|
mode = TYPE_MODE (type);
|
||
|
|
if (mode != DFmode && mode != SFmode
|
||
|
|
- && mode != TFmode && mode != HFmode)
|
||
|
|
+ && mode != TFmode && mode != HFmode
|
||
|
|
+ && mode != SDmode && mode != DDmode && mode != TDmode)
|
||
|
|
return -1;
|
||
|
|
|
||
|
|
if (*modep == VOIDmode)
|
||
|
|
@@ -20631,7 +20654,9 @@ aarch64_vfp_is_call_or_return_candidate (machine_mode mode,
|
||
|
|
machine_mode new_mode = VOIDmode;
|
||
|
|
bool composite_p = aarch64_composite_type_p (type, mode);
|
||
|
|
|
||
|
|
- if ((!composite_p && GET_MODE_CLASS (mode) == MODE_FLOAT)
|
||
|
|
+ if ((!composite_p
|
||
|
|
+ && (GET_MODE_CLASS (mode) == MODE_FLOAT
|
||
|
|
+ || GET_MODE_CLASS (mode) == MODE_DECIMAL_FLOAT))
|
||
|
|
|| aarch64_short_vector_p (type, mode))
|
||
|
|
{
|
||
|
|
*count = 1;
|
||
|
|
@@ -23565,7 +23590,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
|
||
|
|
}
|
||
|
|
|
||
|
|
machine_mode vmode;
|
||
|
|
- /* use a 64 bit mode for everything except for DI/DF mode, where we use
|
||
|
|
+ /* use a 64 bit mode for everything except for DI/DF/DD mode, where we use
|
||
|
|
a 128 bit vector mode. */
|
||
|
|
int width = GET_MODE_BITSIZE (mode) == 64 ? 128 : 64;
|
||
|
|
|
||
|
|
@@ -26417,7 +26442,7 @@ aarch64_gen_adjusted_ldpstp (rtx *operands, bool load,
|
||
|
|
base_off = (off_val_1 + off_val_3) / 2;
|
||
|
|
else
|
||
|
|
/* However, due to issues with negative LDP/STP offset generation for
|
||
|
|
- larger modes, for DF, DI and vector modes. we must not use negative
|
||
|
|
+ larger modes, for DF, DD, DI and vector modes. we must not use negative
|
||
|
|
addresses smaller than 9 signed unadjusted bits can store. This
|
||
|
|
provides the most range in this case. */
|
||
|
|
base_off = off_val_1;
|
||
|
|
@@ -26695,6 +26720,9 @@ aarch64_libgcc_floating_mode_supported_p (scalar_float_mode mode)
|
||
|
|
static bool
|
||
|
|
aarch64_scalar_mode_supported_p (scalar_mode mode)
|
||
|
|
{
|
||
|
|
+ if (DECIMAL_FLOAT_MODE_P (mode))
|
||
|
|
+ return default_decimal_float_supported_p ();
|
||
|
|
+
|
||
|
|
return (mode == HFmode
|
||
|
|
? true
|
||
|
|
: default_scalar_mode_supported_p (mode));
|
||
|
|
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||
|
|
index a78476c8a..8757a962f 100644
|
||
|
|
--- a/gcc/config/aarch64/aarch64.md
|
||
|
|
+++ b/gcc/config/aarch64/aarch64.md
|
||
|
|
@@ -1476,11 +1476,11 @@
|
||
|
|
(set_attr "arch" "simd,fp16,simd,*,simd,*,simd,*,fp16,simd,*,*,*,*,*")]
|
||
|
|
)
|
||
|
|
|
||
|
|
-(define_insn "*movsf_aarch64"
|
||
|
|
- [(set (match_operand:SF 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||
|
|
- (match_operand:SF 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
|
||
|
|
- "TARGET_FLOAT && (register_operand (operands[0], SFmode)
|
||
|
|
- || aarch64_reg_or_fp_zero (operands[1], SFmode))"
|
||
|
|
+(define_insn "*mov<mode>_aarch64"
|
||
|
|
+ [(set (match_operand:SFD 0 "nonimmediate_operand" "=w,w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||
|
|
+ (match_operand:SFD 1 "general_operand" "Y ,?rY, w,w,Ufc,Uvi,m,w,m,rY,r,M"))]
|
||
|
|
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||
|
|
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||
|
|
"@
|
||
|
|
movi\\t%0.2s, #0
|
||
|
|
fmov\\t%s0, %w1
|
||
|
|
@@ -1500,11 +1500,11 @@
|
||
|
|
(set_attr "arch" "simd,*,*,*,*,simd,*,*,*,*,*,*")]
|
||
|
|
)
|
||
|
|
|
||
|
|
-(define_insn "*movdf_aarch64"
|
||
|
|
- [(set (match_operand:DF 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||
|
|
- (match_operand:DF 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
|
||
|
|
- "TARGET_FLOAT && (register_operand (operands[0], DFmode)
|
||
|
|
- || aarch64_reg_or_fp_zero (operands[1], DFmode))"
|
||
|
|
+(define_insn "*mov<mode>_aarch64"
|
||
|
|
+ [(set (match_operand:DFD 0 "nonimmediate_operand" "=w, w ,?r,w,w ,w ,w,m,r,m ,r,r")
|
||
|
|
+ (match_operand:DFD 1 "general_operand" "Y , ?rY, w,w,Ufc,Uvi,m,w,m,rY,r,N"))]
|
||
|
|
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||
|
|
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||
|
|
"@
|
||
|
|
movi\\t%d0, #0
|
||
|
|
fmov\\t%d0, %x1
|
||
|
|
@@ -1545,13 +1545,13 @@
|
||
|
|
}
|
||
|
|
)
|
||
|
|
|
||
|
|
-(define_insn "*movtf_aarch64"
|
||
|
|
- [(set (match_operand:TF 0
|
||
|
|
+(define_insn "*mov<mode>_aarch64"
|
||
|
|
+ [(set (match_operand:TFD 0
|
||
|
|
"nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m")
|
||
|
|
- (match_operand:TF 1
|
||
|
|
+ (match_operand:TFD 1
|
||
|
|
"general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))]
|
||
|
|
- "TARGET_FLOAT && (register_operand (operands[0], TFmode)
|
||
|
|
- || aarch64_reg_or_fp_zero (operands[1], TFmode))"
|
||
|
|
+ "TARGET_FLOAT && (register_operand (operands[0], <MODE>mode)
|
||
|
|
+ || aarch64_reg_or_fp_zero (operands[1], <MODE>mode))"
|
||
|
|
"@
|
||
|
|
mov\\t%0.16b, %1.16b
|
||
|
|
#
|
||
|
|
@@ -1571,8 +1571,8 @@
|
||
|
|
)
|
||
|
|
|
||
|
|
(define_split
|
||
|
|
- [(set (match_operand:TF 0 "register_operand" "")
|
||
|
|
- (match_operand:TF 1 "nonmemory_operand" ""))]
|
||
|
|
+ [(set (match_operand:TFD 0 "register_operand" "")
|
||
|
|
+ (match_operand:TFD 1 "nonmemory_operand" ""))]
|
||
|
|
"reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])"
|
||
|
|
[(const_int 0)]
|
||
|
|
{
|
||
|
|
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
||
|
|
index 967e6b0b1..d0cd1b788 100644
|
||
|
|
--- a/gcc/config/aarch64/iterators.md
|
||
|
|
+++ b/gcc/config/aarch64/iterators.md
|
||
|
|
@@ -67,14 +67,24 @@
|
||
|
|
(define_mode_iterator GPF_TF_F16 [HF SF DF TF])
|
||
|
|
|
||
|
|
;; Iterator for all scalar floating point modes suitable for moving, including
|
||
|
|
-;; special BF type (HF, SF, DF, TF and BF)
|
||
|
|
-(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF])
|
||
|
|
+;; special BF type and decimal floating point types (HF, SF, DF, TF, BF,
|
||
|
|
+;; SD, DD and TD)
|
||
|
|
+(define_mode_iterator GPF_TF_F16_MOV [HF BF SF DF TF SD DD TD])
|
||
|
|
+
|
||
|
|
+;; Iterator for scalar 32bit fp modes (SF, SD)
|
||
|
|
+(define_mode_iterator SFD [SD SF])
|
||
|
|
+
|
||
|
|
+;; Iterator for scalar 64bit fp modes (DF, DD)
|
||
|
|
+(define_mode_iterator DFD [DD DF])
|
||
|
|
+
|
||
|
|
+;; Iterator for scalar 128bit fp modes (TF, TD)
|
||
|
|
+(define_mode_iterator TFD [TD TF])
|
||
|
|
|
||
|
|
;; Double vector modes.
|
||
|
|
(define_mode_iterator VDF [V2SF V4HF])
|
||
|
|
|
||
|
|
-;; Iterator for all scalar floating point modes (SF, DF and TF)
|
||
|
|
-(define_mode_iterator GPF_TF [SF DF TF])
|
||
|
|
+;; Iterator for all scalar floating point modes (SF, DF, TF, SD, DD, and TD)
|
||
|
|
+(define_mode_iterator GPF_TF [SF DF TF SD DD TD])
|
||
|
|
|
||
|
|
;; Integer Advanced SIMD modes.
|
||
|
|
(define_mode_iterator VDQ_I [V8QI V16QI V4HI V8HI V2SI V4SI V2DI])
|
||
|
|
@@ -301,7 +311,7 @@
|
||
|
|
;; 2 and 4 lane SI modes.
|
||
|
|
(define_mode_iterator VS [V2SI V4SI])
|
||
|
|
|
||
|
|
-(define_mode_iterator TX [TI TF])
|
||
|
|
+(define_mode_iterator TX [TI TF TD])
|
||
|
|
|
||
|
|
;; Advanced SIMD opaque structure modes.
|
||
|
|
(define_mode_iterator VSTRUCT [OI CI XI])
|
||
|
|
@@ -403,10 +413,10 @@
|
||
|
|
V4x8HF V4x4SF V4x2DF V4x8BF])
|
||
|
|
|
||
|
|
;; Double scalar modes
|
||
|
|
-(define_mode_iterator DX [DI DF])
|
||
|
|
+(define_mode_iterator DX [DI DF DD])
|
||
|
|
|
||
|
|
;; Duplicate of the above
|
||
|
|
-(define_mode_iterator DX2 [DI DF])
|
||
|
|
+(define_mode_iterator DX2 [DI DF DD])
|
||
|
|
|
||
|
|
;; Single scalar modes
|
||
|
|
(define_mode_iterator SX [SI SF])
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|