76 lines
2.8 KiB
Diff
76 lines
2.8 KiB
Diff
From 90518c07dfb770b680fd8bdba76dd1b39103277d Mon Sep 17 00:00:00 2001
|
|
From: Richard Biener <rguenther@suse.de>
|
|
Date: Fri, 10 Nov 2023 12:39:11 +0100
|
|
Subject: [PATCH 151/157] [Backport][SME] tree-optimization/110221 - SLP and
|
|
loop mask/len
|
|
|
|
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
|
|
|
|
The following fixes the issue that when SLP stmts are internal defs
|
|
but appear invariant because they end up only using invariant defs
|
|
then they get scheduled outside of the loop. This nice optimization
|
|
breaks down when loop masks or lens are applied since those are not
|
|
explicitly tracked as dependences. The following makes sure to never
|
|
schedule internal defs outside of the vectorized loop when the
|
|
loop uses masks/lens.
|
|
|
|
PR tree-optimization/110221
|
|
* tree-vect-slp.cc (vect_schedule_slp_node): When loop
|
|
masking / len is applied make sure to not schedule
|
|
internal defs outside of the loop.
|
|
|
|
* gfortran.dg/pr110221.f: New testcase.
|
|
---
|
|
gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
|
|
gcc/tree-vect-slp.cc | 10 ++++++++++
|
|
2 files changed, 27 insertions(+)
|
|
create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
|
|
|
|
diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
|
|
new file mode 100644
|
|
index 000000000..8b5738431
|
|
--- /dev/null
|
|
+++ b/gcc/testsuite/gfortran.dg/pr110221.f
|
|
@@ -0,0 +1,17 @@
|
|
+C PR middle-end/68146
|
|
+C { dg-do compile }
|
|
+C { dg-options "-O2 -w" }
|
|
+C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
|
|
+ SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
|
|
+ IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
|
|
+ IMPLICIT COMPLEX*16 (C,Z)
|
|
+ DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
|
|
+ N=INT(V)
|
|
+ CALL GAMMA2(VG,GA)
|
|
+ DO 65 K=1,N
|
|
+ CBY(K)=CYY
|
|
+65 CONTINUE
|
|
+ CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
|
|
+ DO 70 K=1,N
|
|
+70 CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
|
|
+ END
|
|
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
|
|
index d02f0ce37..e3e246977 100644
|
|
--- a/gcc/tree-vect-slp.cc
|
|
+++ b/gcc/tree-vect-slp.cc
|
|
@@ -8531,6 +8531,16 @@ vect_schedule_slp_node (vec_info *vinfo,
|
|
/* Emit other stmts after the children vectorized defs which is
|
|
earliest possible. */
|
|
gimple *last_stmt = NULL;
|
|
+ if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
|
|
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
|
|
+ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
|
|
+ {
|
|
+ /* But avoid scheduling internal defs outside of the loop when
|
|
+ we might have only implicitly tracked loop mask/len defs. */
|
|
+ gimple_stmt_iterator si
|
|
+ = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
|
|
+ last_stmt = *si;
|
|
+ }
|
|
bool seen_vector_def = false;
|
|
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
|
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
|
|
--
|
|
2.33.0
|
|
|