[RISCVInsertVSETVLI] Remove an unsound optimization

This fixes a bug reported privately by @craig.topper. Here's an example which illustrates the problem:

vsetvli a1, a0, e32, m1, ta, mu # both DefInfo and PrevInfo
vsetvli a2, a1, e32, m4, ta, mu

With the unsound result being:

vsetvli a1, a0, e32, m1, ta, mu
vsetvli a2, a0, e32, m4, ta, mu

Consider the case where this is running on a machine with VLEN=512. For this case, the VLMAXs are 16 and 64 respectively.

Now consider a0 = 33. The correct result is a1 = 16 and a2 = 16: the first vsetvli clamps vl to its VLMAX of 16, and the second receives that value as its AVL.

After the unsound optimization: a1 = 16 and a2 = 33, because the second vsetvli now sees the original AVL of 33, which is below its VLMAX of 64.
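
For reference, here's a minimal standalone sketch of the arithmetic (plain C, not part of the patch), assuming the common policy of vl = min(AVL, VLMAX); the spec additionally allows any vl in [ceil(AVL/2), VLMAX] when VLMAX < AVL < 2 * VLMAX:

  #include <stdio.h>

  /* VLMAX = (VLEN / SEW) * LMUL */
  static unsigned vlmax(unsigned vlen, unsigned sew, unsigned lmul) {
    return vlen / sew * lmul;
  }

  /* vl selection under the common vl = min(AVL, VLMAX) policy. */
  static unsigned vl(unsigned avl, unsigned vmax) {
    return avl < vmax ? avl : vmax;
  }

  int main(void) {
    unsigned a0 = 33;
    unsigned a1 = vl(a0, vlmax(512, 32, 1));  /* VLMAX=16 -> a1 = 16 */
    unsigned a2 = vl(a1, vlmax(512, 32, 4));  /* VLMAX=64 -> a2 = 16 */
    unsigned bad = vl(a0, vlmax(512, 32, 4)); /* forwarded AVL -> 33  */
    printf("a1=%u a2=%u a2_unsound=%u\n", a1, a2, bad);
    return 0;
  }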

This particular example used VLMAXs which differed by more than a single power of two. With a difference of only one power of two, there's another form of this bug which involves the AVL < 2 * VLMAX special case, but that one is more complicated to construct because many examples turn out to be accidentally sound.
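
To sketch that variant with illustrative numbers (not from the original report): with VLEN=512, e32/m1 gives VLMAX=16 and e32/m2 gives VLMAX=32. For a0 = 20, the first vsetvli falls in the VLMAX < AVL < 2 * VLMAX window, so the spec lets it pick any vl in [10, 16]; a typical implementation picks 16. Forwarding a0 = 20 to the second vsetvli (VLMAX=32) instead yields vl = 20. Cases where a0 is at most the first VLMAX stay accidentally sound, since there vl = AVL exactly.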

This patch takes the approach of simply removing the unsound optimization, but there are multiple sound sub-cases of it. I plan to return to at least a couple of them, but figured it was cleaner to remove the unsound optimization (for ease of backporting), and then review the new optimizations on their own.

Differential Revision: https://reviews.llvm.org/D131264

GitOrigin-RevId: 9a9848f4b95895ad97d3dc117f0a94773dc1607f
diff --git a/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index fc0a983..5d9bd2f 100644
--- a/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1022,16 +1022,10 @@
     return;
   }
 
-  // Two cases involving an AVL resulting from a previous vsetvli.
-  // 1) If the AVL is the result of a previous vsetvli which has the
-  //    same AVL and VLMAX as our current state, we can reuse the AVL
-  //    from the current state for the new one.  This allows us to
-  //    generate 'vsetvli x0, x0, vtype" or possible skip the transition
-  //    entirely.
-  // 2) If AVL is defined by a vsetvli with the same VLMAX, we can
-  //    replace the AVL operand with the AVL of the defining vsetvli.
-  //    We avoid general register AVLs to avoid extending live ranges
-  //    without being sure we can kill the original source reg entirely.
+  // If AVL is defined by a vsetvli with the same VLMAX, we can
+  // replace the AVL operand with the AVL of the defining vsetvli.
+  // We avoid general register AVLs to avoid extending live ranges
+  // without being sure we can kill the original source reg entirely.
   if (!Info.hasAVLReg() || !Info.getAVLReg().isVirtual())
     return;
   MachineInstr *DefMI = MRI->getVRegDef(Info.getAVLReg());
@@ -1039,17 +1033,6 @@
     return;
 
   VSETVLIInfo DefInfo = getInfoForVSETVLI(*DefMI);
-  // case 1
-  if (PrevInfo.isValid() && !PrevInfo.isUnknown() &&
-      DefInfo.hasSameAVL(PrevInfo) &&
-      DefInfo.hasSameVLMAX(PrevInfo)) {
-    if (PrevInfo.hasAVLImm())
-      Info.setAVLImm(PrevInfo.getAVLImm());
-    else
-      Info.setAVLReg(PrevInfo.getAVLReg());
-    return;
-  }
-  // case 2
   if (DefInfo.hasSameVLMAX(Info) &&
       (DefInfo.hasAVLImm() || DefInfo.getAVLReg() == RISCV::X0)) {
     if (DefInfo.hasAVLImm())
diff --git a/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
index 2184925..65d1bf6 100644
--- a/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
+++ b/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll
@@ -837,7 +837,7 @@
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    andi a1, a0, 1
 ; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, mu
-; CHECK-NEXT:    vsetvli a2, zero, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
 ; CHECK-NEXT:    vadd.vv v8, v8, v9
 ; CHECK-NEXT:    beqz a1, .LBB18_2
 ; CHECK-NEXT:  # %bb.1: # %if
diff --git a/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
index d3c3712..728139b 100644
--- a/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
+++ b/test/CodeGen/RISCV/rvv/vsetvli-insert.ll
@@ -18,6 +18,7 @@
 define <vscale x 1 x double> @test1(i64 %avl, <vscale x 1 x double> %a, <vscale x 1 x double> %b) nounwind {
 ; CHECK-LABEL: test1:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a0, a0, e32, mf2, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a0, e64, m1, ta, mu
 ; CHECK-NEXT:    vfadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
@@ -412,7 +413,7 @@
 ; CHECK-LABEL: avl_forward1b_neg:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli a1, 6, e16, m1, ta, mu
-; CHECK-NEXT:    vsetivli zero, 6, e32, m1, ta, mu
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    ret
@@ -467,6 +468,7 @@
 define void @avl_forward4(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
 ; CHECK-LABEL: avl_forward4:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e16, m1, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
 ; CHECK-NEXT:    ret
@@ -480,10 +482,10 @@
 define i64 @avl_forward4b(<vscale x 2 x i32> %v, <vscale x 2 x i32>* %p, i64 %reg) nounwind {
 ; CHECK-LABEL: avl_forward4b:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetvli a2, a1, e16, m1, ta, mu
+; CHECK-NEXT:    vsetvli a1, a1, e16, m1, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    mv a0, a2
+; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    ret
 entry:
   %vl = tail call i64 @llvm.riscv.vsetvli(i64 %reg, i64 1, i64 0)
@@ -496,6 +498,7 @@
 define <vscale x 1 x i64> @vleNff(i64* %str, i64 %n, i64 %x) {
 ; CHECK-LABEL: vleNff:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    vsetvli zero, zero, e64, m1, tu, mu
@@ -516,6 +519,7 @@
 define <vscale x 1 x i64> @vleNff2(i64* %str, i64 %n, i64 %x) {
 ; CHECK-LABEL: vleNff2:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    vadd.vx v8, v8, a2
@@ -541,6 +545,7 @@
 ; CHECK-LABEL: avl_forward5:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a1, 32
+; CHECK-NEXT:    vsetvli a1, a1, e8, m4, ta, mu
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT:    vle32.v v8, (a0)
 ; CHECK-NEXT:    ret