[DAGCombiner] Improved target independent vector shuffle combine rule. This patch improves the existing algorithm in DAGCombiner that attempts to fold shuffles according to rule: shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3) Before this change, there were cases where the DAGCombiner conservatively avoided folding shuffles even if the resulting mask would have been legal. That is because the algorithm wrongly assumed that commuting an illegal shuffle mask would always produce an illegal mask. With this change, we now correctly compute the commuted shuffle mask before calling method 'isShuffleMaskLegal' on it. On X86, this improves for example the codegen for the following function: define <4 x i32> @test(<4 x i32> %A, <4 x i32> %B) { %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7> %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3> ret <4 x i32> %2 } Before this change the X86 backend (-mcpu=corei7) generated the following assembly code for function @test: shufps $-23, %xmm0, %xmm1 # xmm1 = xmm1[1,2],xmm0[2,3] movhlps %xmm1, %xmm1 # xmm1 = xmm1[1,1] movaps %xmm1, %xmm0 Now we produce: movhlps %xmm0, %xmm0 # xmm0 = xmm0[1,1] Added extra test cases in combine-vec-shuffle-2.ll to verify that we correctly fold according to the above-mentioned rule. llvm-svn: 215555

commit: ace8e1e3d44399de3933b068726eb90b11578ccf [log] [tgz]
author: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> Wed Aug 13 16:09:40 2014 +0000
committer: Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net> Wed Aug 13 16:09:40 2014 +0000
tree: 15e9a0bb4aed8533a656eee10c013156c95d4ad0
parent: f67672e41c8972f7a0e9f503e0cf58b21af24788 [diff]
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e8cbd8a..603ccb0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -10838,16 +10838,30 @@
 
     // It may still be beneficial to combine the two shuffles if the
     // resulting shuffle is legal.
-    if (TLI.isTypeLegal(VT) && TLI.isShuffleMaskLegal(Mask, VT)) {
-      if (!CommuteOperands)
-        // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
-        // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
-        return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
-                                    &Mask[0]);
-      
-      //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3)
-      return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1),
-                                  &Mask[0]);
+    if (TLI.isTypeLegal(VT)) {
+      if (!CommuteOperands) {
+        if (TLI.isShuffleMaskLegal(Mask, VT))
+          // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3).
+          // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3)
+          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1,
+                                      &Mask[0]);
+      } else {
+        // Compute the commuted shuffle mask.
+        for (unsigned i = 0; i != NumElts; ++i) {
+          int idx = Mask[i];
+          if (idx < 0)
+            continue;
+          else if (idx < (int)NumElts)
+            Mask[i] = idx + NumElts;
+          else
+            Mask[i] = idx - NumElts;
+        }
+
+        if (TLI.isShuffleMaskLegal(Mask, VT))
+          //   shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(y, undef, M3)
+          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(1), N1,
+                                      &Mask[0]);
+      }
     }
   }
 

diff --git a/llvm/test/CodeGen/X86/combine-vec-shuffle-2.ll b/llvm/test/CodeGen/X86/combine-vec-shuffle-2.ll
index 877d382..872384c 100644
--- a/llvm/test/CodeGen/X86/combine-vec-shuffle-2.ll
+++ b/llvm/test/CodeGen/X86/combine-vec-shuffle-2.ll

@@ -204,8 +204,8 @@
   ret <4 x i32> %2
 }
 ; CHECK-LABEL: test18
-; CHECK: blendps $11
-; CHECK-NEXT: pshufd $-59
+; CHECK-NOT: blendps
+; CHECK: pshufd {{.*}} # xmm0 = xmm1[1,1,0,3]
 ; CHECK-NEXT: ret
 
 define <4 x i32> @test19(<4 x i32> %A, <4 x i32> %B) {
@@ -240,6 +240,8 @@
 ; CHECK-NEXT: pshufd $-60
 ; CHECK-NEXT: ret
 
+; Test that we correctly combine shuffles according to rule
+;  shuffle(shuffle(x, y), undef) -> shuffle(y, undef)
 
 define <4 x i32> @test22(<4 x i32> %A, <4 x i32> %B) {
   %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
@@ -247,7 +249,69 @@
   ret <4 x i32> %2
 }
 ; CHECK-LABEL: test22
-; CHECK: blendps $11
-; CHECK-NEXT: pshufd $-43
+; CHECK-NOT: blendps
+; CHECK: pshufd {{.*}} # xmm0 = xmm1[1,1,1,3]
+; CHECK-NEXT: ret
+
+define <4 x i32> @test23(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test23
+; CHECK-NOT: blendps
+; CHECK: pshufd {{.*}} # xmm0 = xmm1[0,1,0,3]
+; CHECK-NEXT: ret
+
+define <4 x i32> @test24(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %A, <4 x i32> %B, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 2, i32 4>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test24
+; CHECK-NOT: blendps
+; CHECK: pshufd {{.*}} # xmm0 = xmm1[0,3,2,0]
+; CHECK-NEXT: ret
+
+define <4 x i32> @test25(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 5, i32 2, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 3, i32 1>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test25
+; CHECK-NOT:  shufps
+; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; CHECK-NEXT: ret
+
+define <4 x i32> @test26(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 6, i32 7>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test26
+; CHECK-NOT: shufps
+; CHECK: movhlps {{.*}} # xmm0 = xmm0[1,1]
+; CHECK-NEXT: ret
+
+define <4 x i32> @test27(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 2, i32 1, i32 5, i32 4>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test27
+; CHECK-NOT: shufps
+; CHECK-NOT: movhlps
+; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,0,1]
+; CHECK-NEXT: ret
+
+define <4 x i32> @test28(<4 x i32> %A, <4 x i32> %B) {
+  %1 = shufflevector <4 x i32> %B, <4 x i32> %A, <4 x i32> <i32 1, i32 2, i32 4, i32 5>
+  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 2>
+  ret <4 x i32> %2
+}
+; CHECK-LABEL: test28
+; CHECK-NOT: shufps
+; CHECK-NOT: movhlps
+; CHECK: pshufd {{.*}} # xmm0 = xmm0[0,1,1,0]
 ; CHECK-NEXT: ret
commit	ace8e1e3d44399de3933b068726eb90b11578ccf	[log] [tgz]
author	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	Wed Aug 13 16:09:40 2014 +0000
committer	Andrea Di Biagio <Andrea_DiBiagio@sn.scee.net>	Wed Aug 13 16:09:40 2014 +0000
tree	15e9a0bb4aed8533a656eee10c013156c95d4ad0
parent	f67672e41c8972f7a0e9f503e0cf58b21af24788 [diff]