[X86] More aggressive shuffle mask widening in combineExtractWithShuffle
Use the demanded extract index to set every shuffle mask element outside the extracted element's range to undef, making the mask easier to widen and peek through.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351013 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 06802aa..843d287 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -33691,6 +33691,15 @@
scaleShuffleMask<int>(Scale, Mask, ScaledMask);
Mask = std::move(ScaledMask);
} else if ((Mask.size() % NumSrcElts) == 0) {
+ // Simplify Mask based on demanded element.
+ int ExtractIdx = (int)N->getConstantOperandVal(1);
+ int Scale = Mask.size() / NumSrcElts;
+ int Lo = Scale * ExtractIdx;
+ int Hi = Scale * (ExtractIdx + 1);
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i)
+ if (i < Lo || Hi <= i)
+ Mask[i] = SM_SentinelUndef;
+
SmallVector<int, 16> WidenedMask;
while (Mask.size() > NumSrcElts &&
canWidenShuffleElements(Mask, WidenedMask))
diff --git a/test/CodeGen/X86/and-load-fold.ll b/test/CodeGen/X86/and-load-fold.ll
index 27520f8..367ef2a 100644
--- a/test/CodeGen/X86/and-load-fold.ll
+++ b/test/CodeGen/X86/and-load-fold.ll
@@ -7,8 +7,7 @@
; CHECK-LABEL: foo:
; CHECK: # %bb.0:
; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; CHECK-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; CHECK-NEXT: pextrw $2, %xmm0, %eax
+; CHECK-NEXT: pextrw $1, %xmm0, %eax
; CHECK-NEXT: andb $95, %al
; CHECK-NEXT: # kill: def $al killed $al killed $eax
; CHECK-NEXT: retq