[X86][SSE] Add general 32-bit LOAD + VZEXT_MOVL support to EltsFromConsecutiveLoads

This patch adds support for combining consecutive load (or undef) elements that together form a 32-bit value, followed by trailing undef/zero elements, into a single MOVD load.
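
For example, in the merge_8i16_i16_34uuuuuu test updated below, two consecutive i16 loads into the low elements of an otherwise undef vector previously lowered to a pair of pinsrw inserts and now become a single MOVD. The exact memory operand of the combined load is masked by the FileCheck regex in the test; the 6(%rdi) shown here is the expected operand inferred from the old pinsrw addresses, not taken from a check line:

  Before:
    pinsrw $0, 6(%rdi), %xmm0
    pinsrw $1, 8(%rdi), %xmm0

  After:
    movd 6(%rdi), %xmm0   # xmm0 = mem[0],zero,zero,zero (operand inferred; the check line elides it)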

Differential Revision: http://reviews.llvm.org/D16729

llvm-svn: 259796
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
index 6f1daad..5610797 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-loads-128.ll
@@ -418,21 +418,18 @@
 define <8 x i16> @merge_8i16_i16_34uuuuuu(i16* %ptr) nounwind uwtable noinline ssp {
 ; SSE-LABEL: merge_8i16_i16_34uuuuuu:
 ; SSE:       # BB#0:
-; SSE-NEXT:    pinsrw $0, 6(%rdi), %xmm0
-; SSE-NEXT:    pinsrw $1, 8(%rdi), %xmm0
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: merge_8i16_i16_34uuuuuu:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpinsrw $0, 6(%rdi), %xmm0, %xmm0
-; AVX-NEXT:    vpinsrw $1, 8(%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    retq
 ;
 ; X32-SSE-LABEL: merge_8i16_i16_34uuuuuu:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT:    pinsrw $0, 6(%eax), %xmm0
-; X32-SSE-NEXT:    pinsrw $1, 8(%eax), %xmm0
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-SSE-NEXT:    retl
   %ptr0 = getelementptr inbounds i16, i16* %ptr, i64 3
   %ptr1 = getelementptr inbounds i16, i16* %ptr, i64 4
@@ -537,42 +534,20 @@
 }
 
 define <16 x i8> @merge_16i8_i8_01u3uuzzuuuuuzzz(i8* %ptr) nounwind uwtable noinline ssp {
-; SSE2-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
-; SSE2:       # BB#0:
-; SSE2-NEXT:    movzbl (%rdi), %eax
-; SSE2-NEXT:    movzbl 1(%rdi), %ecx
-; SSE2-NEXT:    shll $8, %ecx
-; SSE2-NEXT:    orl %eax, %ecx
-; SSE2-NEXT:    pxor %xmm0, %xmm0
-; SSE2-NEXT:    pinsrw $0, %ecx, %xmm0
-; SSE2-NEXT:    movzbl 3(%rdi), %eax
-; SSE2-NEXT:    shll $8, %eax
-; SSE2-NEXT:    pinsrw $1, %eax, %xmm0
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
-; SSE41:       # BB#0:
-; SSE41-NEXT:    pxor %xmm0, %xmm0
-; SSE41-NEXT:    pinsrb $0, (%rdi), %xmm0
-; SSE41-NEXT:    pinsrb $1, 1(%rdi), %xmm0
-; SSE41-NEXT:    pinsrb $3, 3(%rdi), %xmm0
-; SSE41-NEXT:    retq
+; SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
+; SSE:       # BB#0:
+; SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $0, (%rdi), %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm0
-; AVX-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm0
+; AVX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; AVX-NEXT:    retq
 ;
 ; X32-SSE-LABEL: merge_16i8_i8_01u3uuzzuuuuuzzz:
 ; X32-SSE:       # BB#0:
 ; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-SSE-NEXT:    pxor %xmm0, %xmm0
-; X32-SSE-NEXT:    pinsrb $0, (%eax), %xmm0
-; X32-SSE-NEXT:    pinsrb $1, 1(%eax), %xmm0
-; X32-SSE-NEXT:    pinsrb $3, 3(%eax), %xmm0
+; X32-SSE-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; X32-SSE-NEXT:    retl
   %ptr0 = getelementptr inbounds i8, i8* %ptr, i64 0
   %ptr1 = getelementptr inbounds i8, i8* %ptr, i64 1