[AVX512] Don't mark EXTLOAD as legal with AVX512. Continue using custom lowering.

Summary:
This was impeding our ability to combine the extending shuffles with other shuffles, as can be seen from the test changes.

There's one special case that needed to be added to use VZEXT directly for v8i8->v8i64, since the custom lowering requires v64i8.

Reviewers: RKSimon, zvi, delena

Reviewed By: delena

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D38714

llvm-svn: 315860
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 08a1160..97b20b1 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -235,65 +235,33 @@
 }
 
 define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
-; X32-AVX2-LABEL: broadcast_mem_v4i16_v8i16:
-; X32-AVX2:       ## BB#0:
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-AVX2-NEXT:    retl
+; X32-LABEL: broadcast_mem_v4i16_v8i16:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT:    retl
 ;
-; X64-AVX2-LABEL: broadcast_mem_v4i16_v8i16:
-; X64-AVX2:       ## BB#0:
-; X64-AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
-; X64-AVX2-NEXT:    retq
-;
-; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16:
-; X32-AVX512VL:       ## BB#0:
-; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X32-AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
-; X32-AVX512VL-NEXT:    retl
-;
-; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16:
-; X64-AVX512VL:       ## BB#0:
-; X64-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
-; X64-AVX512VL-NEXT:    retq
+; X64-LABEL: broadcast_mem_v4i16_v8i16:
+; X64:       ## BB#0:
+; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
+; X64-NEXT:    retq
   %load = load <4 x i16>, <4 x i16>* %ptr
   %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <8 x i16> %shuf
 }
 
 define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
-; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
-; X32-AVX2:       ## BB#0:
-; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
-; X32-AVX2-NEXT:    retl
+; X32-LABEL: broadcast_mem_v4i16_v16i16:
+; X32:       ## BB#0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT:    vbroadcastsd %xmm0, %ymm0
+; X32-NEXT:    retl
 ;
-; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
-; X64-AVX2:       ## BB#0:
-; X64-AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
-; X64-AVX2-NEXT:    retq
-;
-; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
-; X32-AVX512VL:       ## BB#0:
-; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X32-AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X32-AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X32-AVX512VL-NEXT:    retl
-;
-; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
-; X64-AVX512VL:       ## BB#0:
-; X64-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X64-AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X64-AVX512VL-NEXT:    retq
+; X64-LABEL: broadcast_mem_v4i16_v16i16:
+; X64:       ## BB#0:
+; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
+; X64-NEXT:    retq
   %load = load <4 x i16>, <4 x i16>* %ptr
   %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <16 x i16> %shuf