[AVX512] Don't mark EXTLOAD as legal with AVX512. Continue using custom lowering.
Summary:
Marking EXTLOAD as Legal was impeding our ability to combine the extending shuffles with other shuffles, as you can see from the test changes.
One special case had to be added that uses VZEXT directly for v8i8->v8i64, since the custom lowering requires v64i8.
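For context, a minimal sketch of the shape of the lowering-side change (the X86ISelLowering.cpp hunk is not reproduced below, so the exact lines are assumed, not verbatim): in the X86TargetLowering constructor, the extending-load actions for the 512-bit integer types move from Legal to Custom, e.g.:

  // Assumed shape of the change (illustrative, not the verbatim patch):
  // reporting these extending loads as Custom rather than Legal keeps the
  // extend visible to DAG combining, so it can be folded into shuffles.
  setLoadExtAction(ISD::EXTLOAD, MVT::v16i32, MVT::v16i8,  Custom); // was Legal
  setLoadExtAction(ISD::EXTLOAD, MVT::v16i32, MVT::v16i16, Custom); // was Legal
  setLoadExtAction(ISD::EXTLOAD, MVT::v8i64,  MVT::v8i8,   Custom); // was Legal
  setLoadExtAction(ISD::EXTLOAD, MVT::v8i64,  MVT::v8i16,  Custom); // was Legal
  setLoadExtAction(ISD::EXTLOAD, MVT::v8i64,  MVT::v8i32,  Custom); // was Legal
  // The custom path widens the source through v64i8, which is why the
  // v8i8->v8i64 case noted above is special-cased to emit X86ISD::VZEXT
  // directly instead.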
Reviewers: RKSimon, zvi, delena
Reviewed By: delena
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D38714
llvm-svn: 315860
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 08a1160..97b20b1 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -235,65 +235,33 @@
}

define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
-; X32-AVX2-LABEL: broadcast_mem_v4i16_v8i16:
-; X32-AVX2: ## BB#0:
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; X32-AVX2-NEXT: retl
+; X32-LABEL: broadcast_mem_v4i16_v8i16:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT: retl
;
-; X64-AVX2-LABEL: broadcast_mem_v4i16_v8i16:
-; X64-AVX2: ## BB#0:
-; X64-AVX2-NEXT: vpbroadcastq (%rdi), %xmm0
-; X64-AVX2-NEXT: retq
-;
-; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16:
-; X32-AVX512VL: ## BB#0:
-; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X32-AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
-; X32-AVX512VL-NEXT: retl
-;
-; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16:
-; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
-; X64-AVX512VL-NEXT: retq
+; X64-LABEL: broadcast_mem_v4i16_v8i16:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastq (%rdi), %xmm0
+; X64-NEXT: retq
%load = load <4 x i16>, <4 x i16>* %ptr
%shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <8 x i16> %shuf
}

define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
-; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
-; X32-AVX2: ## BB#0:
-; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
-; X32-AVX2-NEXT: retl
+; X32-LABEL: broadcast_mem_v4i16_v16i16:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: retl
;
-; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
-; X64-AVX2: ## BB#0:
-; X64-AVX2-NEXT: vbroadcastsd (%rdi), %ymm0
-; X64-AVX2-NEXT: retq
-;
-; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
-; X32-AVX512VL: ## BB#0:
-; X32-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X32-AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X32-AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X32-AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X32-AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
-; X32-AVX512VL-NEXT: retl
-;
-; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
-; X64-AVX512VL: ## BB#0:
-; X64-AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
-; X64-AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
-; X64-AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; X64-AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
-; X64-AVX512VL-NEXT: retq
+; X64-LABEL: broadcast_mem_v4i16_v16i16:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-NEXT: retq
%load = load <4 x i16>, <4 x i16>* %ptr
%shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
ret <16 x i16> %shuf