[DAG, X86] Revert r327197 "Revert r327170, r327171, r327172"
Reland the ISel cycle-checking improvements after simplifying the node id
invariant traversal and correcting a typo.
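
For reference, the cycle being guarded against is easiest to see in the
pr36274.ll test added below; a minimal sketch of the same pattern (the
function name is illustrative) is:

  @vx = external global <2 x i32>, align 8

  define void @carry_dep_sketch() {
    %a0 = getelementptr <2 x i32>, <2 x i32>* @vx, i32 0, i32 0
    %a1 = getelementptr <2 x i32>, <2 x i32>* @vx, i32 0, i32 1
    %x0 = load i32, i32* %a0, align 8
    %x1 = load i32, i32* %a1, align 4
    %v0 = insertelement <2 x i32> undef, i32 %x0, i32 0
    %v1 = insertelement <2 x i32> %v0, i32 %x1, i32 1
    %x = bitcast <2 x i32> %v1 to i64
    ; On i386 this i64 add is split into an addl/adcl pair.
    %add = add i64 %x, 1
    %v = bitcast i64 %add to <2 x i32>
    %r0 = extractelement <2 x i32> %v, i32 0
    %r1 = extractelement <2 x i32> %v, i32 1
    ; Fusing "addl $1, vx" as a load-op-store makes the adcl's carry-in
    ; depend on the fused node; also folding the high-half load/store
    ; without seeing that dependence would create a cycle in the DAG.
    store i32 %r0, i32* %a0, align 8
    store i32 %r1, i32* %a1, align 4
    ret void
  }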
llvm-svn: 327898
diff --git a/llvm/test/CodeGen/X86/avg.ll b/llvm/test/CodeGen/X86/avg.ll
index 0439c87..3c69728 100644
--- a/llvm/test/CodeGen/X86/avg.ll
+++ b/llvm/test/CodeGen/X86/avg.ll
@@ -90,12 +90,12 @@
define void @avg_v32i8(<32 x i8>* %a, <32 x i8>* %b) nounwind {
; SSE2-LABEL: avg_v32i8:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa 16(%rdi), %xmm0
-; SSE2-NEXT: movdqa (%rsi), %xmm1
-; SSE2-NEXT: pavgb (%rdi), %xmm1
-; SSE2-NEXT: pavgb 16(%rsi), %xmm0
-; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: movdqa (%rsi), %xmm0
+; SSE2-NEXT: movdqa 16(%rsi), %xmm1
+; SSE2-NEXT: pavgb (%rdi), %xmm0
+; SSE2-NEXT: pavgb 16(%rdi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v32i8:
@@ -528,18 +528,18 @@
define void @avg_v64i8(<64 x i8>* %a, <64 x i8>* %b) nounwind {
; SSE2-LABEL: avg_v64i8:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa 32(%rdi), %xmm0
-; SSE2-NEXT: movdqa (%rsi), %xmm1
-; SSE2-NEXT: movdqa 16(%rsi), %xmm2
+; SSE2-NEXT: movdqa (%rsi), %xmm0
+; SSE2-NEXT: movdqa 16(%rsi), %xmm1
+; SSE2-NEXT: movdqa 32(%rsi), %xmm2
; SSE2-NEXT: movdqa 48(%rsi), %xmm3
-; SSE2-NEXT: pavgb (%rdi), %xmm1
-; SSE2-NEXT: pavgb 16(%rdi), %xmm2
-; SSE2-NEXT: pavgb 32(%rsi), %xmm0
+; SSE2-NEXT: pavgb (%rdi), %xmm0
+; SSE2-NEXT: pavgb 16(%rdi), %xmm1
+; SSE2-NEXT: pavgb 32(%rdi), %xmm2
; SSE2-NEXT: pavgb 48(%rdi), %xmm3
; SSE2-NEXT: movdqu %xmm3, (%rax)
-; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm1, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v64i8:
@@ -565,23 +565,23 @@
;
; AVX2-LABEL: avg_v64i8:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX2-NEXT: vmovdqa (%rsi), %ymm1
-; AVX2-NEXT: vpavgb (%rdi), %ymm1, %ymm1
-; AVX2-NEXT: vpavgb 32(%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vmovdqa (%rsi), %ymm0
+; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX2-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vpavgb 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqu %ymm1, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: avg_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512F-NEXT: vpavgb (%rdi), %ymm1, %ymm1
-; AVX512F-NEXT: vpavgb 32(%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX512F-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; AVX512F-NEXT: vpavgb 32(%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
+; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -661,12 +661,12 @@
define void @avg_v16i16(<16 x i16>* %a, <16 x i16>* %b) nounwind {
; SSE2-LABEL: avg_v16i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa 16(%rdi), %xmm0
-; SSE2-NEXT: movdqa (%rsi), %xmm1
-; SSE2-NEXT: pavgw (%rdi), %xmm1
-; SSE2-NEXT: pavgw 16(%rsi), %xmm0
-; SSE2-NEXT: movdqu %xmm0, (%rax)
+; SSE2-NEXT: movdqa (%rsi), %xmm0
+; SSE2-NEXT: movdqa 16(%rsi), %xmm1
+; SSE2-NEXT: pavgw (%rdi), %xmm0
+; SSE2-NEXT: pavgw 16(%rdi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v16i16:
@@ -712,18 +712,18 @@
define void @avg_v32i16(<32 x i16>* %a, <32 x i16>* %b) nounwind {
; SSE2-LABEL: avg_v32i16:
; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa 32(%rdi), %xmm0
-; SSE2-NEXT: movdqa (%rsi), %xmm1
-; SSE2-NEXT: movdqa 16(%rsi), %xmm2
+; SSE2-NEXT: movdqa (%rsi), %xmm0
+; SSE2-NEXT: movdqa 16(%rsi), %xmm1
+; SSE2-NEXT: movdqa 32(%rsi), %xmm2
; SSE2-NEXT: movdqa 48(%rsi), %xmm3
-; SSE2-NEXT: pavgw (%rdi), %xmm1
-; SSE2-NEXT: pavgw 16(%rdi), %xmm2
-; SSE2-NEXT: pavgw 32(%rsi), %xmm0
+; SSE2-NEXT: pavgw (%rdi), %xmm0
+; SSE2-NEXT: pavgw 16(%rdi), %xmm1
+; SSE2-NEXT: pavgw 32(%rdi), %xmm2
; SSE2-NEXT: pavgw 48(%rdi), %xmm3
; SSE2-NEXT: movdqu %xmm3, (%rax)
-; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm1, (%rax)
+; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
;
; AVX1-LABEL: avg_v32i16:
@@ -749,23 +749,23 @@
;
; AVX2-LABEL: avg_v32i16:
; AVX2: # %bb.0:
-; AVX2-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX2-NEXT: vmovdqa (%rsi), %ymm1
-; AVX2-NEXT: vpavgw (%rdi), %ymm1, %ymm1
-; AVX2-NEXT: vpavgw 32(%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rax)
+; AVX2-NEXT: vmovdqa (%rsi), %ymm0
+; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX2-NEXT: vpavgw (%rdi), %ymm0, %ymm0
+; AVX2-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqu %ymm1, (%rax)
+; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: avg_v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm0
-; AVX512F-NEXT: vmovdqa (%rsi), %ymm1
-; AVX512F-NEXT: vpavgw (%rdi), %ymm1, %ymm1
-; AVX512F-NEXT: vpavgw 32(%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
+; AVX512F-NEXT: vmovdqa (%rsi), %ymm0
+; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX512F-NEXT: vpavgw (%rdi), %ymm0, %ymm0
+; AVX512F-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
+; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -874,9 +874,9 @@
; SSE2-LABEL: avg_v32i8_2:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: movdqa 16(%rsi), %xmm1
+; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: pavgb (%rsi), %xmm0
-; SSE2-NEXT: pavgb 16(%rdi), %xmm1
+; SSE2-NEXT: pavgb 16(%rsi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
@@ -1055,9 +1055,9 @@
; SSE2-LABEL: avg_v16i16_2:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
-; SSE2-NEXT: movdqa 16(%rsi), %xmm1
+; SSE2-NEXT: movdqa 16(%rdi), %xmm1
; SSE2-NEXT: pavgw (%rsi), %xmm0
-; SSE2-NEXT: pavgw 16(%rdi), %xmm1
+; SSE2-NEXT: pavgw 16(%rsi), %xmm1
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
@@ -1107,14 +1107,14 @@
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa (%rdi), %xmm0
; SSE2-NEXT: movdqa 16(%rdi), %xmm1
-; SSE2-NEXT: movdqa 48(%rdi), %xmm2
-; SSE2-NEXT: movdqa 32(%rsi), %xmm3
+; SSE2-NEXT: movdqa 32(%rdi), %xmm2
+; SSE2-NEXT: movdqa 48(%rdi), %xmm3
; SSE2-NEXT: pavgw (%rsi), %xmm0
; SSE2-NEXT: pavgw 16(%rsi), %xmm1
-; SSE2-NEXT: pavgw 32(%rdi), %xmm3
-; SSE2-NEXT: pavgw 48(%rsi), %xmm2
-; SSE2-NEXT: movdqu %xmm2, (%rax)
+; SSE2-NEXT: pavgw 32(%rsi), %xmm2
+; SSE2-NEXT: pavgw 48(%rsi), %xmm3
; SSE2-NEXT: movdqu %xmm3, (%rax)
+; SSE2-NEXT: movdqu %xmm2, (%rax)
; SSE2-NEXT: movdqu %xmm1, (%rax)
; SSE2-NEXT: movdqu %xmm0, (%rax)
; SSE2-NEXT: retq
@@ -1143,9 +1143,9 @@
; AVX2-LABEL: avg_v32i16_2:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa (%rdi), %ymm0
-; AVX2-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX2-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX2-NEXT: vpavgw (%rsi), %ymm0, %ymm0
-; AVX2-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
+; AVX2-NEXT: vpavgw 32(%rsi), %ymm1, %ymm1
; AVX2-NEXT: vmovdqu %ymm1, (%rax)
; AVX2-NEXT: vmovdqu %ymm0, (%rax)
; AVX2-NEXT: vzeroupper
@@ -1154,9 +1154,9 @@
; AVX512F-LABEL: avg_v32i16_2:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512F-NEXT: vmovdqa 32(%rsi), %ymm1
+; AVX512F-NEXT: vmovdqa 32(%rdi), %ymm1
; AVX512F-NEXT: vpavgw (%rsi), %ymm0, %ymm0
-; AVX512F-NEXT: vpavgw 32(%rdi), %ymm1, %ymm1
+; AVX512F-NEXT: vpavgw 32(%rsi), %ymm1, %ymm1
; AVX512F-NEXT: vmovdqu %ymm1, (%rax)
; AVX512F-NEXT: vmovdqu %ymm0, (%rax)
; AVX512F-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/avx-vbroadcastf128.ll b/llvm/test/CodeGen/X86/avx-vbroadcastf128.ll
index 7fdbf31..b502643 100644
--- a/llvm/test/CodeGen/X86/avx-vbroadcastf128.ll
+++ b/llvm/test/CodeGen/X86/avx-vbroadcastf128.ll
@@ -235,18 +235,16 @@
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovaps (%ecx), %xmm0
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT: vmovaps %ymm1, (%eax)
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: PR29088:
; X64: # %bb.0:
-; X64-NEXT: vmovaps (%rdi), %xmm0
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT: vmovaps %ymm1, (%rsi)
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
index 528dfcd..3ae6c0b 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1065,9 +1065,7 @@
; X64: ## %bb.0: ## %eintry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movb (%rdi), %al
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastb %xmm1, %xmm1
+; X64-NEXT: vpbroadcastb (%rdi), %xmm1
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
@@ -1118,9 +1116,7 @@
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %ymm0, (%rsp)
-; X64-NEXT: movb (%rdi), %al
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastb %xmm1, %ymm1
+; X64-NEXT: vpbroadcastb (%rdi), %ymm1
; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rbp, %rsp
@@ -1160,9 +1156,7 @@
; X64: ## %bb.0: ## %entry
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastw %xmm1, %xmm1
+; X64-NEXT: vpbroadcastw (%rdi), %xmm1
; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: retq
@@ -1213,9 +1207,7 @@
; X64-NEXT: subq $128, %rsp
; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovaps %ymm0, (%rsp)
-; X64-NEXT: movzwl (%rdi), %eax
-; X64-NEXT: vmovd %eax, %xmm1
-; X64-NEXT: vpbroadcastw %xmm1, %ymm1
+; X64-NEXT: vpbroadcastw (%rdi), %ymm1
; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
; X64-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
; X64-NEXT: movq %rbp, %rsp
@@ -1251,26 +1243,14 @@
; X32-NEXT: addl $60, %esp
; X32-NEXT: retl
;
-; X64-AVX2-LABEL: isel_crash_4d:
-; X64-AVX2: ## %bb.0: ## %entry
-; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: movl (%rdi), %eax
-; X64-AVX2-NEXT: vmovd %eax, %xmm1
-; X64-AVX2-NEXT: vpbroadcastd %xmm1, %xmm1
-; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512VL-LABEL: isel_crash_4d:
-; X64-AVX512VL: ## %bb.0: ## %entry
-; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: movl (%rdi), %eax
-; X64-AVX512VL-NEXT: vpbroadcastd %eax, %xmm1
-; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: retq
+; X64-LABEL: isel_crash_4d:
+; X64: ## %bb.0: ## %entry
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: vbroadcastss (%rdi), %xmm1
+; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
entry:
%__a.addr.i = alloca <2 x i64>, align 16
%__b.addr.i = alloca <2 x i64>, align 16
@@ -1307,46 +1287,24 @@
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
-; X64-AVX2-LABEL: isel_crash_8d:
-; X64-AVX2: ## %bb.0: ## %eintry
-; X64-AVX2-NEXT: pushq %rbp
-; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
-; X64-AVX2-NEXT: .cfi_offset %rbp, -16
-; X64-AVX2-NEXT: movq %rsp, %rbp
-; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
-; X64-AVX2-NEXT: andq $-32, %rsp
-; X64-AVX2-NEXT: subq $128, %rsp
-; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
-; X64-AVX2-NEXT: movl (%rdi), %eax
-; X64-AVX2-NEXT: vmovd %eax, %xmm1
-; X64-AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
-; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: movq %rbp, %rsp
-; X64-AVX2-NEXT: popq %rbp
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512VL-LABEL: isel_crash_8d:
-; X64-AVX512VL: ## %bb.0: ## %eintry
-; X64-AVX512VL-NEXT: pushq %rbp
-; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
-; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
-; X64-AVX512VL-NEXT: movq %rsp, %rbp
-; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
-; X64-AVX512VL-NEXT: andq $-32, %rsp
-; X64-AVX512VL-NEXT: subq $128, %rsp
-; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
-; X64-AVX512VL-NEXT: movl (%rdi), %eax
-; X64-AVX512VL-NEXT: vpbroadcastd %eax, %ymm1
-; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: movq %rbp, %rsp
-; X64-AVX512VL-NEXT: popq %rbp
-; X64-AVX512VL-NEXT: vzeroupper
-; X64-AVX512VL-NEXT: retq
+; X64-LABEL: isel_crash_8d:
+; X64: ## %bb.0: ## %eintry
+; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: .cfi_def_cfa_register %rbp
+; X64-NEXT: andq $-32, %rsp
+; X64-NEXT: subq $128, %rsp
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovaps %ymm0, (%rsp)
+; X64-NEXT: vbroadcastss (%rdi), %ymm1
+; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq %rbp, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
%__b.addr.i = alloca <4 x i64>, align 16
@@ -1370,33 +1328,20 @@
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X32-NEXT: vmovaps %xmm0, (%esp)
-; X32-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; X32-NEXT: vpbroadcastq %xmm1, %xmm1
+; X32-NEXT: vpbroadcastq (%eax), %xmm1
; X32-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: addl $60, %esp
; X32-NEXT: retl
;
-; X64-AVX2-LABEL: isel_crash_2q:
-; X64-AVX2: ## %bb.0: ## %entry
-; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: movq (%rdi), %rax
-; X64-AVX2-NEXT: vmovq %rax, %xmm1
-; X64-AVX2-NEXT: vpbroadcastq %xmm1, %xmm1
-; X64-AVX2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512VL-LABEL: isel_crash_2q:
-; X64-AVX512VL: ## %bb.0: ## %entry
-; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: movq (%rdi), %rax
-; X64-AVX512VL-NEXT: vpbroadcastq %rax, %xmm1
-; X64-AVX512VL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: retq
+; X64-LABEL: isel_crash_2q:
+; X64: ## %bb.0: ## %entry
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: vpbroadcastq (%rdi), %xmm1
+; X64-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-NEXT: vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; X64-NEXT: retq
entry:
%__a.addr.i = alloca <2 x i64>, align 16
%__b.addr.i = alloca <2 x i64>, align 16
@@ -1433,46 +1378,24 @@
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
-; X64-AVX2-LABEL: isel_crash_4q:
-; X64-AVX2: ## %bb.0: ## %eintry
-; X64-AVX2-NEXT: pushq %rbp
-; X64-AVX2-NEXT: .cfi_def_cfa_offset 16
-; X64-AVX2-NEXT: .cfi_offset %rbp, -16
-; X64-AVX2-NEXT: movq %rsp, %rbp
-; X64-AVX2-NEXT: .cfi_def_cfa_register %rbp
-; X64-AVX2-NEXT: andq $-32, %rsp
-; X64-AVX2-NEXT: subq $128, %rsp
-; X64-AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX2-NEXT: vmovaps %ymm0, (%rsp)
-; X64-AVX2-NEXT: movq (%rdi), %rax
-; X64-AVX2-NEXT: vmovq %rax, %xmm1
-; X64-AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
-; X64-AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX2-NEXT: movq %rbp, %rsp
-; X64-AVX2-NEXT: popq %rbp
-; X64-AVX2-NEXT: vzeroupper
-; X64-AVX2-NEXT: retq
-;
-; X64-AVX512VL-LABEL: isel_crash_4q:
-; X64-AVX512VL: ## %bb.0: ## %eintry
-; X64-AVX512VL-NEXT: pushq %rbp
-; X64-AVX512VL-NEXT: .cfi_def_cfa_offset 16
-; X64-AVX512VL-NEXT: .cfi_offset %rbp, -16
-; X64-AVX512VL-NEXT: movq %rsp, %rbp
-; X64-AVX512VL-NEXT: .cfi_def_cfa_register %rbp
-; X64-AVX512VL-NEXT: andq $-32, %rsp
-; X64-AVX512VL-NEXT: subq $128, %rsp
-; X64-AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; X64-AVX512VL-NEXT: vmovaps %ymm0, (%rsp)
-; X64-AVX512VL-NEXT: movq (%rdi), %rax
-; X64-AVX512VL-NEXT: vpbroadcastq %rax, %ymm1
-; X64-AVX512VL-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
-; X64-AVX512VL-NEXT: movq %rbp, %rsp
-; X64-AVX512VL-NEXT: popq %rbp
-; X64-AVX512VL-NEXT: vzeroupper
-; X64-AVX512VL-NEXT: retq
+; X64-LABEL: isel_crash_4q:
+; X64: ## %bb.0: ## %eintry
+; X64-NEXT: pushq %rbp
+; X64-NEXT: .cfi_def_cfa_offset 16
+; X64-NEXT: .cfi_offset %rbp, -16
+; X64-NEXT: movq %rsp, %rbp
+; X64-NEXT: .cfi_def_cfa_register %rbp
+; X64-NEXT: andq $-32, %rsp
+; X64-NEXT: subq $128, %rsp
+; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT: vmovaps %ymm0, (%rsp)
+; X64-NEXT: vbroadcastsd (%rdi), %ymm1
+; X64-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-NEXT: movq %rbp, %rsp
+; X64-NEXT: popq %rbp
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
%__b.addr.i = alloca <4 x i64>, align 16
diff --git a/llvm/test/CodeGen/X86/avx2-vbroadcasti128.ll b/llvm/test/CodeGen/X86/avx2-vbroadcasti128.ll
index 254cdfd..996e679 100644
--- a/llvm/test/CodeGen/X86/avx2-vbroadcasti128.ll
+++ b/llvm/test/CodeGen/X86/avx2-vbroadcasti128.ll
@@ -271,18 +271,16 @@
; X32: # %bb.0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: vmovaps (%ecx), %xmm0
; X32-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-NEXT: vmovaps %ymm1, (%eax)
-; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: PR29088:
; X64: # %bb.0:
-; X64-NEXT: vmovaps (%rdi), %xmm0
; X64-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-NEXT: vmovaps %ymm1, (%rsi)
-; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
diff --git a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
index c5ecb15..2bf69cf 100644
--- a/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
+++ b/llvm/test/CodeGen/X86/avx512-vbroadcasti128.ll
@@ -186,26 +186,23 @@
define <8 x i32> @PR29088(<4 x i32>* %p0, <8 x float>* %p1) {
; X64-AVX512VL-LABEL: PR29088:
; X64-AVX512VL: ## %bb.0:
-; X64-AVX512VL-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512VL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512VL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512VL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512BWVL-LABEL: PR29088:
; X64-AVX512BWVL: ## %bb.0:
-; X64-AVX512BWVL-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512BWVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512BWVL-NEXT: vmovdqa %ymm1, (%rsi)
-; X64-AVX512BWVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BWVL-NEXT: retq
;
; X64-AVX512DQVL-LABEL: PR29088:
; X64-AVX512DQVL: ## %bb.0:
-; X64-AVX512DQVL-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512DQVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512DQVL-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQVL-NEXT: vmovaps %ymm1, (%rsi)
-; X64-AVX512DQVL-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQVL-NEXT: retq
%ld = load <4 x i32>, <4 x i32>* %p0
store <8 x float> zeroinitializer, <8 x float>* %p1
diff --git a/llvm/test/CodeGen/X86/i256-add.ll b/llvm/test/CodeGen/X86/i256-add.ll
index 36d838a..85a885a 100644
--- a/llvm/test/CodeGen/X86/i256-add.ll
+++ b/llvm/test/CodeGen/X86/i256-add.ll
@@ -9,40 +9,30 @@
; X32-NEXT: pushl %ebx
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
-; X32-NEXT: subl $12, %esp
+; X32-NEXT: subl $8, %esp
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 8(%ecx), %edi
-; X32-NEXT: movl (%ecx), %edx
-; X32-NEXT: movl 4(%ecx), %ebx
-; X32-NEXT: movl 28(%eax), %esi
-; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: movl 24(%eax), %ebp
-; X32-NEXT: addl (%eax), %edx
-; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: adcl 4(%eax), %ebx
-; X32-NEXT: adcl 8(%eax), %edi
-; X32-NEXT: movl %edi, (%esp) # 4-byte Spill
-; X32-NEXT: movl 20(%eax), %edi
-; X32-NEXT: movl 12(%eax), %edx
-; X32-NEXT: movl 16(%eax), %esi
-; X32-NEXT: adcl 12(%ecx), %edx
-; X32-NEXT: adcl 16(%ecx), %esi
-; X32-NEXT: adcl 20(%ecx), %edi
-; X32-NEXT: movl %ebp, %eax
-; X32-NEXT: adcl 24(%ecx), %eax
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
-; X32-NEXT: adcl %ebp, 28(%ecx)
-; X32-NEXT: movl (%esp), %ebp # 4-byte Reload
-; X32-NEXT: movl %ebp, 8(%ecx)
-; X32-NEXT: movl %ebx, 4(%ecx)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload
-; X32-NEXT: movl %ebx, (%ecx)
-; X32-NEXT: movl %edx, 12(%ecx)
-; X32-NEXT: movl %esi, 16(%ecx)
-; X32-NEXT: movl %edi, 20(%ecx)
-; X32-NEXT: movl %eax, 24(%ecx)
-; X32-NEXT: addl $12, %esp
+; X32-NEXT: movl 28(%eax), %ecx
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT: movl 24(%eax), %ecx
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl 20(%eax), %esi
+; X32-NEXT: movl 16(%eax), %edi
+; X32-NEXT: movl 12(%eax), %ebx
+; X32-NEXT: movl 8(%eax), %ebp
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: addl %ecx, (%eax)
+; X32-NEXT: adcl %edx, 4(%eax)
+; X32-NEXT: adcl %ebp, 8(%eax)
+; X32-NEXT: adcl %ebx, 12(%eax)
+; X32-NEXT: adcl %edi, 16(%eax)
+; X32-NEXT: adcl %esi, 20(%eax)
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ecx, 24(%eax)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT: adcl %ecx, 28(%eax)
+; X32-NEXT: addl $8, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
; X32-NEXT: popl %ebx
@@ -51,17 +41,14 @@
;
; X64-LABEL: add:
; X64: # %bb.0:
-; X64-NEXT: movq 16(%rdi), %rax
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq 8(%rdi), %rdx
-; X64-NEXT: movq 24(%rsi), %r8
-; X64-NEXT: addq (%rsi), %rcx
-; X64-NEXT: adcq 8(%rsi), %rdx
-; X64-NEXT: adcq 16(%rsi), %rax
-; X64-NEXT: adcq %r8, 24(%rdi)
-; X64-NEXT: movq %rax, 16(%rdi)
-; X64-NEXT: movq %rdx, 8(%rdi)
-; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: movq 24(%rsi), %rax
+; X64-NEXT: movq 16(%rsi), %rcx
+; X64-NEXT: movq (%rsi), %rdx
+; X64-NEXT: movq 8(%rsi), %rsi
+; X64-NEXT: addq %rdx, (%rdi)
+; X64-NEXT: adcq %rsi, 8(%rdi)
+; X64-NEXT: adcq %rcx, 16(%rdi)
+; X64-NEXT: adcq %rax, 24(%rdi)
; X64-NEXT: retq
%a = load i256, i256* %p
%b = load i256, i256* %q
@@ -77,35 +64,28 @@
; X32-NEXT: pushl %edi
; X32-NEXT: pushl %esi
; X32-NEXT: subl $8, %esp
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl 16(%ecx), %eax
-; X32-NEXT: movl 12(%ecx), %edx
-; X32-NEXT: movl 8(%ecx), %edi
-; X32-NEXT: movl (%ecx), %ebx
-; X32-NEXT: movl 4(%ecx), %ebp
-; X32-NEXT: subl (%esi), %ebx
-; X32-NEXT: sbbl 4(%esi), %ebp
-; X32-NEXT: sbbl 8(%esi), %edi
-; X32-NEXT: sbbl 12(%esi), %edx
-; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
-; X32-NEXT: sbbl 16(%esi), %eax
-; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X32-NEXT: movl 20(%ecx), %edx
-; X32-NEXT: sbbl 20(%esi), %edx
-; X32-NEXT: movl 24(%ecx), %eax
-; X32-NEXT: sbbl 24(%esi), %eax
-; X32-NEXT: movl 28(%esi), %esi
-; X32-NEXT: sbbl %esi, 28(%ecx)
-; X32-NEXT: movl %edi, 8(%ecx)
-; X32-NEXT: movl %ebp, 4(%ecx)
-; X32-NEXT: movl %ebx, (%ecx)
-; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, 12(%ecx)
-; X32-NEXT: movl (%esp), %esi # 4-byte Reload
-; X32-NEXT: movl %esi, 16(%ecx)
-; X32-NEXT: movl %edx, 20(%ecx)
-; X32-NEXT: movl %eax, 24(%ecx)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl 28(%eax), %ecx
+; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT: movl 24(%eax), %ecx
+; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill
+; X32-NEXT: movl 20(%eax), %esi
+; X32-NEXT: movl 16(%eax), %edi
+; X32-NEXT: movl 12(%eax), %ebx
+; X32-NEXT: movl 8(%eax), %ebp
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: subl %ecx, (%eax)
+; X32-NEXT: sbbl %edx, 4(%eax)
+; X32-NEXT: sbbl %ebp, 8(%eax)
+; X32-NEXT: sbbl %ebx, 12(%eax)
+; X32-NEXT: sbbl %edi, 16(%eax)
+; X32-NEXT: sbbl %esi, 20(%eax)
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
+; X32-NEXT: sbbl %ecx, 24(%eax)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload
+; X32-NEXT: sbbl %ecx, 28(%eax)
; X32-NEXT: addl $8, %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %edi
@@ -115,17 +95,14 @@
;
; X64-LABEL: sub:
; X64: # %bb.0:
-; X64-NEXT: movq 16(%rdi), %rax
-; X64-NEXT: movq (%rdi), %rcx
-; X64-NEXT: movq 8(%rdi), %rdx
-; X64-NEXT: movq 24(%rsi), %r8
-; X64-NEXT: subq (%rsi), %rcx
-; X64-NEXT: sbbq 8(%rsi), %rdx
-; X64-NEXT: sbbq 16(%rsi), %rax
-; X64-NEXT: sbbq %r8, 24(%rdi)
-; X64-NEXT: movq %rax, 16(%rdi)
-; X64-NEXT: movq %rdx, 8(%rdi)
-; X64-NEXT: movq %rcx, (%rdi)
+; X64-NEXT: movq 24(%rsi), %rax
+; X64-NEXT: movq 16(%rsi), %rcx
+; X64-NEXT: movq (%rsi), %rdx
+; X64-NEXT: movq 8(%rsi), %rsi
+; X64-NEXT: subq %rdx, (%rdi)
+; X64-NEXT: sbbq %rsi, 8(%rdi)
+; X64-NEXT: sbbq %rcx, 16(%rdi)
+; X64-NEXT: sbbq %rax, 24(%rdi)
; X64-NEXT: retq
%a = load i256, i256* %p
%b = load i256, i256* %q
diff --git a/llvm/test/CodeGen/X86/masked_memop.ll b/llvm/test/CodeGen/X86/masked_memop.ll
index 4a25020..aa6ae09 100644
--- a/llvm/test/CodeGen/X86/masked_memop.ll
+++ b/llvm/test/CodeGen/X86/masked_memop.ll
@@ -1264,8 +1264,7 @@
; AVX-LABEL: load_one_mask_bit_set5:
; AVX: ## %bb.0:
; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; AVX-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; AVX-NEXT: vmovhpd {{.*#+}} xmm2 = xmm2[0],mem[0]
; AVX-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/merge-consecutive-stores.ll b/llvm/test/CodeGen/X86/merge-consecutive-stores.ll
index af5fb47..4f511ef 100644
--- a/llvm/test/CodeGen/X86/merge-consecutive-stores.ll
+++ b/llvm/test/CodeGen/X86/merge-consecutive-stores.ll
@@ -10,12 +10,11 @@
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl $0, 28(%eax)
; CHECK-NEXT: movl $0, 24(%eax)
-; CHECK-NEXT: movl 20(%eax), %ecx
-; CHECK-NEXT: movl $0, 20(%eax)
-; CHECK-NEXT: xorl %edx, %edx
-; CHECK-NEXT: cmpl 16(%eax), %edx
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: cmpl 16(%eax), %ecx
; CHECK-NEXT: movl $0, 16(%eax)
-; CHECK-NEXT: sbbl %ecx, %edx
+; CHECK-NEXT: sbbl 20(%eax), %ecx
+; CHECK-NEXT: movl $0, 20(%eax)
; CHECK-NEXT: setl %al
; CHECK-NEXT: movzbl %al, %eax
; CHECK-NEXT: negl %eax
diff --git a/llvm/test/CodeGen/X86/nontemporal.ll b/llvm/test/CodeGen/X86/nontemporal.ll
index f53982a..472c3e47 100644
--- a/llvm/test/CodeGen/X86/nontemporal.ll
+++ b/llvm/test/CodeGen/X86/nontemporal.ll
@@ -13,36 +13,35 @@
; X32-SSE-NEXT: andl $-16, %esp
; X32-SSE-NEXT: subl $16, %esp
; X32-SSE-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
-; X32-SSE-NEXT: movl 12(%ebp), %eax
+; X32-SSE-NEXT: movl 12(%ebp), %ecx
; X32-SSE-NEXT: movdqa 56(%ebp), %xmm4
; X32-SSE-NEXT: movdqa 40(%ebp), %xmm5
; X32-SSE-NEXT: movdqa 24(%ebp), %xmm6
-; X32-SSE-NEXT: movl 8(%ebp), %edx
-; X32-SSE-NEXT: movl 80(%ebp), %ecx
-; X32-SSE-NEXT: movl (%ecx), %esi
+; X32-SSE-NEXT: movl 8(%ebp), %esi
+; X32-SSE-NEXT: movl 80(%ebp), %edx
+; X32-SSE-NEXT: movl (%edx), %eax
; X32-SSE-NEXT: addps {{\.LCPI.*}}, %xmm0
-; X32-SSE-NEXT: movntps %xmm0, (%edx)
+; X32-SSE-NEXT: movntps %xmm0, (%esi)
; X32-SSE-NEXT: paddq {{\.LCPI.*}}, %xmm2
-; X32-SSE-NEXT: addl (%ecx), %esi
-; X32-SSE-NEXT: movntdq %xmm2, (%edx)
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movntdq %xmm2, (%esi)
; X32-SSE-NEXT: addpd {{\.LCPI.*}}, %xmm1
-; X32-SSE-NEXT: addl (%ecx), %esi
-; X32-SSE-NEXT: movntpd %xmm1, (%edx)
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movntpd %xmm1, (%esi)
; X32-SSE-NEXT: paddd {{\.LCPI.*}}, %xmm6
-; X32-SSE-NEXT: addl (%ecx), %esi
-; X32-SSE-NEXT: movntdq %xmm6, (%edx)
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movntdq %xmm6, (%esi)
; X32-SSE-NEXT: paddw {{\.LCPI.*}}, %xmm5
-; X32-SSE-NEXT: addl (%ecx), %esi
-; X32-SSE-NEXT: movntdq %xmm5, (%edx)
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movntdq %xmm5, (%esi)
; X32-SSE-NEXT: paddb {{\.LCPI.*}}, %xmm4
-; X32-SSE-NEXT: addl (%ecx), %esi
-; X32-SSE-NEXT: movntdq %xmm4, (%edx)
-; X32-SSE-NEXT: addl (%ecx), %esi
-; X32-SSE-NEXT: movntil %eax, (%edx)
-; X32-SSE-NEXT: movl (%ecx), %eax
-; X32-SSE-NEXT: addl %esi, %eax
-; X32-SSE-NEXT: movsd %xmm3, (%edx)
-; X32-SSE-NEXT: addl (%ecx), %eax
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movntdq %xmm4, (%esi)
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movntil %ecx, (%esi)
+; X32-SSE-NEXT: addl (%edx), %eax
+; X32-SSE-NEXT: movsd %xmm3, (%esi)
+; X32-SSE-NEXT: addl (%edx), %eax
; X32-SSE-NEXT: leal -4(%ebp), %esp
; X32-SSE-NEXT: popl %esi
; X32-SSE-NEXT: popl %ebp
@@ -56,36 +55,35 @@
; X32-AVX-NEXT: andl $-16, %esp
; X32-AVX-NEXT: subl $16, %esp
; X32-AVX-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero
-; X32-AVX-NEXT: movl 12(%ebp), %eax
+; X32-AVX-NEXT: movl 12(%ebp), %ecx
; X32-AVX-NEXT: vmovdqa 56(%ebp), %xmm4
; X32-AVX-NEXT: vmovdqa 40(%ebp), %xmm5
; X32-AVX-NEXT: vmovdqa 24(%ebp), %xmm6
-; X32-AVX-NEXT: movl 8(%ebp), %ecx
-; X32-AVX-NEXT: movl 80(%ebp), %edx
-; X32-AVX-NEXT: movl (%edx), %esi
+; X32-AVX-NEXT: movl 8(%ebp), %edx
+; X32-AVX-NEXT: movl 80(%ebp), %esi
+; X32-AVX-NEXT: movl (%esi), %eax
; X32-AVX-NEXT: vaddps {{\.LCPI.*}}, %xmm0, %xmm0
-; X32-AVX-NEXT: vmovntps %xmm0, (%ecx)
+; X32-AVX-NEXT: vmovntps %xmm0, (%edx)
; X32-AVX-NEXT: vpaddq {{\.LCPI.*}}, %xmm2, %xmm0
-; X32-AVX-NEXT: addl (%edx), %esi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: vaddpd {{\.LCPI.*}}, %xmm1, %xmm0
-; X32-AVX-NEXT: addl (%edx), %esi
-; X32-AVX-NEXT: vmovntpd %xmm0, (%ecx)
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: vmovntpd %xmm0, (%edx)
; X32-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm6, %xmm0
-; X32-AVX-NEXT: addl (%edx), %esi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: vpaddw {{\.LCPI.*}}, %xmm5, %xmm0
-; X32-AVX-NEXT: addl (%edx), %esi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
; X32-AVX-NEXT: vpaddb {{\.LCPI.*}}, %xmm4, %xmm0
-; X32-AVX-NEXT: addl (%edx), %esi
-; X32-AVX-NEXT: vmovntdq %xmm0, (%ecx)
-; X32-AVX-NEXT: addl (%edx), %esi
-; X32-AVX-NEXT: movntil %eax, (%ecx)
-; X32-AVX-NEXT: movl (%edx), %eax
-; X32-AVX-NEXT: addl %esi, %eax
-; X32-AVX-NEXT: vmovsd %xmm3, (%ecx)
-; X32-AVX-NEXT: addl (%edx), %eax
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: vmovntdq %xmm0, (%edx)
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: movntil %ecx, (%edx)
+; X32-AVX-NEXT: addl (%esi), %eax
+; X32-AVX-NEXT: vmovsd %xmm3, (%edx)
+; X32-AVX-NEXT: addl (%esi), %eax
; X32-AVX-NEXT: leal -4(%ebp), %esp
; X32-AVX-NEXT: popl %esi
; X32-AVX-NEXT: popl %ebp
diff --git a/llvm/test/CodeGen/X86/pr36274.ll b/llvm/test/CodeGen/X86/pr36274.ll
new file mode 100644
index 0000000..97b958c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr36274.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
+
+; This test checks for a case where the x86 load-op-store fusion could
+; miss a dependence between the fused load and a non-fused operand of
+; the load, causing a cycle. Here the dependence in question comes from
+; the carry-in input of the adcl.
+
+@vx = external local_unnamed_addr global <2 x i32>, align 8
+
+define void @pr36274(i32* %somewhere) {
+; CHECK-LABEL: pr36274:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl vx+4, %eax
+; CHECK-NEXT: addl $1, vx
+; CHECK-NEXT: adcl $0, %eax
+; CHECK-NEXT: movl %eax, vx+4
+; CHECK-NEXT: retl
+ %a0 = getelementptr <2 x i32>, <2 x i32>* @vx, i32 0, i32 0
+ %a1 = getelementptr <2 x i32>, <2 x i32>* @vx, i32 0, i32 1
+ %x1 = load volatile i32, i32* %a1, align 4
+ %x0 = load volatile i32, i32* %a0, align 8
+ %vx0 = insertelement <2 x i32> undef, i32 %x0, i32 0
+ %vx1 = insertelement <2 x i32> %vx0, i32 %x1, i32 1
+ %x = bitcast <2 x i32> %vx1 to i64
+ %add = add i64 %x, 1
+ %vadd = bitcast i64 %add to <2 x i32>
+ %vx1_0 = extractelement <2 x i32> %vadd, i32 0
+ %vx1_1 = extractelement <2 x i32> %vadd, i32 1
+ store i32 %vx1_0, i32* %a0, align 8
+ store i32 %vx1_1, i32* %a1, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/pr36312.ll b/llvm/test/CodeGen/X86/pr36312.ll
new file mode 100644
index 0000000..6404851
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr36312.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+%struct.anon = type { i32, i32 }
+
+@c = common global %struct.anon zeroinitializer, align 4
+@d = local_unnamed_addr global %struct.anon* @c, align 8
+@a = common local_unnamed_addr global i32 0, align 4
+@b = common local_unnamed_addr global i32 0, align 4
+
+; Function Attrs: norecurse nounwind uwtable
+define void @g() local_unnamed_addr #0 {
+; CHECK-LABEL: g:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq {{.*}}(%rip), %rax
+; CHECK-NEXT: movl 4(%rax), %eax
+; CHECK-NEXT: xorl %ecx, %ecx
+; CHECK-NEXT: incl {{.*}}(%rip)
+; CHECK-NEXT: setne %cl
+; CHECK-NEXT: addl %eax, %ecx
+; CHECK-NEXT: movl %ecx, {{.*}}(%rip)
+; CHECK-NEXT: retq
+entry:
+ %0 = load %struct.anon*, %struct.anon** @d, align 8
+ %y = getelementptr inbounds %struct.anon, %struct.anon* %0, i64 0, i32 1
+ %1 = load i32, i32* %y, align 4
+ %2 = load i32, i32* @b, align 4
+ %inc = add nsw i32 %2, 1
+ store i32 %inc, i32* @b, align 4
+ %tobool = icmp ne i32 %inc, 0
+ %land.ext = zext i1 %tobool to i32
+ %add = add nsw i32 %1, %land.ext
+ store i32 %add, i32* @a, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/X86/required-vector-width.ll b/llvm/test/CodeGen/X86/required-vector-width.ll
index 257d3f0..dcca540 100644
--- a/llvm/test/CodeGen/X86/required-vector-width.ll
+++ b/llvm/test/CodeGen/X86/required-vector-width.ll
@@ -39,12 +39,12 @@
define void @avg_v64i8_256(<64 x i8>* %a, <64 x i8>* %b) "required-vector-width"="256" {
; CHECK-LABEL: avg_v64i8_256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0
-; CHECK-NEXT: vmovdqa (%rsi), %ymm1
-; CHECK-NEXT: vpavgb (%rdi), %ymm1, %ymm1
-; CHECK-NEXT: vpavgb 32(%rsi), %ymm0, %ymm0
-; CHECK-NEXT: vmovdqu %ymm0, (%rax)
+; CHECK-NEXT: vmovdqa (%rsi), %ymm0
+; CHECK-NEXT: vmovdqa 32(%rsi), %ymm1
+; CHECK-NEXT: vpavgb (%rdi), %ymm0, %ymm0
+; CHECK-NEXT: vpavgb 32(%rdi), %ymm1, %ymm1
; CHECK-NEXT: vmovdqu %ymm1, (%rax)
+; CHECK-NEXT: vmovdqu %ymm0, (%rax)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
%1 = load <64 x i8>, <64 x i8>* %a
diff --git a/llvm/test/CodeGen/X86/store_op_load_fold2.ll b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
index f47d87f..674b8d8 100644
--- a/llvm/test/CodeGen/X86/store_op_load_fold2.ll
+++ b/llvm/test/CodeGen/X86/store_op_load_fold2.ll
@@ -17,14 +17,14 @@
store i64 %tmp2676.us.us, i64* %tmp2666
ret i32 0
-; INTEL: and {{e..}}, dword ptr [360]
-; INTEL: and dword ptr [356], {{e..}}
-; FIXME: mov dword ptr [360], {{e..}}
+; INTEL: and {{e..}}, dword ptr [356]
+; INTEL: and dword ptr [360], {{e..}}
+; FIXME: mov dword ptr [356], {{e..}}
; The above line comes out as 'mov 360, eax', but when the register is ecx it works?
-; ATT: andl 360, %{{e..}}
-; ATT: andl %{{e..}}, 356
-; ATT: movl %{{e..}}, 360
+; ATT: andl 356, %{{e..}}
+; ATT: andl %{{e..}}, 360
+; ATT: movl %{{e..}}, 356
}
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll
index 00cd4df..218ce26 100644
--- a/llvm/test/CodeGen/X86/subvector-broadcast.ll
+++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll
@@ -751,72 +751,64 @@
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX-NEXT: retl
;
; X32-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
; X32-AVX512BW: # %bb.0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
; X32-AVX512DQ: # %bb.0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_8i32_chain:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX512DQ-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
store <4 x float> zeroinitializer, <4 x float>* %p1
@@ -829,10 +821,9 @@
; X32-AVX: # %bb.0:
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX-NEXT: vmovaps (%ecx), %xmm0
; X32-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X32-AVX-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X32-AVX-NEXT: vmovaps %ymm0, %ymm1
; X32-AVX-NEXT: retl
;
@@ -840,63 +831,56 @@
; X32-AVX512F: # %bb.0:
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512F-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512F-NEXT: retl
;
; X32-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
; X32-AVX512BW: # %bb.0:
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512BW-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512BW-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X32-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: vmovdqa %xmm1, (%eax)
-; X32-AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512BW-NEXT: retl
;
; X32-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
; X32-AVX512DQ: # %bb.0:
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-AVX512DQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-AVX512DQ-NEXT: vmovdqa (%ecx), %xmm0
; X32-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X32-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512DQ-NEXT: vmovaps %xmm1, (%eax)
-; X32-AVX512DQ-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X32-AVX512DQ-NEXT: retl
;
; X64-AVX-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovaps (%rdi), %xmm0
; X64-AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
; X64-AVX-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX-NEXT: vmovaps %ymm0, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512F-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512F: # %bb.0:
-; X64-AVX512F-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512F-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512F-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512F-NEXT: retq
;
; X64-AVX512BW-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512BW: # %bb.0:
-; X64-AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; X64-AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: vmovdqa %xmm1, (%rsi)
-; X64-AVX512BW-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512BW-NEXT: retq
;
; X64-AVX512DQ-LABEL: test_broadcast_4i32_16i32_chain:
; X64-AVX512DQ: # %bb.0:
-; X64-AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
; X64-AVX512DQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX512DQ-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512DQ-NEXT: vmovaps %xmm1, (%rsi)
-; X64-AVX512DQ-NEXT: vshufi32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
; X64-AVX512DQ-NEXT: retq
%1 = load <4 x i32>, <4 x i32>* %p0
store <4 x float> zeroinitializer, <4 x float>* %p1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
index 38c4a11..19fd7d3 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-variable-256.ll
@@ -47,8 +47,7 @@
; ALL-NEXT: andl $3, %edx
; ALL-NEXT: andl $3, %esi
; ALL-NEXT: vmovaps %ymm0, (%rsp)
-; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; ALL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; ALL-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT: movq %rbp, %rsp