[CodeGen] Replace the reused stores' chain for extractelt expansion.
This fixes a subtle issue that was introduced in r205153.
When reusing a store for the extractelement expansion (to load directly
from it, inserting of going through the stack), later stores to the
same location might have overwritten the data we were expecting to
extract from.
To fix that, we need to explicitly replace the chain going out of the
reused store, so that later stores also have an explicit dependency on
the generated element-extracting loads, and can't clobber them.
rdar://20066785
Differential Revision: http://reviews.llvm.org/D8180
llvm-svn: 231721
diff --git a/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll b/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
new file mode 100644
index 0000000..946516c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/extractelement-legalization-store-ordering.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128"
+
+; Make sure we don't break load/store ordering when turning an extractelement
+; into loads, off the stack or a previous store.
+; Be very explicit about the ordering/stack offsets.
+
+; CHECK-LABEL: test_extractelement_legalization_storereuse:
+; CHECK: # BB#0
+; CHECK-NEXT: pushl %ebx
+; CHECK-NEXT: pushl %edi
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: movl 16(%esp), %eax
+; CHECK-NEXT: movl 24(%esp), %ecx
+; CHECK-NEXT: movl 20(%esp), %edx
+; CHECK-NEXT: paddd (%edx), %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%edx)
+; CHECK-NEXT: shll $4, %ecx
+; CHECK-NEXT: movl (%ecx,%edx), %esi
+; CHECK-NEXT: movl 12(%ecx,%edx), %edi
+; CHECK-NEXT: movl 8(%ecx,%edx), %ebx
+; CHECK-NEXT: movl 4(%ecx,%edx), %edx
+; CHECK-NEXT: movl %esi, 12(%eax,%ecx)
+; CHECK-NEXT: movl %edx, (%eax,%ecx)
+; CHECK-NEXT: movl %ebx, 8(%eax,%ecx)
+; CHECK-NEXT: movl %edi, 4(%eax,%ecx)
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: popl %ebx
+; CHECK-NEXT: retl
+define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 {
+entry:
+ %0 = bitcast i32* %y to <4 x i32>*
+ %1 = load <4 x i32>, <4 x i32>* %0, align 16
+ %am = add <4 x i32> %a, %1
+ store <4 x i32> %am, <4 x i32>* %0, align 16
+ %ip0 = shl nsw i32 %i, 2
+ %ip1 = or i32 %ip0, 1
+ %ip2 = or i32 %ip0, 2
+ %ip3 = or i32 %ip0, 3
+ %vecext = extractelement <4 x i32> %am, i32 %ip0
+ %arrayidx = getelementptr inbounds i32, i32* %x, i32 %ip3
+ store i32 %vecext, i32* %arrayidx, align 4
+ %vecext5 = extractelement <4 x i32> %am, i32 %ip1
+ %arrayidx8 = getelementptr inbounds i32, i32* %x, i32 %ip0
+ store i32 %vecext5, i32* %arrayidx8, align 4
+ %vecext11 = extractelement <4 x i32> %am, i32 %ip2
+ %arrayidx14 = getelementptr inbounds i32, i32* %x, i32 %ip2
+ store i32 %vecext11, i32* %arrayidx14, align 4
+ %vecext17 = extractelement <4 x i32> %am, i32 %ip3
+ %arrayidx20 = getelementptr inbounds i32, i32* %x, i32 %ip1
+ store i32 %vecext17, i32* %arrayidx20, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }