[CGP] Check for existing inttoptr before creating new one

Make sure CodeGenPrepare doesn't emit multiple inttoptr instructions of
the same integer value while sinking address computations, but rather
CSEs them on the fly: excessive inttoptr's confuse SCEV into thinking
that related pointers have nothing to do with each other.

This problem blocks LoadStoreVectorizer from vectorizing some of the
loads / stores in a downstream target.

Reviewed By: hfinkel

Differential Revision: https://reviews.llvm.org/D56838

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@351582 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c35f866..0c7c9de 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -4664,13 +4664,22 @@
     // will look through it and provide only the integer value. In that case,
     // use it here.
     if (!DL->isNonIntegralPointerType(Addr->getType())) {
+      // Returns a pointer of Addr's type that holds the value of Reg. If Reg
+      // already has an inttoptr user of that type in MemoryInst's block, that
+      // instruction is reused instead of emitting a duplicate: redundant
+      // inttoptr's confuse SCEV into thinking that related pointers have
+      // nothing to do with each other.
+      const auto getResultPtr = [MemoryInst, Addr,
+                                 &Builder](Value *Reg) -> Value * {
+        BasicBlock *BB = MemoryInst->getParent();
+        for (User *U : Reg->users())
+          if (auto *I2P = dyn_cast<IntToPtrInst>(U))
+            if (I2P->getType() == Addr->getType() && I2P->getParent() == BB) {
+              // Hoist the existing cast so that it precedes its uses in this
+              // block, but never above the definition of its operand: if Reg
+              // is a non-PHI instruction of this very block, the cast must
+              // stay below it, otherwise the IR would use Reg before it is
+              // defined.
+              auto *RegInst = dyn_cast<Instruction>(Reg);
+              if (RegInst && RegInst->getParent() == BB &&
+                  !isa<PHINode>(RegInst))
+                I2P->moveAfter(RegInst);
+              else
+                // Reg is an argument, a constant, a PHI of this block, or is
+                // defined in another block; the first insertion point (past
+                // PHIs and EH pads) is then a valid place for the cast.
+                I2P->moveBefore(*BB, BB->getFirstInsertionPt());
+              return I2P;
+            }
+        // No reusable cast in this block: materialize a fresh one at the
+        // builder's current insertion point.
+        return Builder.CreateIntToPtr(Reg, Addr->getType(), "sunkaddr");
+      };
       if (!ResultPtr && AddrMode.BaseReg) {
-        ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
-                                           "sunkaddr");
+        ResultPtr = getResultPtr(AddrMode.BaseReg);
         AddrMode.BaseReg = nullptr;
       } else if (!ResultPtr && AddrMode.Scale == 1) {
-        ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
-                                           "sunkaddr");
+        ResultPtr = getResultPtr(AddrMode.ScaledReg);
         AddrMode.Scale = 0;
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll
new file mode 100644
index 0000000..5f1fd98
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode-cse-inttoptrs.ll
@@ -0,0 +1,40 @@
+; RUN: opt -mtriple=x86_64-- -codegenprepare                        %s -S -o - | FileCheck %s --check-prefix=CGP
+; RUN: opt -mtriple=x86_64-- -codegenprepare -load-store-vectorizer %s -S -o - | FileCheck %s --check-prefix=LSV
+; Make sure CodeGenPrepare doesn't emit multiple inttoptr instructions
+; of the same integer value while sinking address computations, but
+; rather CSEs them on the fly: excessive inttoptr's confuse SCEV
+; into thinking that related pointers have nothing to do with each other.
+; Triggering this problem requires just the right addressing modes, and
+; verifying that the motivating pass (LoadStoreVectorizer) is able to
+; benefit from the fix requires just the right LSV policies. Hence the atypical
+; combination of the target and datalayout / address spaces in this test.
+target datalayout = "p1:32:32:32"
+define void @main(i32 %tmp, i32 %off) {
+; CGP:     = inttoptr
+; CGP-NOT: = inttoptr
+; LSV:     = load <2 x float>
+; LSV:     = load <2 x float>
+  %tmp1 = inttoptr i32 %tmp to float addrspace(1)*
+  %arrayidx.i.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 %off
+  %add20.i.7 = add i32 %off, 1
+  %arrayidx22.i.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 %add20.i.7
+  br label %for.body
+  %tmp8 = phi float [ undef, %entry ], [ %tmp62, %for.body ]
+  %tmp28 = load float, float addrspace(1)* %arrayidx.i.7
+  %tmp29 = load float, float addrspace(1)* %arrayidx22.i.7
+  %arrayidx.i321.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 0
+  %tmp43 = load float, float addrspace(1)* %arrayidx.i321.7
+  %arrayidx22.i327.7 = getelementptr inbounds float, float addrspace(1)* %tmp1, i32 1
+  %tmp44 = load float, float addrspace(1)* %arrayidx22.i327.7
+  %tmp62 = tail call fast float @foo(float %tmp8, float %tmp44, float %tmp43, float %tmp29, float %tmp28)
+  br label %for.body
+declare float @foo(float, float, float, float, float)