Only use instructions as insert locations for SCEVExpander

SCEVExpander, which we are using during code generation, only allows
instructions as insert locations, but breaks in case BasicBlock->end() iterators
are passed to it due to it trying to obtain the basic block in which code should
be generated by calling Instruction->getParent(), which is not defined for
->end() iterators.

This change adds an assert to Polly that ensures we only pass valid instructions
to SCEVExpander and it fixes one case, where we used IRBuilder->SetInsertBlock()
to set an ->end() insert location which was later passed to SCEVExpander.

In general, Polly is always trying to build up the CFG first, before we actually
insert instructions into the CFG sceleton. As a result, each basic block should
already have at least one branch instruction before we start adding code. Hence,
always requiring the IRBuilder insert location to be set to a real instruction
should always be possible.

Thanks Utpal Bora <cs14mtech11017@iith.ac.in> for his help with test case
reduction.

This is a backport from r243830 as it was committed on trunk.

git-svn-id: https://llvm.org/svn/llvm-project/polly/branches/release_37@246029 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/BlockGenerators.cpp b/lib/CodeGen/BlockGenerators.cpp
index efe8281..4c667dd 100644
--- a/lib/CodeGen/BlockGenerators.cpp
+++ b/lib/CodeGen/BlockGenerators.cpp
@@ -123,6 +123,8 @@
                                       ->getParent()
                                       ->getDataLayout(),
                               "polly");
+        assert(Builder.GetInsertPoint() != Builder.GetInsertBlock()->end() &&
+               "Only instructions can be insert points for SCEVExpander");
         Value *Expanded = Expander.expandCodeFor(NewScev, Old->getType(),
                                                  Builder.GetInsertPoint());
 
@@ -1116,7 +1118,7 @@
     ValueMapT &RegionMap = RegionMaps[BBCopy];
     RegionMap.insert(BlockMap.begin(), BlockMap.end());
 
-    Builder.SetInsertPoint(BBCopy);
+    Builder.SetInsertPoint(BICopy);
     copyInstScalar(Stmt, BI, RegionMap, GlobalMap, LTS);
     BICopy->eraseFromParent();
   }
diff --git a/test/Isl/CodeGen/non-affine-phi-node-expansion.ll b/test/Isl/CodeGen/non-affine-phi-node-expansion.ll
new file mode 100644
index 0000000..461eaa6
--- /dev/null
+++ b/test/Isl/CodeGen/non-affine-phi-node-expansion.ll
@@ -0,0 +1,54 @@
+; RUN: opt %loadPolly -polly-codegen -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.wombat = type {[4 x i32]}
+
+; CHECK: polly.stmt.bb3.entry:                             ; preds = %polly.start
+; CHECK:   br label %polly.stmt.bb3
+
+; CHECK: polly.stmt.bb3:                                   ; preds = %polly.stmt.bb3.entry
+; CHECK:   %polly.subregion.iv = phi i32 [ 0, %polly.stmt.bb3.entry ]
+; CHECK:   %polly.subregion.iv.inc = add i32 %polly.subregion.iv, 1
+; CHECK:   br i1 true, label %polly.stmt.bb4, label %polly.stmt.bb5
+
+; CHECK: polly.stmt.bb4:                                   ; preds = %polly.stmt.bb3
+; CHECK:   br label %polly.stmt.bb13.exit
+
+; CHECK: polly.stmt.bb5:                                   ; preds = %polly.stmt.bb3
+; CHECK:   %tmp7_p_scalar_ = load i32, i32* %B, !alias.scope !0, !noalias !2
+; CHECK:   store i32 %tmp7_p_scalar_, i32* %polly.access.cast.arg2, !alias.scope !3, !noalias !4
+; CHECK:   br label %polly.stmt.bb13.exit
+
+; Function Attrs: nounwind uwtable
+define void @quux(%struct.wombat* %arg, i32* %B) {
+bb:
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp = phi i1 [ true, %bb ], [ undef, %bb1 ]
+  br label %bb3
+
+bb3:                                              ; preds = %bb13, %bb2
+  br i1 %tmp, label %bb4, label %bb5
+
+bb4:                                              ; preds = %bb3
+  br label %bb13
+
+bb5:                                              ; preds = %bb3
+  %tmp7 = load i32, i32* %B
+  %tmp12 = getelementptr inbounds %struct.wombat, %struct.wombat* %arg, i64 0, i32 0, i64 0
+  store i32 %tmp7, i32* %tmp12
+  br label %bb13
+
+bb13:                                             ; preds = %bb5, %bb4
+  br i1 false, label %bb3, label %bb14
+
+bb14:                                             ; preds = %bb13
+  br label %bb15
+
+bb15:                                             ; preds = %bb14
+  unreachable
+}