[LV] Use correct insertion point when type shrinking reductions When type shrinking reductions, we should insert the truncations and extends at the end of the loop latch block. Previously, these instructions were inserted at the end of the loop header block. The difference is only a problem for loops with predicated instructions (e.g., conditional stores and instructions that may divide by zero). For these instructions, we create new basic blocks inside the vectorized loop, which cause the loop header and latch to no longer be the same block. This should fix PR34687. Reference: https://bugs.llvm.org/show_bug.cgi?id=34687 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@314542 91177308-0d34-0410-b5e6-96231b3b80d8

commit: e0d97dea2f357daa9c052a91bfb5c0248cf901eb [log] [tgz]
author: Matthew Simpson <mssimpso@codeaurora.org> Fri Sep 29 18:07:39 2017 +0000
committer: Matthew Simpson <mssimpso@codeaurora.org> Fri Sep 29 18:07:39 2017 +0000
tree: d2044bef84102c09471f62aa2ae2e7a6d0ee6c67
parent: afe863129f5d3a40bc2bdb30cdab47ccf07b813d [diff]
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2b3ea8b..6f53b08 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp

@@ -4235,7 +4235,8 @@
   // entire expression in the smaller type.
   if (VF > 1 && Phi->getType() != RdxDesc.getRecurrenceType()) {
     Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
-    Builder.SetInsertPoint(LoopVectorBody->getTerminator());
+    Builder.SetInsertPoint(
+        LI->getLoopFor(LoopVectorBody)->getLoopLatch()->getTerminator());
     VectorParts RdxParts(UF);
     for (unsigned Part = 0; Part < UF; ++Part) {
       RdxParts[Part] = VectorLoopValueMap.getVectorValue(LoopExitInst, Part);

diff --git a/test/Transforms/LoopVectorize/reduction-small-size.ll b/test/Transforms/LoopVectorize/reduction-small-size.ll
new file mode 100644
index 0000000..b44beb8
--- /dev/null
+++ b/test/Transforms/LoopVectorize/reduction-small-size.ll

@@ -0,0 +1,40 @@
+; RUN: opt < %s -force-vector-width=4 -force-vector-interleave=1 -loop-vectorize -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @PR34687(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %[[LATCH:.*]] ]
+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ [[TMP17:%.*]], %[[LATCH]] ]
+; CHECK:       [[LATCH]]:
+; CHECK:         [[TMP13:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
+; CHECK-NEXT:    [[TMP14:%.*]] = add nuw nsw <4 x i32> [[TMP13]], {{.*}}
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK:         [[TMP16:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i8>
+; CHECK-NEXT:    [[TMP17]] = zext <4 x i8> [[TMP16]] to <4 x i32>
+; CHECK-NEXT:    br i1 {{.*}}, label %middle.block, label %vector.body
+;
+define void @PR34687(i1 %c, i32 %x, i32 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
+  %r = phi i32 [ 0, %entry ], [ %r.next, %if.end ]
+  br i1 %c, label %if.then, label %if.end
+
+if.then:
+  %tmp0 = sdiv i32 undef, undef
+  br label %if.end
+
+if.end:
+  %tmp1 = and i32 %r, 255
+  %i.next = add nsw i32 %i, 1
+  %r.next = add nuw nsw i32 %tmp1, %x
+  %cond = icmp eq i32 %i.next, %n
+  br i1 %cond, label %for.end, label %for.body
+
+for.end:
+  %tmp2 = phi i32 [ %r.next, %if.end ]
+  ret void
+}
commit	e0d97dea2f357daa9c052a91bfb5c0248cf901eb	[log] [tgz]
author	Matthew Simpson <mssimpso@codeaurora.org>	Fri Sep 29 18:07:39 2017 +0000
committer	Matthew Simpson <mssimpso@codeaurora.org>	Fri Sep 29 18:07:39 2017 +0000
tree	d2044bef84102c09471f62aa2ae2e7a6d0ee6c67
parent	afe863129f5d3a40bc2bdb30cdab47ccf07b813d [diff]