| ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "scalar.ph:" --version 5 |
| ; RUN: opt -passes='loop-vectorize' -force-vector-width=2 -S < %s | FileCheck %s |
| ; |
| ; Forcing VF=2 to trigger vector code gen |
| ; |
| ; This is a test case that lets the vectorizer's code gen generate |
| ; more than one basic block in the loop body (emulated masked scatter) |
| ; for those targets that do not support masked scatter. Broadcast |
| ; code generation was previously dependent on the loop body being |
| ; a single basic block, and this test case exposed incorrect code gen |
| ; resulting in an assert in IL verification. |
| ; |
| @a = external global [2 x i16], align 1 |
| |
| ; f1: loop containing a store that is guarded by the loop-invariant flag %c. |
| ; %iv starts at %start and decreases by 1 per iteration; the loop repeats |
| ; while %iv is signed-greater-than 1. When %c is false, i32 10 is stored to |
| ; %b. Every iteration then stores i16 0 into @a at index trunc(%iv). |
| ; The assertions below expect %c to be splatted in the vector preheader and |
| ; the guarded store to be emitted as one predicated block per lane, so the |
| ; vector loop body spans several basic blocks. |
| define void @f1(ptr noalias %b, i1 %c, i32 %start) { |
| ; CHECK-LABEL: define void @f1( |
| ; CHECK-SAME: ptr noalias [[B:%.*]], i1 [[C:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START]], 1 |
| ; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) |
| ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN1]] |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] |
| ; CHECK: [[VECTOR_SCEVCHECK]]: |
| ; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[START]], [[SMIN]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16 |
| ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535 |
| ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]] |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] |
| ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0 |
| ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer |
| ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[START]], [[N_VEC]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] |
| ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] |
| ; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 |
| ; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP11]], i32 0 |
| ; CHECK-NEXT: br i1 [[TMP13]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] |
| ; CHECK: [[PRED_STORE_IF]]: |
| ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] |
| ; CHECK: [[PRED_STORE_CONTINUE]]: |
| ; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[TMP11]], i32 1 |
| ; CHECK-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] |
| ; CHECK: [[PRED_STORE_IF2]]: |
| ; CHECK-NEXT: store i32 10, ptr [[B]], align 1 |
| ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] |
| ; CHECK: [[PRED_STORE_CONTINUE3]]: |
| ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP12]] |
| ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i16, ptr [[TMP15]], i32 0 |
| ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i16, ptr [[TMP16]], i32 -1 |
| ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP17]], align 1 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 |
| ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; Induction variable: begins at %start, updated in %loop.latch. |
| %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] |
| ; %c is a function argument, so the branch condition is loop-invariant: |
| ; true skips straight to the latch, false goes through %else first. |
| br i1 %c, label %loop.latch, label %else |
| |
| else: |
| ; Guarded store; the address %b and the stored value do not depend on %iv. |
| store i32 10, ptr %b, align 1 |
| br label %loop.latch |
| |
| loop.latch: |
| ; Unconditional store of i16 0 to @a indexed by the truncated %iv. |
| %t = trunc i32 %iv to i16 |
| %arrayidx = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 %t |
| store i16 0, ptr %arrayidx, align 1 |
| %iv.next = add nsw i32 %iv, -1 |
| ; The exit test compares the pre-decrement %iv, not %iv.next. |
| %cmp = icmp sgt i32 %iv, 1 |
| br i1 %cmp, label %loop.header, label %exit |
| |
| exit: |
| ret void |
| } |
| |
| ; f2: same loop shape as @f1, but the block guarded by %c performs a load |
| ; from %b whose result is never used, instead of a store. |
| ; %iv starts at %start and decreases by 1 per iteration; the loop repeats |
| ; while %iv is signed-greater-than 1, storing i16 0 into @a at index |
| ; trunc(%iv) each time. |
| ; The assertions below expect a vector body that is a single basic block: |
| ; no splat of %c, no predicated blocks, and no load from %b. |
| define void @f2(ptr noalias %b, i1 %c, i32 %start) { |
| ; CHECK-LABEL: define void @f2( |
| ; CHECK-SAME: ptr noalias [[B:%.*]], i1 [[C:%.*]], i32 [[START:%.*]]) { |
| ; CHECK-NEXT: [[ENTRY:.*:]] |
| ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[START]], 1 |
| ; CHECK-NEXT: [[SMIN1:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) |
| ; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN1]] |
| ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 |
| ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] |
| ; CHECK: [[VECTOR_SCEVCHECK]]: |
| ; CHECK-NEXT: [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[START]], i32 1) |
| ; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[START]], [[SMIN]] |
| ; CHECK-NEXT: [[TMP3:%.*]] = trunc i32 [[START]] to i16 |
| ; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP2]] to i16 |
| ; CHECK-NEXT: [[TMP5:%.*]] = sub i16 [[TMP3]], [[TMP4]] |
| ; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i16 [[TMP5]], [[TMP3]] |
| ; CHECK-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP2]], 65535 |
| ; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]] |
| ; CHECK-NEXT: br i1 [[TMP9]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] |
| ; CHECK: [[VECTOR_PH]]: |
| ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 |
| ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] |
| ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[START]], [[N_VEC]] |
| ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] |
| ; CHECK: [[VECTOR_BODY]]: |
| ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] |
| ; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i32 [[START]], [[INDEX]] |
| ; CHECK-NEXT: [[TMP11:%.*]] = trunc i32 [[OFFSET_IDX]] to i16 |
| ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 [[TMP11]] |
| ; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i16, ptr [[TMP12]], i32 0 |
| ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP13]], i32 -1 |
| ; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP14]], align 1 |
| ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 |
| ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] |
| ; CHECK: [[MIDDLE_BLOCK]]: |
| ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] |
| ; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] |
| ; CHECK: [[SCALAR_PH]]: |
| ; |
| entry: |
| br label %loop.header |
| |
| loop.header: |
| ; Induction variable: begins at %start, updated in %loop.latch. |
| %iv = phi i32 [ %start, %entry ], [ %iv.next, %loop.latch ] |
| ; %c is a function argument, so the branch condition is loop-invariant: |
| ; true skips straight to the latch, false goes through %else first. |
| br i1 %c, label %loop.latch, label %else |
| |
| else: |
| ; Guarded load; its result %1 has no users in this function. |
| %1 = load i32, ptr %b, align 1 |
| br label %loop.latch |
| |
| loop.latch: |
| ; Unconditional store of i16 0 to @a indexed by the truncated %iv. |
| %2 = trunc i32 %iv to i16 |
| %arrayidx = getelementptr inbounds [2 x i16], ptr @a, i16 0, i16 %2 |
| store i16 0, ptr %arrayidx, align 1 |
| %iv.next = add nsw i32 %iv, -1 |
| ; The exit test compares the pre-decrement %iv, not %iv.next. |
| %cmp = icmp sgt i32 %iv, 1 |
| br i1 %cmp, label %loop.header, label %exit |
| |
| exit: |
| ret void |
| } |